diff --git a/Jenkinsfile b/Jenkinsfile
index 55c9d5019f045..e1b4c5b3a1d67 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -19,7 +19,7 @@
 
 def doValidation() {
   sh """
-    ./gradlew -PscalaVersion=$SCALA_VERSION clean compileJava compileScala compileTestJava compileTestScala \
+    ./retry_zinc ./gradlew -PscalaVersion=$SCALA_VERSION clean compileJava compileScala compileTestJava compileTestScala \
         spotlessScalaCheck checkstyleMain checkstyleTest spotbugsMain rat \
         --profile --no-daemon --continue -PxmlSpotBugsReport=true
   """
@@ -160,42 +160,6 @@ pipeline {
             echo 'Skipping Kafka Streams archetype test for Java 17'
           }
         }
-
-        stage('ARM') {
-          agent { label 'arm4' }
-          options {
-            timeout(time: 2, unit: 'HOURS') 
-            timestamps()
-          }
-          environment {
-            SCALA_VERSION=2.12
-          }
-          steps {
-            doValidation()
-            catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
-              doTest(env, 'unitTest')
-            }
-            echo 'Skipping Kafka Streams archetype test for ARM build'
-          }
-        }
-
-        stage('PowerPC') {
-          agent { label 'ppc64le' }
-          options {
-            timeout(time: 2, unit: 'HOURS')
-            timestamps()
-          }
-          environment {
-            SCALA_VERSION=2.12
-          }
-          steps {
-            doValidation()
-            catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
-              doTest(env, 'unitTest')
-            }
-            echo 'Skipping Kafka Streams archetype test for PowerPC build'
-          }
-        }
         
         // To avoid excessive Jenkins resource usage, we only run the stages
         // above at the PR stage. The ones below are executed after changes
diff --git a/LICENSE-binary b/LICENSE-binary
index 42a8d79f86c9c..c2400694f2525 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -208,50 +208,53 @@ License Version 2.0:
 audience-annotations-0.5.0
 commons-cli-1.4
 commons-lang3-3.8.1
-jackson-annotations-2.12.3
-jackson-core-2.12.3
-jackson-databind-2.12.3
-jackson-dataformat-csv-2.12.3
-jackson-datatype-jdk8-2.12.3
-jackson-jaxrs-base-2.12.3
-jackson-jaxrs-json-provider-2.12.3
-jackson-module-jaxb-annotations-2.12.3
+jackson-annotations-2.12.6
+jackson-core-2.12.6
+jackson-databind-2.12.6.1
+jackson-dataformat-csv-2.12.6
+jackson-datatype-jdk8-2.12.6
+jackson-jaxrs-base-2.12.6
+jackson-jaxrs-json-provider-2.12.6
+jackson-module-jaxb-annotations-2.12.6
 jackson-module-paranamer-2.10.5
-jackson-module-scala_2.13-2.12.3
+jackson-module-scala_2.13-2.12.6
 jakarta.validation-api-2.0.2
 javassist-3.27.0-GA
-jetty-client-9.4.43.v20210629
-jetty-continuation-9.4.43.v20210629
-jetty-http-9.4.43.v20210629
-jetty-io-9.4.43.v20210629
-jetty-security-9.4.43.v20210629
-jetty-server-9.4.43.v20210629
-jetty-servlet-9.4.43.v20210629
-jetty-servlets-9.4.43.v20210629
-jetty-util-9.4.43.v20210629
-jetty-util-ajax-9.4.43.v20210629
+jetty-client-9.4.44.v20210927
+jetty-continuation-9.4.44.v20210927
+jetty-http-9.4.44.v20210927
+jetty-io-9.4.44.v20210927
+jetty-security-9.4.44.v20210927
+jetty-server-9.4.44.v20210927
+jetty-servlet-9.4.44.v20210927
+jetty-servlets-9.4.44.v20210927
+jetty-util-9.4.44.v20210927
+jetty-util-ajax-9.4.44.v20210927
 jersey-common-2.34
 jersey-server-2.34
-jose4j-0.7.8
-log4j-1.2.17
+jose4j-0.7.9
 lz4-java-1.8.0
-maven-artifact-3.8.1
+maven-artifact-3.8.4
 metrics-core-4.1.12.1
-netty-buffer-4.1.68.Final
-netty-codec-4.1.68.Final
-netty-common-4.1.68.Final
-netty-handler-4.1.68.Final
-netty-resolver-4.1.68.Final
-netty-transport-4.1.68.Final
-netty-transport-native-epoll-4.1.68.Final
-netty-transport-native-unix-common-4.1.68.Final
-plexus-utils-3.2.1
-rocksdbjni-6.22.1.1
-scala-collection-compat_2.13-2.4.4
-scala-library-2.13.6
-scala-logging_2.13-3.9.3
-scala-reflect-2.13.6
-scala-java8-compat_2.13-1.0.0
+metrics-core-2.2.0
+netty-buffer-4.1.73.Final
+netty-codec-4.1.73.Final
+netty-common-4.1.73.Final
+netty-handler-4.1.73.Final
+netty-resolver-4.1.73.Final
+netty-tcnative-classes-2.0.46.Final
+netty-transport-4.1.73.Final
+netty-transport-classes-epoll-4.1.73.Final
+netty-transport-native-epoll-4.1.73.Final
+netty-transport-native-unix-common-4.1.73.Final
+plexus-utils-3.3.0
+reload4j-1.2.19
+rocksdbjni-6.29.4.1
+scala-collection-compat_2.13-2.6.0
+scala-library-2.13.8
+scala-logging_2.13-3.9.4
+scala-reflect-2.13.8
+scala-java8-compat_2.13-1.0.2
 snappy-java-1.1.8.4
 zookeeper-3.6.3
 zookeeper-jute-3.6.3
@@ -300,18 +303,18 @@ MIT License
 
 argparse4j-0.7.0, see: licenses/argparse-MIT
 jopt-simple-5.0.4, see: licenses/jopt-simple-MIT
-slf4j-api-1.7.30, see: licenses/slf4j-MIT
-slf4j-log4j12-1.7.30, see: licenses/slf4j-MIT
+slf4j-api-1.7.36, see: licenses/slf4j-MIT
+slf4j-reload4j-1.7.36, see: licenses/slf4j-MIT
 
 ---------------------------------------
 BSD 2-Clause
 
-zstd-jni-1.5.0-4 see: licenses/zstd-jni-BSD-2-clause
+zstd-jni-1.5.2-1 see: licenses/zstd-jni-BSD-2-clause
 
 ---------------------------------------
 BSD 3-Clause
 
-jline-3.12.1, see: licenses/jline-BSD-3-clause
+jline-3.21.0, see: licenses/jline-BSD-3-clause
 paranamer-2.8, see: licenses/paranamer-BSD-3-clause
 
 ---------------------------------------
diff --git a/README.md b/README.md
index 5e409f8dada6c..2aa509e081012 100644
--- a/README.md
+++ b/README.md
@@ -37,13 +37,16 @@ Follow instructions in https://kafka.apache.org/quickstart
     ./gradlew integrationTest
     
 ### Force re-running tests without code change ###
-    ./gradlew cleanTest test
-    ./gradlew cleanTest unitTest
-    ./gradlew cleanTest integrationTest
+    ./gradlew -Prerun-tests test
+    ./gradlew -Prerun-tests unitTest
+    ./gradlew -Prerun-tests integrationTest
 
 ### Running a particular unit/integration test ###
     ./gradlew clients:test --tests RequestResponseTest
 
+### Repeatedly running a particular unit/integration test ###
+    I=0; while ./gradlew clients:test -Prerun-tests --tests RequestResponseTest --fail-fast; do (( I=$I+1 )); echo "Completed run: $I"; sleep 1; done
+
 ### Running a particular test method within a unit/integration test ###
     ./gradlew core:test --tests kafka.api.ProducerFailureHandlingTest.testCannotSendToInternalTopic
     ./gradlew clients:test --tests org.apache.kafka.clients.MetadataTest.testTimeToNextUpdate
@@ -180,8 +183,8 @@ Please note for this to work you should create/update user maven settings (typic
      ...
 
 
-### Installing the jars to the local Maven repository ###
-The recommended command is:
+### Installing ALL the jars to the local Maven repository ###
+The recommended command to build for both Scala 2.12 and 2.13 is:
 
     ./gradlewAll publishToMavenLocal
 
@@ -189,6 +192,12 @@ For backwards compatibility, the following also works:
 
     ./gradlewAll install
 
+### Installing specific projects to the local Maven repository ###
+
+    ./gradlew -PskipSigning :streams:publishToMavenLocal
+    
+If needed, you can specify the Scala version with `-PscalaVersion=2.13`.
+
 ### Building the test jar ###
     ./gradlew testJar
 
diff --git a/Vagrantfile b/Vagrantfile
index ee08487be66cd..a053be28d01dc 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -51,6 +51,9 @@ ec2_subnet_id = nil
 # Only override this by setting it to false if you're running in a VPC and you
 # are running Vagrant from within that VPC as well.
 ec2_associate_public_ip = nil
+ec2_iam_instance_profile_name = nil
+
+ebs_volume_type = 'gp3'
 
 jdk_major = '8'
 jdk_full = '8u202-linux-x64'
@@ -60,6 +63,18 @@ if File.exists?(local_config_file) then
   eval(File.read(local_config_file), binding, "Vagrantfile.local")
 end
 
+# override any instance type set by Vagrantfile.local or above via an environment variable
+if ENV['INSTANCE_TYPE'] then
+  ec2_instance_type = ENV['INSTANCE_TYPE']
+end
+
+# choose size based on overridden size
+if ec2_instance_type.start_with?("m3") then
+  ebs_volume_size = 20
+else
+  ebs_volume_size = 40
+end
+
 # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered.
 Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
   config.hostmanager.enabled = enable_hostmanager
@@ -118,9 +133,11 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
     aws.region = ec2_region
     aws.availability_zone = ec2_az
     aws.instance_type = ec2_instance_type
+
     aws.ami = ec2_ami
     aws.security_groups = ec2_security_groups
     aws.subnet_id = ec2_subnet_id
+    aws.block_device_mapping = [{ 'DeviceName' => '/dev/sda1', 'Ebs.VolumeType' => ebs_volume_type, 'Ebs.VolumeSize' => ebs_volume_size }]
     # If a subnet is specified, default to turning on a public IP unless the
     # user explicitly specifies the option. Without a public IP, Vagrant won't
     # be able to SSH into the hosts unless Vagrant is also running in the VPC.
@@ -133,6 +150,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
       region.spot_instance = ec2_spot_instance
       region.spot_max_price = ec2_spot_max_price
     end
+    aws.iam_instance_profile_name = ec2_iam_instance_profile_name
 
     # Exclude some directories that can grow very large from syncing
     override.vm.synced_folder ".", "/vagrant", type: "rsync", rsync__exclude: ['.git', 'core/data/', 'logs/', 'tests/results/', 'results/']
diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh
index 6167583780bd5..a9096ea02161d 100755
--- a/bin/kafka-run-class.sh
+++ b/bin/kafka-run-class.sh
@@ -32,7 +32,7 @@ if [ -z "$INCLUDE_TEST_JARS" ]; then
 fi
 
 # Exclude jars not necessary for running commands.
-regex="(-(test|test-sources|src|scaladoc|javadoc)\.jar|jar.asc)$"
+regex="(-(test|test-sources|src|scaladoc|javadoc)\.jar|jar.asc|connect-file.*\.jar)$"
 should_include_file() {
   if [ "$INCLUDE_TEST_JARS" = true ]; then
     return 0
@@ -48,7 +48,7 @@ should_include_file() {
 base_dir=$(dirname $0)/..
 
 if [ -z "$SCALA_VERSION" ]; then
-  SCALA_VERSION=2.13.6
+  SCALA_VERSION=2.13.8
   if [[ -f "$base_dir/gradle.properties" ]]; then
     SCALA_VERSION=`grep "^scalaVersion=" "$base_dir/gradle.properties" | cut -d= -f 2`
   fi
@@ -171,7 +171,7 @@ do
   CLASSPATH="$CLASSPATH:$dir/*"
 done
 
-for cc_pkg in "api" "transforms" "runtime" "file" "mirror" "mirror-client" "json" "tools" "basic-auth-extension"
+for cc_pkg in "api" "transforms" "runtime" "mirror" "mirror-client" "json" "tools" "basic-auth-extension"
 do
   for file in "$base_dir"/connect/${cc_pkg}/build/libs/connect-${cc_pkg}*.jar;
   do
diff --git a/bin/windows/kafka-run-class.bat b/bin/windows/kafka-run-class.bat
index 26ef84a4f5c9a..df1e20ba11c66 100755
--- a/bin/windows/kafka-run-class.bat
+++ b/bin/windows/kafka-run-class.bat
@@ -27,7 +27,7 @@ set BASE_DIR=%CD%
 popd
 
 IF ["%SCALA_VERSION%"] EQU [""] (
-  set SCALA_VERSION=2.13.6
+  set SCALA_VERSION=2.13.8
 )
 
 IF ["%SCALA_BINARY_VERSION%"] EQU [""] (
diff --git a/bin/windows/kafka-storage.bat b/bin/windows/kafka-storage.bat
new file mode 100644
index 0000000000000..4a0e458a623b8
--- /dev/null
+++ b/bin/windows/kafka-storage.bat
@@ -0,0 +1,17 @@
+@echo off
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem     http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+
+"%~dp0kafka-run-class.bat" kafka.tools.StorageTool %*
diff --git a/build.gradle b/build.gradle
index 2b91533c16024..f17011ca4d205 100644
--- a/build.gradle
+++ b/build.gradle
@@ -34,13 +34,14 @@ plugins {
   id 'com.github.ben-manes.versions' version '0.42.0'
   id 'idea'
   id 'java-library'
-  id 'org.owasp.dependencycheck' version '6.5.3'
-  id 'org.nosphere.apache.rat' version "0.7.0"
+  id 'org.owasp.dependencycheck' version '7.1.1'
+  id 'org.nosphere.apache.rat' version "0.7.1"
 
-  id "com.github.spotbugs" version '5.0.5' apply false
-  id 'org.gradle.test-retry' version '1.3.1' apply false
+  id "com.github.spotbugs" version '5.0.9' apply false
+  id 'org.gradle.test-retry' version '1.4.0' apply false
   id 'org.scoverage' version '7.0.0' apply false
   id 'com.github.johnrengelman.shadow' version '7.1.2' apply false
+  id "io.swagger.core.v3.swagger-gradle-plugin" version "2.2.0"
 }
 
 spotless {
@@ -65,8 +66,10 @@ ext {
   if (JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_16))
     defaultJvmArgs.addAll(
       "--add-opens=java.base/java.io=ALL-UNNAMED",
+      "--add-opens=java.base/java.lang=ALL-UNNAMED",
       "--add-opens=java.base/java.nio=ALL-UNNAMED",
       "--add-opens=java.base/java.nio.file=ALL-UNNAMED",
+      "--add-opens=java.base/java.util=ALL-UNNAMED",
       "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED",
       "--add-opens=java.base/java.util.regex=ALL-UNNAMED",
       "--add-opens=java.base/java.util.stream=ALL-UNNAMED",
@@ -76,7 +79,7 @@ ext {
     )
 
   maxTestForks = project.hasProperty('maxParallelForks') ? maxParallelForks.toInteger() : Runtime.runtime.availableProcessors()
-  maxScalacThreads = project.hasProperty('maxScalacThreads') ? maxScalacParallelism.toInteger() :
+  maxScalacThreads = project.hasProperty('maxScalacThreads') ? maxScalacThreads.toInteger() :
       Math.min(Runtime.runtime.availableProcessors(), 8)
   userIgnoreFailures = project.hasProperty('ignoreFailures') ? ignoreFailures : false
 
@@ -149,12 +152,13 @@ allprojects {
       }
     }
   }
+  task printAllDependencies(type: DependencyReportTask) {}
 }
 
 def determineCommitId() {
   def takeFromHash = 16
-  if (project.hasProperty('commitId2')) {
-    commitId2.take(takeFromHash)
+  if (project.hasProperty('commitId')) {
+    commitId.take(takeFromHash)
   } else if (file("$rootDir/.git/HEAD").exists()) {
     def headRef = file("$rootDir/.git/HEAD").text
     if (headRef.contains('ref: ')) {
@@ -207,7 +211,7 @@ if (file('.git').exists()) {
 } else {
   rat.enabled = false
 }
-println("Starting build with version $version (commit id ${commitId.take(8)}) using Gradle $gradleVersion, Java ${JavaVersion.current()} and Scala ${versions.scala}")
+println("Starting build with version $version (commit id ${commitId == null ? "null" : commitId.take(8)}) using Gradle $gradleVersion, Java ${JavaVersion.current()} and Scala ${versions.scala}")
 println("Build properties: maxParallelForks=$maxTestForks, maxScalacThreads=$maxScalacThreads, maxTestRetries=$userMaxTestRetries")
 
 subprojects {
@@ -319,7 +323,7 @@ subprojects {
   }
 
   // Remove the relevant project name once it's converted to JUnit 5
-  def shouldUseJUnit5 = !(["runtime", "streams"].contains(it.project.name))
+  def shouldUseJUnit5 = !(["runtime"].contains(it.project.name))
 
   def testLoggingEvents = ["passed", "skipped", "failed"]
   def testShowStandardStreams = false
@@ -400,13 +404,14 @@ subprojects {
   if (JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_16)) {
     testsToExclude.addAll([
       // connect tests
-      "**/AbstractHerderTest.*", "**/ConnectClusterStateImplTest.*", "**/ConnectorPluginsResourceTest.*",
+      "**/AbstractHerderTest.*", "**/ConnectorPluginsResourceTest.*",
       "**/ConnectorsResourceTest.*", "**/DistributedHerderTest.*", "**/FileOffsetBakingStoreTest.*",
       "**/ErrorHandlingTaskTest.*", "**/KafkaConfigBackingStoreTest.*", "**/KafkaOffsetBackingStoreTest.*",
       "**/KafkaBasedLogTest.*", "**/OffsetStorageWriterTest.*", "**/StandaloneHerderTest.*",
-      "**/SourceTaskOffsetCommitterTest.*", "**/WorkerConfigTransformerTest.*", "**/WorkerGroupMemberTest.*",
-      "**/WorkerSinkTaskTest.*", "**/WorkerSinkTaskThreadedTest.*", "**/WorkerSourceTaskTest.*",
-      "**/WorkerTaskTest.*", "**/WorkerTest.*",
+      "**/SourceTaskOffsetCommitterTest.*",
+      "**/WorkerTest.*", "**/WorkerSinkTaskTest.*", "**/WorkerSinkTaskThreadedTest.*",
+      "**/WorkerSourceTaskTest.*", "**/AbstractWorkerSourceTaskTest.*", "**/ExactlyOnceWorkerSourceTaskTest.*",
+      "**/WorkerTaskTest.*",
       // streams tests
       "**/KafkaStreamsTest.*"
     ])
@@ -435,13 +440,19 @@ subprojects {
       maxRetries = userMaxTestRetries
       maxFailures = userMaxTestRetryFailures
     }
+
+    // Allows devs to run tests in a loop to debug flaky tests. See README.
+    if (project.hasProperty("rerun-tests")) {
+      outputs.upToDateWhen { false }
+    }
   }
 
   task integrationTest(type: Test, dependsOn: compileJava) {
     maxParallelForks = maxTestForks
     ignoreFailures = userIgnoreFailures
 
-    maxHeapSize = defaultMaxHeapSize
+    // Increase heap size for integration tests
+    maxHeapSize = "2560m"
     jvmArgs = defaultJvmArgs
 
 
@@ -455,8 +466,18 @@ subprojects {
     exclude testsToExclude
 
     if (shouldUseJUnit5) {
-      useJUnitPlatform {
-        includeTags "integration"
+      if (project.name == 'streams') {
+        useJUnitPlatform {
+          includeTags "integration"
+          includeTags "org.apache.kafka.test.IntegrationTest"
+	  // Both engines are needed to run JUnit 4 tests alongside JUnit 5 tests.
+          // junit-vintage (JUnit 4) can be removed once the JUnit 4 migration is complete.
+          includeEngines "junit-vintage", "junit-jupiter"
+        }
+      } else {
+        useJUnitPlatform {
+          includeTags "integration"
+        }
       }
     } else {
       useJUnit {
@@ -468,6 +489,11 @@ subprojects {
       maxRetries = userMaxTestRetries
       maxFailures = userMaxTestRetryFailures
     }
+
+    // Allows devs to run tests in a loop to debug flaky tests. See README.
+    if (project.hasProperty("rerun-tests")) {
+      outputs.upToDateWhen { false }
+    }
   }
 
   task unitTest(type: Test, dependsOn: compileJava) {
@@ -487,8 +513,18 @@ subprojects {
     exclude testsToExclude
 
     if (shouldUseJUnit5) {
-      useJUnitPlatform {
-        excludeTags "integration"
+      if (project.name == 'streams') {
+        useJUnitPlatform {
+          excludeTags "integration"
+          excludeTags "org.apache.kafka.test.IntegrationTest"
+	  // Both engines are needed to run JUnit 4 tests alongside JUnit 5 tests.
+          // junit-vintage (JUnit 4) can be removed once the JUnit 4 migration is complete.
+          includeEngines "junit-vintage", "junit-jupiter"
+        }
+      } else {
+        useJUnitPlatform {
+          excludeTags "integration"
+        }
       }
     } else {
       useJUnit {
@@ -505,8 +541,8 @@ subprojects {
   // remove test output from all test types
   tasks.withType(Test).all { t ->
     cleanTest {
-      delete t.reports.junitXml.destination
-      delete t.reports.html.destination
+      delete t.reports.junitXml.outputLocation
+      delete t.reports.html.outputLocation
     }
   }
 
@@ -817,6 +853,10 @@ project(':core') {
     apply plugin: "org.scoverage"
   archivesBaseName = "kafka_${versions.baseScala}"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     // `core` is often used in users' tests, define the following dependencies as `api` for backwards compatibility
     // even though the `core` module doesn't expose any public API
@@ -857,6 +897,7 @@ project(':core') {
     testImplementation project(':clients').sourceSets.test.output
     testImplementation project(':metadata').sourceSets.test.output
     testImplementation project(':raft').sourceSets.test.output
+    testImplementation project(':server-common').sourceSets.test.output
     testImplementation libs.bcpkix
     testImplementation libs.mockitoCore
     testImplementation(libs.apacheda) {
@@ -878,6 +919,8 @@ project(':core') {
     testImplementation(libs.jfreechart) {
       exclude group: 'junit', module: 'junit'
     }
+
+    generator project(':generator')
   }
 
   if (userEnableTestCoverage) {
@@ -905,7 +948,7 @@ project(':core') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -915,14 +958,17 @@ project(':core') {
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "kafka.internals.generated",
              "-o", "src/generated/java/kafka/internals/generated",
              "-i", "src/main/resources/common/message",
              "-m", "MessageDataGenerator"
     ]
     inputs.dir("src/main/resources/common/message")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/kafka/internals/generated")
   }
 
@@ -930,77 +976,77 @@ project(':core') {
 
   task genProtocolErrorDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.common.protocol.Errors'
+    mainClass = 'org.apache.kafka.common.protocol.Errors'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "protocol_errors.html").newOutputStream()
   }
 
   task genProtocolTypesDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.common.protocol.types.Type'
+    mainClass = 'org.apache.kafka.common.protocol.types.Type'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "protocol_types.html").newOutputStream()
   }
 
   task genProtocolApiKeyDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.common.protocol.ApiKeys'
+    mainClass = 'org.apache.kafka.common.protocol.ApiKeys'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "protocol_api_keys.html").newOutputStream()
   }
 
   task genProtocolMessageDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.common.protocol.Protocol'
+    mainClass = 'org.apache.kafka.common.protocol.Protocol'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "protocol_messages.html").newOutputStream()
   }
 
   task genAdminClientConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.clients.admin.AdminClientConfig'
+    mainClass = 'org.apache.kafka.clients.admin.AdminClientConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "admin_client_config.html").newOutputStream()
   }
 
   task genProducerConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.clients.producer.ProducerConfig'
+    mainClass = 'org.apache.kafka.clients.producer.ProducerConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "producer_config.html").newOutputStream()
   }
 
   task genConsumerConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.clients.consumer.ConsumerConfig'
+    mainClass = 'org.apache.kafka.clients.consumer.ConsumerConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "consumer_config.html").newOutputStream()
   }
 
   task genKafkaConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'kafka.server.KafkaConfig'
+    mainClass = 'kafka.server.KafkaConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "kafka_config.html").newOutputStream()
   }
 
   task genTopicConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'kafka.log.LogConfig'
+    mainClass = 'kafka.log.LogConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "topic_config.html").newOutputStream()
   }
 
   task genConsumerMetricsDocs(type: JavaExec) {
     classpath = sourceSets.test.runtimeClasspath
-    main = 'org.apache.kafka.clients.consumer.internals.ConsumerMetrics'
+    mainClass = 'org.apache.kafka.clients.consumer.internals.ConsumerMetrics'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "consumer_metrics.html").newOutputStream()
   }
 
   task genProducerMetricsDocs(type: JavaExec) {
     classpath = sourceSets.test.runtimeClasspath
-    main = 'org.apache.kafka.clients.producer.internals.ProducerMetrics'
+    mainClass = 'org.apache.kafka.clients.producer.internals.ProducerMetrics'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "producer_metrics.html").newOutputStream()
   }
@@ -1012,7 +1058,7 @@ project(':core') {
                                ':connect:runtime:genConnectPredicateDocs',
                                ':connect:runtime:genSinkConnectorConfigDocs', ':connect:runtime:genSourceConnectorConfigDocs',
                                ':streams:genStreamsConfigDocs', 'genConsumerMetricsDocs', 'genProducerMetricsDocs',
-                               ':connect:runtime:genConnectMetricsDocs'], type: Tar) {
+                               ':connect:runtime:genConnectMetricsDocs', ':connect:runtime:genConnectOpenAPIDocs'], type: Tar) {
     archiveClassifier = 'site-docs'
     compression = Compression.GZIP
     from project.file("$rootDir/docs")
@@ -1115,6 +1161,10 @@ project(':core') {
 project(':metadata') {
   archivesBaseName = "kafka-metadata"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     implementation project(':server-common')
     implementation project(':clients')
@@ -1125,14 +1175,18 @@ project(':metadata') {
     compileOnly libs.log4j
     testImplementation libs.junitJupiter
     testImplementation libs.hamcrest
+    testImplementation libs.mockitoCore
+    testImplementation libs.mockitoInline
     testImplementation libs.slf4jlog4j
     testImplementation project(':clients').sourceSets.test.output
     testImplementation project(':raft').sourceSets.test.output
+    testImplementation project(':server-common').sourceSets.test.output
+    generator project(':generator')
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "org.apache.kafka.common.metadata",
              "-o", "src/generated/java/org/apache/kafka/common/metadata",
              "-i", "src/main/resources/common/metadata",
@@ -1140,6 +1194,9 @@ project(':metadata') {
              "-t", "MetadataRecordTypeGenerator", "MetadataJsonConvertersGenerator"
            ]
     inputs.dir("src/main/resources/common/metadata")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/org/apache/kafka/common/metadata")
   }
 
@@ -1197,6 +1254,10 @@ project(':generator') {
 project(':clients') {
   archivesBaseName = "kafka-clients"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     implementation libs.zstd
     implementation libs.lz4
@@ -1209,19 +1270,23 @@ project(':clients') {
 
     testImplementation libs.bcpkix
     testImplementation libs.junitJupiter
-    testImplementation libs.mockitoCore
+    testImplementation libs.mockitoInline
 
     testRuntimeOnly libs.slf4jlog4j
     testRuntimeOnly libs.jacksonDatabind
     testRuntimeOnly libs.jacksonJDK8Datatypes
     testImplementation libs.jose4j
     testImplementation libs.jacksonJaxrsJsonProvider
+
+    generator project(':generator')
   }
 
   task createVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
         commitId: commitId,
@@ -1246,8 +1311,8 @@ project(':clients') {
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "org.apache.kafka.common.message",
              "-o", "src/generated/java/org/apache/kafka/common/message",
              "-i", "src/main/resources/common/message",
@@ -1255,18 +1320,24 @@ project(':clients') {
              "-m", "MessageDataGenerator", "JsonConverterGenerator"
            ]
     inputs.dir("src/main/resources/common/message")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/org/apache/kafka/common/message")
   }
 
   task processTestMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "org.apache.kafka.common.message",
              "-o", "src/generated-test/java/org/apache/kafka/common/message",
              "-i", "src/test/resources/common/message",
              "-m", "MessageDataGenerator", "JsonConverterGenerator"
            ]
     inputs.dir("src/test/resources/common/message")
+        .withPropertyName("testMessages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated-test/java/org/apache/kafka/common/message")
   }
 
@@ -1278,7 +1349,7 @@ project(':clients') {
     }
     test {
       java {
-        srcDirs = ["src/generated/java", "src/generated-test/java", "src/test/java"]
+        srcDirs = ["src/generated-test/java", "src/test/java"]
       }
     }
   }
@@ -1318,6 +1389,10 @@ project(':clients') {
 project(':raft') {
   archivesBaseName = "kafka-raft"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     implementation project(':server-common')
     implementation project(':clients')
@@ -1332,12 +1407,16 @@ project(':raft') {
     testImplementation libs.jqwik
 
     testRuntimeOnly libs.slf4jlog4j
+
+    generator project(':generator')
   }
 
   task createVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
         commitId: commitId,
@@ -1351,13 +1430,16 @@ project(':raft') {
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "org.apache.kafka.raft.generated",
              "-o", "src/generated/java/org/apache/kafka/raft/generated",
              "-i", "src/main/resources/common/message",
              "-m", "MessageDataGenerator", "JsonConverterGenerator"]
     inputs.dir("src/main/resources/common/message")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/org/apache/kafka/raft/generated")
   }
 
@@ -1404,6 +1486,7 @@ project(':server-common') {
   dependencies {
     api project(':clients')
     implementation libs.slf4jApi
+    implementation libs.metrics
 
     testImplementation project(':clients')
     testImplementation project(':clients').sourceSets.test.output
@@ -1414,9 +1497,11 @@ project(':server-common') {
   }
 
   task createVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
               commitId: commitId,
@@ -1470,9 +1555,11 @@ project(':storage:api') {
   }
 
   task createVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
               commitId: commitId,
@@ -1517,6 +1604,10 @@ project(':storage:api') {
 project(':storage') {
   archivesBaseName = "kafka-storage"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     implementation project(':storage:api')
     implementation project(':server-common')
@@ -1528,17 +1619,23 @@ project(':storage') {
     testImplementation project(':clients').sourceSets.test.output
     testImplementation project(':core')
     testImplementation project(':core').sourceSets.test.output
+    testImplementation project(':server-common')
+    testImplementation project(':server-common').sourceSets.test.output
     testImplementation libs.junitJupiter
     testImplementation libs.mockitoCore
     testImplementation libs.bcpkix
 
     testRuntimeOnly libs.slf4jlog4j
+
+    generator project(':generator')
   }
 
   task createVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
               commitId: commitId,
@@ -1552,14 +1649,17 @@ project(':storage') {
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", " org.apache.kafka.server.log.remote.metadata.storage.generated",
              "-o", "src/generated/java/org/apache/kafka/server/log/remote/metadata/storage/generated",
              "-i", "src/main/resources/message",
              "-m", "MessageDataGenerator", "JsonConverterGenerator",
              "-t", "MetadataRecordTypeGenerator", "MetadataJsonConvertersGenerator" ]
     inputs.dir("src/main/resources/message")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/org/apache/kafka/server/log/remote/metadata/storage/generated")
   }
 
@@ -1630,7 +1730,7 @@ project(':tools') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -1680,7 +1780,7 @@ project(':trogdor') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -1743,6 +1843,10 @@ project(':streams') {
   archivesBaseName = "kafka-streams"
   ext.buildStreamsVersionFileName = "kafka-streams-version.properties"
 
+  configurations {
+    generator
+  }
+
   dependencies {
     api project(':clients')
     // `org.rocksdb.Options` is part of Kafka Streams public api via `RocksDBConfigSetter`
@@ -1754,11 +1858,12 @@ project(':streams') {
 
     // testCompileOnly prevents streams from exporting a dependency on test-utils, which would cause a dependency cycle
     testCompileOnly project(':streams:test-utils')
+
     testImplementation project(':clients').sourceSets.test.output
     testImplementation project(':core')
     testImplementation project(':core').sourceSets.test.output
     testImplementation libs.log4j
-    testImplementation libs.junitJupiterApi
+    testImplementation libs.junitJupiter
     testImplementation libs.junitVintageEngine
     testImplementation libs.easymock
     testImplementation libs.powermockJunit4
@@ -1769,17 +1874,22 @@ project(':streams') {
 
     testRuntimeOnly project(':streams:test-utils')
     testRuntimeOnly libs.slf4jlog4j
+
+    generator project(':generator')
   }
 
   task processMessages(type:JavaExec) {
-    main = "org.apache.kafka.message.MessageGenerator"
-    classpath = project(':generator').sourceSets.main.runtimeClasspath
+    mainClass = "org.apache.kafka.message.MessageGenerator"
+    classpath = configurations.generator
     args = [ "-p", "org.apache.kafka.streams.internals.generated",
              "-o", "src/generated/java/org/apache/kafka/streams/internals/generated",
              "-i", "src/main/resources/common/message",
              "-m", "MessageDataGenerator"
            ]
     inputs.dir("src/main/resources/common/message")
+        .withPropertyName("messages")
+        .withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.cacheIf { true }
     outputs.dir("src/generated/java/org/apache/kafka/streams/internals/generated")
   }
 
@@ -1812,9 +1922,11 @@ project(':streams') {
   }
 
   task createStreamsVersionFile() {
-    ext.receiptFile = file("$buildDir/kafka/$buildStreamsVersionFileName")
+    def receiptFile = file("$buildDir/kafka/$buildVersionFileName")
+    inputs.property "commitId", commitId
+    inputs.property "version", version
     outputs.file receiptFile
-    outputs.upToDateWhen { false }
+
     doLast {
       def data = [
               commitId: commitId,
@@ -1841,7 +1953,7 @@ project(':streams') {
 
   task genStreamsConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.streams.StreamsConfig'
+    mainClass = 'org.apache.kafka.streams.StreamsConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "streams_config.html").newOutputStream()
   }
@@ -1868,6 +1980,7 @@ project(':streams') {
             ':streams:upgrade-system-tests-28:test',
             ':streams:upgrade-system-tests-30:test',
             ':streams:upgrade-system-tests-31:test',
+            ':streams:upgrade-system-tests-32:test',
             ':streams:examples:test'
     ]
   )
@@ -1989,7 +2102,10 @@ project(':streams:upgrade-system-tests-0100') {
   archivesBaseName = "kafka-streams-upgrade-system-tests-0100"
 
   dependencies {
-    testImplementation libs.kafkaStreams_0100
+    testImplementation(libs.kafkaStreams_0100) {
+      exclude group: 'org.slf4j', module: 'slf4j-log4j12'
+      exclude group: 'log4j', module: 'log4j'
+    }
     testRuntimeOnly libs.junitJupiter
   }
 
@@ -2002,7 +2118,10 @@ project(':streams:upgrade-system-tests-0101') {
   archivesBaseName = "kafka-streams-upgrade-system-tests-0101"
 
   dependencies {
-    testImplementation libs.kafkaStreams_0101
+    testImplementation(libs.kafkaStreams_0101) {
+      exclude group: 'org.slf4j', module: 'slf4j-log4j12'
+      exclude group: 'log4j', module: 'log4j'
+    }
     testRuntimeOnly libs.junitJupiter
   }
 
@@ -2206,6 +2325,19 @@ project(':streams:upgrade-system-tests-31') {
   }
 }
 
+project(':streams:upgrade-system-tests-32') {
+  archivesBaseName = "kafka-streams-upgrade-system-tests-32"
+
+  dependencies {
+    testImplementation libs.kafkaStreams_32
+    testRuntimeOnly libs.junitJupiter
+  }
+
+  systemTestLibs {
+    dependsOn testJar
+  }
+}
+
 project(':jmh-benchmarks') {
 
   apply plugin: 'com.github.johnrengelman.shadow'
@@ -2219,6 +2351,7 @@ project(':jmh-benchmarks') {
       // jmh requires jopt 4.x while `core` depends on 5.0, they are not binary compatible
       exclude group: 'net.sf.jopt-simple', module: 'jopt-simple'
     }
+    implementation project(':server-common')
     implementation project(':clients')
     implementation project(':metadata')
     implementation project(':streams')
@@ -2254,7 +2387,7 @@ project(':jmh-benchmarks') {
 
   task jmh(type: JavaExec, dependsOn: [':jmh-benchmarks:clean', ':jmh-benchmarks:shadowJar']) {
 
-    main="-jar"
+    mainClass = "-jar"
 
     doFirst {
       if (System.getProperty("jmhArgs")) {
@@ -2308,7 +2441,7 @@ project(':connect:api') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2345,7 +2478,7 @@ project(':connect:transforms') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2385,7 +2518,7 @@ project(':connect:json') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2428,6 +2561,8 @@ project(':connect:runtime') {
     implementation libs.jettyClient
     implementation libs.reflections
     implementation libs.mavenArtifact
+    implementation libs.swaggerJaxrs2
+    implementation libs.swaggerAnnotations
 
     testImplementation project(':clients').sourceSets.test.output
     testImplementation project(':core')
@@ -2439,7 +2574,7 @@ project(':connect:runtime') {
     testImplementation libs.junitVintageEngine
     testImplementation libs.powermockJunit4
     testImplementation libs.powermockEasymock
-    testImplementation libs.mockitoCore
+    testImplementation libs.mockitoInline
     testImplementation libs.httpclient
 
     testRuntimeOnly libs.slf4jlog4j
@@ -2451,8 +2586,8 @@ project(':connect:runtime') {
 
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
+      // No need to copy log4j since the module has an explicit dependency on that
       include('slf4j-log4j12*')
-      include('log4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2468,46 +2603,66 @@ project(':connect:runtime') {
 
   task genConnectConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.connect.runtime.distributed.DistributedConfig'
+    mainClass = 'org.apache.kafka.connect.runtime.distributed.DistributedConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "connect_config.html").newOutputStream()
   }
 
   task genSinkConnectorConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.connect.runtime.SinkConnectorConfig'
+    mainClass = 'org.apache.kafka.connect.runtime.SinkConnectorConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "sink_connector_config.html").newOutputStream()
   }
 
   task genSourceConnectorConfigDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.connect.runtime.SourceConnectorConfig'
+    mainClass = 'org.apache.kafka.connect.runtime.SourceConnectorConfig'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "source_connector_config.html").newOutputStream()
   }
 
   task genConnectTransformationDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.connect.tools.TransformationDoc'
+    mainClass = 'org.apache.kafka.connect.tools.TransformationDoc'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "connect_transforms.html").newOutputStream()
   }
 
   task genConnectPredicateDocs(type: JavaExec) {
     classpath = sourceSets.main.runtimeClasspath
-    main = 'org.apache.kafka.connect.tools.PredicateDoc'
+    mainClass = 'org.apache.kafka.connect.tools.PredicateDoc'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "connect_predicates.html").newOutputStream()
   }
 
   task genConnectMetricsDocs(type: JavaExec) {
     classpath = sourceSets.test.runtimeClasspath
-    main = 'org.apache.kafka.connect.runtime.ConnectMetrics'
+    mainClass = 'org.apache.kafka.connect.runtime.ConnectMetrics'
     if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
     standardOutput = new File(generatedDocsDir, "connect_metrics.html").newOutputStream()
   }
 
+  task setVersionInOpenAPISpec(type: Copy) {
+    from "$rootDir/gradle/openapi.template"
+    into "$buildDir/resources/docs"
+    rename ('openapi.template', 'openapi.yaml')
+    expand(kafkaVersion: "$rootProject.version")
+  }
+
+  task genConnectOpenAPIDocs(type: io.swagger.v3.plugins.gradle.tasks.ResolveTask, dependsOn: setVersionInOpenAPISpec) {
+    classpath = sourceSets.main.runtimeClasspath
+    buildClasspath = classpath
+    outputFileName = 'connect_rest'
+    outputFormat = 'YAML'
+    prettyPrint = 'TRUE'
+    sortOutput = 'TRUE'
+    openApiFile = file("$buildDir/resources/docs/openapi.yaml")
+    resourcePackages = ['org.apache.kafka.connect.runtime.rest.resources']
+    if( !generatedDocsDir.exists() ) { generatedDocsDir.mkdirs() }
+    outputDir = file(generatedDocsDir)
+  }
+
 }
 
 project(':connect:file') {
@@ -2517,9 +2672,8 @@ project(':connect:file') {
     implementation project(':connect:api')
     implementation libs.slf4jApi
 
-    testImplementation libs.easymock
     testImplementation libs.junitJupiter
-    testImplementation libs.mockitoInline // supports mocking static methods, final classes, etc.
+    testImplementation libs.mockitoCore
 
     testRuntimeOnly libs.slf4jlog4j
     testImplementation project(':clients').sourceSets.test.output
@@ -2532,7 +2686,7 @@ project(':connect:file') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2571,7 +2725,7 @@ project(':connect:basic-auth-extension') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2618,7 +2772,7 @@ project(':connect:mirror') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
@@ -2653,7 +2807,7 @@ project(':connect:mirror-client') {
   tasks.create(name: "copyDependantLibs", type: Copy) {
     from (configurations.testRuntimeClasspath) {
       include('slf4j-log4j12*')
-      include('log4j*jar')
+      include('reload4j*jar')
     }
     from (configurations.runtimeClasspath) {
       exclude('kafka-clients*')
diff --git a/checkstyle/import-control-core.xml b/checkstyle/import-control-core.xml
index 36e5cc63551af..4042cba402fdd 100644
--- a/checkstyle/import-control-core.xml
+++ b/checkstyle/import-control-core.xml
@@ -54,6 +54,7 @@
     <allow pkg="org.apache.kafka.metadata" />
     <allow pkg="org.apache.kafka.metalog" />
     <allow pkg="org.apache.kafka.server.common" />
+    <allow pkg="org.apache.kafka.server.fault" />
   </subpackage>
 
   <subpackage name="tools">
@@ -82,6 +83,7 @@
     <allow pkg="org.apache.kafka.controller"/>
     <allow pkg="org.apache.kafka.metadata"/>
     <allow pkg="org.apache.kafka.server.authorizer"/>
+    <allow pkg="org.apache.kafka.server.common" />
     <allow pkg="kafka.test.annotation"/>
     <allow pkg="kafka.test.junit"/>
     <allow pkg="kafka.network"/>
diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml
index 3b8a78da205b7..4b07a26cba5c9 100644
--- a/checkstyle/import-control.xml
+++ b/checkstyle/import-control.xml
@@ -232,6 +232,8 @@
     <allow pkg="org.apache.kafka.raft" />
     <allow pkg="org.apache.kafka.server.authorizer" />
     <allow pkg="org.apache.kafka.server.common" />
+    <allow pkg="org.apache.kafka.server.fault" />
+    <allow pkg="org.apache.kafka.server.metrics" />
     <allow pkg="org.apache.kafka.server.policy"/>
     <allow pkg="org.apache.kafka.snapshot" />
     <allow pkg="org.apache.kafka.test" />
@@ -258,9 +260,12 @@
     <allow pkg="org.apache.kafka.common.message" />
     <allow pkg="org.apache.kafka.common.metadata" />
     <allow pkg="org.apache.kafka.common.protocol" />
+    <allow pkg="org.apache.kafka.common.record" />
+    <allow pkg="org.apache.kafka.common.requests" />
     <allow pkg="org.apache.kafka.image" />
     <allow pkg="org.apache.kafka.metadata" />
     <allow pkg="org.apache.kafka.metalog" />
+    <allow pkg="org.apache.kafka.queue" />
     <allow pkg="org.apache.kafka.raft" />
     <allow pkg="org.apache.kafka.server.authorizer" />
     <allow pkg="org.apache.kafka.server.common" />
@@ -269,8 +274,12 @@
       <allow pkg="org.apache.kafka.common.acl" />
       <allow pkg="org.apache.kafka.common.requests" />
       <allow pkg="org.apache.kafka.common.resource" />
+      <allow pkg="org.apache.kafka.controller" />
       <allow pkg="org.apache.kafka.metadata" />
     </subpackage>
+    <subpackage name="fault">
+      <allow pkg="org.apache.kafka.server.fault" />
+    </subpackage>
   </subpackage>
 
   <subpackage name="metalog">
@@ -310,10 +319,17 @@
   <subpackage name="server">
     <allow pkg="org.apache.kafka.common" />
 
+    <!-- This is required to make AlterConfigPolicyTest work. -->
+    <allow pkg="org.apache.kafka.server.policy" />
+
     <subpackage name="common">
       <allow pkg="org.apache.kafka.server.common" />
     </subpackage>
 
+    <subpackage name="metrics">
+      <allow pkg="com.yammer.metrics" />
+    </subpackage>
+
     <subpackage name="log">
       <allow pkg="com.fasterxml.jackson" />
       <allow pkg="kafka.api" />
@@ -338,6 +354,7 @@
     <allow pkg="net.sourceforge.argparse4j" />
     <allow pkg="org.apache.kafka.common"/>
     <allow pkg="org.apache.kafka.metadata"/>
+    <allow pkg="org.apache.kafka.controller.util"/>
     <allow pkg="org.apache.kafka.queue"/>
     <allow pkg="org.apache.kafka.raft"/>
     <allow pkg="org.apache.kafka.server.common" />
@@ -551,6 +568,7 @@
         <allow pkg="org.glassfish.jersey" />
         <allow pkg="com.fasterxml.jackson" />
         <allow pkg="org.apache.http"/>
+        <allow pkg="io.swagger.v3.oas.annotations"/>
         <subpackage name="resources">
           <allow pkg="org.apache.log4j" />
         </subpackage>
diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml
index cd82efe1421c8..bec3da1637a9d 100644
--- a/checkstyle/suppressions.xml
+++ b/checkstyle/suppressions.xml
@@ -25,7 +25,7 @@
 
     <!-- Generator -->
     <suppress checks="CyclomaticComplexity|BooleanExpressionComplexity"
-              files="(SchemaGenerator|MessageDataGenerator|FieldSpec).java"/>
+              files="(SchemaGenerator|MessageDataGenerator|FieldSpec|FieldType).java"/>
     <suppress checks="NPathComplexity"
               files="(MessageDataGenerator|FieldSpec|WorkerSinkTask).java"/>
     <suppress checks="JavaNCSS"
@@ -39,6 +39,8 @@
     <suppress checks="(NPathComplexity|ClassFanOutComplexity|CyclomaticComplexity|ClassDataAbstractionCoupling|FinalLocalVariable|LocalVariableName|MemberName|ParameterName|MethodLength|JavaNCSS|AvoidStarImport)"
               files="core[\\/]src[\\/](generated|generated-test)[\\/].+.java$"/>
     <suppress checks="NPathComplexity" files="(ClusterTestExtensions|KafkaApisBuilder).java"/>
+    <suppress checks="MethodLength"
+              files="(KafkaClusterTestKit).java"/>
 
     <!-- Clients -->
     <suppress id="dontUseSystemExit"
@@ -130,14 +132,14 @@
     <suppress checks="ClassFanOutComplexity"
               files="Worker(|Test).java"/>
     <suppress checks="MethodLength"
-              files="(KafkaConfigBackingStore|Values|IncrementalCooperativeAssignor).java"/>
+              files="(DistributedHerder|KafkaConfigBackingStore|Values|IncrementalCooperativeAssignor).java"/>
     <suppress checks="ParameterNumber"
               files="Worker(SinkTask|SourceTask|Coordinator).java"/>
     <suppress checks="ParameterNumber"
               files="ConfigKeyInfo.java"/>
 
     <suppress checks="ClassDataAbstractionCoupling"
-              files="(RestServer|AbstractHerder|DistributedHerder).java"/>
+              files="(RestServer|AbstractHerder|DistributedHerder|Worker).java"/>
 
     <suppress checks="BooleanExpressionComplexity"
               files="JsonConverter.java"/>
@@ -170,7 +172,7 @@
 
     <!-- Streams -->
     <suppress checks="ClassFanOutComplexity"
-              files="(KafkaStreams|KStreamImpl|KTableImpl|InternalTopologyBuilder|StreamsPartitionAssignor|StreamThread|IQv2StoreIntegrationTest).java"/>
+              files="(KafkaStreams|KStreamImpl|KTableImpl|InternalTopologyBuilder|StreamsPartitionAssignor|StreamThread|IQv2StoreIntegrationTest|KStreamImplTest).java"/>
 
     <suppress checks="MethodLength"
               files="KTableImpl.java"/>
@@ -182,7 +184,7 @@
               files="(KafkaStreams|KStreamImpl|KTableImpl).java"/>
 
     <suppress checks="CyclomaticComplexity"
-              files="(KafkaStreams|StreamsPartitionAssignor|StreamThread|TaskManager|PartitionGroup).java"/>
+              files="(KafkaStreams|StreamsPartitionAssignor|StreamThread|TaskManager|PartitionGroup|SubscriptionWrapperSerde|AssignorConfiguration).java"/>
 
     <suppress checks="StaticVariableName"
               files="StreamsMetricsImpl.java"/>
@@ -213,7 +215,7 @@
 
     <!-- Streams tests -->
     <suppress checks="ClassFanOutComplexity"
-              files="(StreamsPartitionAssignorTest|StreamThreadTest|StreamTaskTest|TaskManagerTest|TopologyTestDriverTest).java"/>
+              files="(RecordCollectorTest|StreamsPartitionAssignorTest|StreamThreadTest|StreamTaskTest|TaskManagerTest|TopologyTestDriverTest).java"/>
 
     <suppress checks="MethodLength"
               files="(EosIntegrationTest|EosV2UpgradeIntegrationTest|KStreamKStreamJoinTest|RocksDBWindowStoreTest).java"/>
@@ -225,7 +227,7 @@
               files="(EosV2UpgradeIntegrationTest|KStreamKStreamJoinTest|KTableKTableForeignKeyJoinIntegrationTest|RocksDBGenericOptionsToDbOptionsColumnFamilyOptionsAdapterTest|RelationalSmokeTest|MockProcessorContextStateStoreTest).java"/>
 
     <suppress checks="JavaNCSS"
-              files="(EosV2UpgradeIntegrationTest|KStreamKStreamJoinTest|TaskManagerTest).java"/>
+              files="(EosV2UpgradeIntegrationTest|KStreamKStreamJoinTest|StreamThreadTest|TaskManagerTest).java"/>
 
     <suppress checks="NPathComplexity"
               files="(EosV2UpgradeIntegrationTest|EosTestDriver|KStreamKStreamJoinTest|KTableKTableForeignKeyJoinIntegrationTest|RelationalSmokeTest|MockProcessorContextStateStoreTest).java"/>
@@ -290,24 +292,28 @@
     <suppress checks="ClassDataAbstractionCoupling"
               files="(QuorumController|QuorumControllerTest|ReplicationControlManager|ReplicationControlManagerTest).java"/>
     <suppress checks="ClassFanOutComplexity"
-              files="(QuorumController|ReplicationControlManager|ReplicationControlManagerTest).java"/>
-    <suppress checks="ParameterNumber"
+              files="(QuorumController|QuorumControllerTest|ReplicationControlManager|ReplicationControlManagerTest).java"/>
+    <suppress checks="(ParameterNumber|ClassDataAbstractionCoupling)"
               files="(QuorumController).java"/>
     <suppress checks="CyclomaticComplexity"
               files="(ClientQuotasImage|MetadataDelta|QuorumController|ReplicationControlManager).java"/>
     <suppress checks="NPathComplexity"
-              files="(ClientQuotasImage|KafkaEventQueue|ReplicationControlManager).java"/>
+              files="(ClientQuotasImage|KafkaEventQueue|ReplicationControlManager|FeatureControlManager).java"/>
     <suppress checks="(NPathComplexity|ClassFanOutComplexity|CyclomaticComplexity|ClassDataAbstractionCoupling|LocalVariableName|MemberName|ParameterName|MethodLength|JavaNCSS|AvoidStarImport)"
             files="metadata[\\/]src[\\/](generated|generated-test)[\\/].+.java$"/>
     <suppress checks="BooleanExpressionComplexity"
               files="(MetadataImage).java"/>
+    <suppress checks="ImportControl"
+              files="ApiVersionsResponse.java"/>
+    <suppress checks="AvoidStarImport"
+              files="MetadataVersionTest.java"/>
 
     <!-- Storage -->
     <suppress checks="(CyclomaticComplexity|ParameterNumber)"
               files="(RemoteLogManagerConfig).java"/>
 
     <!-- benchmarks -->
-    <suppress checks="ClassDataAbstractionCoupling"
+    <suppress checks="(ClassDataAbstractionCoupling|ClassFanOutComplexity)"
               files="(ReplicaFetcherThreadBenchmark).java"/>
 
 </suppressions>
diff --git a/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java b/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java
index 95efdbeae425a..f4d9092258773 100644
--- a/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java
+++ b/clients/src/main/java/org/apache/kafka/clients/ClusterConnectionStates.java
@@ -246,7 +246,6 @@ public long pollDelayMs(String id, long now) {
     public void checkingApiVersions(String id) {
         NodeConnectionState nodeState = nodeState(id);
         nodeState.state = ConnectionState.CHECKING_API_VERSIONS;
-        resetReconnectBackoff(nodeState);
         resetConnectionSetupTimeout(nodeState);
         connectingNodes.remove(id);
     }
diff --git a/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java b/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java
index 5371a73ece192..b142867abc939 100644
--- a/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java
+++ b/clients/src/main/java/org/apache/kafka/clients/CommonClientConfigs.java
@@ -17,6 +17,8 @@
 package org.apache.kafka.clients;
 
 import org.apache.kafka.common.config.AbstractConfig;
+import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.common.config.SaslConfigs;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.Utils;
 import org.slf4j.Logger;
@@ -203,4 +205,15 @@ public static Map<String, Object> postProcessReconnectBackoffConfigs(AbstractCon
         }
         return rval;
     }
+
+    public static void postValidateSaslMechanismConfig(AbstractConfig config) {
+        SecurityProtocol securityProtocol = SecurityProtocol.forName(config.getString(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+        String clientSaslMechanism = config.getString(SaslConfigs.SASL_MECHANISM);
+        if (securityProtocol == SecurityProtocol.SASL_PLAINTEXT || securityProtocol == SecurityProtocol.SASL_SSL) {
+            if (clientSaslMechanism == null || clientSaslMechanism.isEmpty()) {
+                throw new ConfigException(SaslConfigs.SASL_MECHANISM, null, "When the " + CommonClientConfigs.SECURITY_PROTOCOL_CONFIG +
+                        " configuration enables SASL, mechanism must be non-null and non-empty string.");
+            }
+        }
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java
index cabc3cccddece..81463d508a6a3 100644
--- a/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java
+++ b/clients/src/main/java/org/apache/kafka/clients/NetworkClient.java
@@ -915,7 +915,9 @@ private void handleApiVersionsResponse(List<ClientResponse> responses,
             }
             return;
         }
-        NodeApiVersions nodeVersionInfo = new NodeApiVersions(apiVersionsResponse.data().apiKeys());
+        NodeApiVersions nodeVersionInfo = new NodeApiVersions(
+            apiVersionsResponse.data().apiKeys(),
+            apiVersionsResponse.data().supportedFeatures());
         apiVersions.update(node, nodeVersionInfo);
         this.connectionStates.ready(node);
         log.debug("Node {} has finalized features epoch: {}, finalized features: {}, supported features: {}, API versions: {}.",
diff --git a/clients/src/main/java/org/apache/kafka/clients/NodeApiVersions.java b/clients/src/main/java/org/apache/kafka/clients/NodeApiVersions.java
index 3c09f0eb4e781..a3aaa88fee19c 100644
--- a/clients/src/main/java/org/apache/kafka/clients/NodeApiVersions.java
+++ b/clients/src/main/java/org/apache/kafka/clients/NodeApiVersions.java
@@ -17,8 +17,9 @@
 package org.apache.kafka.clients;
 
 import org.apache.kafka.common.errors.UnsupportedVersionException;
+import org.apache.kafka.common.feature.SupportedVersionRange;
 import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersion;
-import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersionCollection;
+import org.apache.kafka.common.message.ApiVersionsResponseData.SupportedFeatureKey;
 import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.requests.ApiVersionsResponse;
 import org.apache.kafka.common.utils.Utils;
@@ -27,6 +28,7 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumMap;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -44,6 +46,8 @@ public class NodeApiVersions {
     // List of APIs which the broker supports, but which are unknown to the client
     private final List<ApiVersion> unknownApis = new ArrayList<>();
 
+    private final Map<String, SupportedVersionRange> supportedFeatures;
+
     /**
      * Create a NodeApiVersions object with the current ApiVersions.
      *
@@ -72,7 +76,7 @@ public static NodeApiVersions create(Collection<ApiVersion> overrides) {
             }
             if (!exists) apiVersions.add(ApiVersionsResponse.toApiVersion(apiKey));
         }
-        return new NodeApiVersions(apiVersions);
+        return new NodeApiVersions(apiVersions, Collections.emptyList());
     }
 
 
@@ -91,7 +95,7 @@ public static NodeApiVersions create(short apiKey, short minVersion, short maxVe
                 .setMaxVersion(maxVersion)));
     }
 
-    public NodeApiVersions(ApiVersionCollection nodeApiVersions) {
+    public NodeApiVersions(Collection<ApiVersion> nodeApiVersions, Collection<SupportedFeatureKey> nodeSupportedFeatures) {
         for (ApiVersion nodeApiVersion : nodeApiVersions) {
             if (ApiKeys.hasId(nodeApiVersion.apiKey())) {
                 ApiKeys nodeApiKey = ApiKeys.forId(nodeApiVersion.apiKey());
@@ -101,18 +105,13 @@ public NodeApiVersions(ApiVersionCollection nodeApiVersions) {
                 unknownApis.add(nodeApiVersion);
             }
         }
-    }
 
-    public NodeApiVersions(Collection<ApiVersion> nodeApiVersions) {
-        for (ApiVersion nodeApiVersion : nodeApiVersions) {
-            if (ApiKeys.hasId(nodeApiVersion.apiKey())) {
-                ApiKeys nodeApiKey = ApiKeys.forId(nodeApiVersion.apiKey());
-                supportedVersions.put(nodeApiKey, nodeApiVersion);
-            } else {
-                // Newer brokers may support ApiKeys we don't know about
-                unknownApis.add(nodeApiVersion);
-            }
+        Map<String, SupportedVersionRange> supportedFeaturesBuilder = new HashMap<>();
+        for (SupportedFeatureKey supportedFeature : nodeSupportedFeatures) {
+            supportedFeaturesBuilder.put(supportedFeature.name(),
+                    new SupportedVersionRange(supportedFeature.minVersion(), supportedFeature.maxVersion()));
         }
+        this.supportedFeatures = Collections.unmodifiableMap(supportedFeaturesBuilder);
     }
 
     /**
@@ -233,4 +232,8 @@ public ApiVersion apiVersion(ApiKeys apiKey) {
     public Map<ApiKeys, ApiVersion> allSupportedApiVersions() {
         return supportedVersions;
     }
+
+    public Map<String, SupportedVersionRange> supportedFeatures() {
+        return supportedFeatures;
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/Admin.java b/clients/src/main/java/org/apache/kafka/clients/admin/Admin.java
index 0c795bc5206dc..1d469a66436a6 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/Admin.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/Admin.java
@@ -36,6 +36,7 @@
 
 import java.time.Duration;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -919,12 +920,21 @@ default ListConsumerGroupsResult listConsumerGroups() {
      * @param options The options to use when listing the consumer group offsets.
      * @return The ListGroupOffsetsResult
      */
-    ListConsumerGroupOffsetsResult listConsumerGroupOffsets(String groupId, ListConsumerGroupOffsetsOptions options);
+    default ListConsumerGroupOffsetsResult listConsumerGroupOffsets(String groupId, ListConsumerGroupOffsetsOptions options) {
+        @SuppressWarnings("deprecation")
+        ListConsumerGroupOffsetsSpec groupSpec = new ListConsumerGroupOffsetsSpec()
+            .topicPartitions(options.topicPartitions());
+
+        // We can use the provided options with the batched API, which uses topic partitions from
+        // the group spec and ignores any topic partitions set in the options.
+        return listConsumerGroupOffsets(Collections.singletonMap(groupId, groupSpec), options);
+    }
 
     /**
      * List the consumer group offsets available in the cluster with the default options.
      * <p>
-     * This is a convenience method for {@link #listConsumerGroupOffsets(String, ListConsumerGroupOffsetsOptions)} with default options.
+     * This is a convenience method for {@link #listConsumerGroupOffsets(Map, ListConsumerGroupOffsetsOptions)}
+     * to list offsets of all partitions of one group with default options.
      *
      * @return The ListGroupOffsetsResult.
      */
@@ -932,6 +942,29 @@ default ListConsumerGroupOffsetsResult listConsumerGroupOffsets(String groupId)
         return listConsumerGroupOffsets(groupId, new ListConsumerGroupOffsetsOptions());
     }
 
+    /**
+     * List the consumer group offsets available in the cluster for the specified consumer groups.
+     *
+     * @param groupSpecs Map of consumer group ids to a spec that specifies the topic partitions of the group to list offsets for.
+     *
+     * @param options The options to use when listing the consumer group offsets.
+     * @return The ListConsumerGroupOffsetsResult
+     */
+    ListConsumerGroupOffsetsResult listConsumerGroupOffsets(Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, ListConsumerGroupOffsetsOptions options);
+
+    /**
+     * List the consumer group offsets available in the cluster for the specified groups with the default options.
+     * <p>
+     * This is a convenience method for
+     * {@link #listConsumerGroupOffsets(Map, ListConsumerGroupOffsetsOptions)} with default options.
+     *
+     * @param groupSpecs Map of consumer group ids to a spec that specifies the topic partitions of the group to list offsets for.
+     * @return The ListConsumerGroupOffsetsResult.
+     */
+    default ListConsumerGroupOffsetsResult listConsumerGroupOffsets(Map<String, ListConsumerGroupOffsetsSpec> groupSpecs) {
+        return listConsumerGroupOffsets(groupSpecs, new ListConsumerGroupOffsetsOptions());
+    }
+
     /**
      * Delete consumer groups from the cluster.
      *
@@ -1446,6 +1479,35 @@ default DescribeFeaturesResult describeFeatures() {
      */
     UpdateFeaturesResult updateFeatures(Map<String, FeatureUpdate> featureUpdates, UpdateFeaturesOptions options);
 
+    /**
+     * Describes the state of the metadata quorum.
+     * <p>
+     * This is a convenience method for {@link #describeMetadataQuorum(DescribeMetadataQuorumOptions)} with default options.
+     * See the overload for more details.
+     *
+     * @return the {@link DescribeMetadataQuorumResult} containing the result
+     */
+    default DescribeMetadataQuorumResult describeMetadataQuorum() {
+        return describeMetadataQuorum(new DescribeMetadataQuorumOptions());
+    }
+
+    /**
+     * Describes the state of the metadata quorum.
+     * <p>
+     * The following exceptions can be anticipated when calling {@code get()} on the futures obtained from
+     * the returned {@code DescribeMetadataQuorumResult}:
+     * <ul>
+     *   <li>{@link org.apache.kafka.common.errors.ClusterAuthorizationException}
+     *   If the authenticated user didn't have {@code DESCRIBE} access to the cluster.</li>
+     *   <li>{@link org.apache.kafka.common.errors.TimeoutException}
+     *   If the request timed out before the controller could list the cluster links.</li>
+     * </ul>
+     *
+     * @param options The {@link DescribeMetadataQuorumOptions} to use when describing the quorum.
+     * @return the {@link DescribeMetadataQuorumResult} containing the result
+     */
+    DescribeMetadataQuorumResult describeMetadataQuorum(DescribeMetadataQuorumOptions options);
+
     /**
      * Unregister a broker.
      * <p>
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/AdminClientConfig.java b/clients/src/main/java/org/apache/kafka/clients/admin/AdminClientConfig.java
index 16feef66d4351..37af386410355 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/AdminClientConfig.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/AdminClientConfig.java
@@ -25,6 +25,8 @@
 import org.apache.kafka.common.config.ConfigDef.Type;
 import org.apache.kafka.common.config.SecurityConfig;
 import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.common.utils.Utils;
 
 import java.util.Map;
 import java.util.Set;
@@ -212,6 +214,7 @@ public class AdminClientConfig extends AbstractConfig {
                                 .define(SECURITY_PROTOCOL_CONFIG,
                                         Type.STRING,
                                         DEFAULT_SECURITY_PROTOCOL,
+                                        in(Utils.enumOptions(SecurityProtocol.class)),
                                         Importance.MEDIUM,
                                         SECURITY_PROTOCOL_DOC)
                                 .withClientSslSupport()
@@ -220,6 +223,7 @@ public class AdminClientConfig extends AbstractConfig {
 
     @Override
     protected Map<String, Object> postProcessParsedConfig(final Map<String, Object> parsedValues) {
+        CommonClientConfigs.postValidateSaslMechanismConfig(this);
         return CommonClientConfigs.postProcessReconnectBackoffConfigs(this, parsedValues);
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/ConfigEntry.java b/clients/src/main/java/org/apache/kafka/clients/admin/ConfigEntry.java
index 30686c93eaeef..154fc8e65db35 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/ConfigEntry.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/ConfigEntry.java
@@ -61,8 +61,14 @@ public ConfigEntry(String name, String value) {
      * @param isReadOnly whether the config is read-only and cannot be updated
      * @param synonyms Synonym configs in order of precedence
      */
-    ConfigEntry(String name, String value, ConfigSource source, boolean isSensitive, boolean isReadOnly,
-                List<ConfigSynonym> synonyms, ConfigType type, String documentation) {
+    public ConfigEntry(String name,
+            String value,
+            ConfigSource source,
+            boolean isSensitive,
+            boolean isReadOnly,
+            List<ConfigSynonym> synonyms,
+            ConfigType type,
+            String documentation) {
         Objects.requireNonNull(name, "name should not be null");
         this.name = name;
         this.value = value;
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/CreateDelegationTokenOptions.java b/clients/src/main/java/org/apache/kafka/clients/admin/CreateDelegationTokenOptions.java
index 6a082d499bbb4..693e8f52f63b7 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/CreateDelegationTokenOptions.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/CreateDelegationTokenOptions.java
@@ -19,6 +19,7 @@
 
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Optional;
 
 import org.apache.kafka.common.annotation.InterfaceStability;
 import org.apache.kafka.common.security.auth.KafkaPrincipal;
@@ -32,6 +33,7 @@
 public class CreateDelegationTokenOptions extends AbstractOptions<CreateDelegationTokenOptions> {
     private long maxLifeTimeMs = -1;
     private List<KafkaPrincipal> renewers =  new LinkedList<>();
+    private KafkaPrincipal owner = null;
 
     public CreateDelegationTokenOptions renewers(List<KafkaPrincipal> renewers) {
         this.renewers = renewers;
@@ -42,6 +44,15 @@ public List<KafkaPrincipal> renewers() {
         return renewers;
     }
 
+    public CreateDelegationTokenOptions owner(KafkaPrincipal owner) {
+        this.owner = owner;
+        return this;
+    }
+
+    public Optional<KafkaPrincipal> owner() {
+        return Optional.ofNullable(owner);
+    }
+
     public CreateDelegationTokenOptions maxlifeTimeMs(long maxLifeTimeMs) {
         this.maxLifeTimeMs = maxLifeTimeMs;
         return this;
diff --git a/core/src/main/scala/kafka/common/BaseEnum.scala b/clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumOptions.java
similarity index 52%
rename from core/src/main/scala/kafka/common/BaseEnum.scala
rename to clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumOptions.java
index 9c39466b7f378..8f54cc81f2110 100644
--- a/core/src/main/scala/kafka/common/BaseEnum.scala
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumOptions.java
@@ -1,12 +1,12 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
+ * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
+ * the License. You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,13 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package kafka.common
+package org.apache.kafka.clients.admin;
 
-/*
- * We inherit from `Product` and `Serializable` because `case` objects and classes inherit from them and if we don't
- * do it here, the compiler will infer types that unexpectedly include `Product` and `Serializable`, see
- * http://underscore.io/blog/posts/2015/06/04/more-on-sealed.html for more information.
+/**
+ * Options for {@link Admin#describeMetadataQuorum(DescribeMetadataQuorumOptions)}
  */
-trait BaseEnum extends Product with Serializable {
-  def name: String
+public class DescribeMetadataQuorumOptions extends AbstractOptions<DescribeMetadataQuorumOptions> {
+
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumResult.java b/clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumResult.java
new file mode 100644
index 0000000000000..aa9bbe84eadd3
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/DescribeMetadataQuorumResult.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.admin;
+
+import org.apache.kafka.common.KafkaFuture;
+
+/**
+ * The result of {@link Admin#describeMetadataQuorum(DescribeMetadataQuorumOptions)}
+ */
+public class DescribeMetadataQuorumResult {
+
+    private final KafkaFuture<QuorumInfo> quorumInfo;
+
+    DescribeMetadataQuorumResult(KafkaFuture<QuorumInfo> quorumInfo) {
+        this.quorumInfo = quorumInfo;
+    }
+
+    /**
+     * Returns a future containing the QuorumInfo
+     */
+    public KafkaFuture<QuorumInfo> quorumInfo() {
+        return quorumInfo;
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/FeatureUpdate.java b/clients/src/main/java/org/apache/kafka/clients/admin/FeatureUpdate.java
index 38753af3fe7fc..b1dd026078dd8 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/FeatureUpdate.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/FeatureUpdate.java
@@ -23,33 +23,86 @@
  */
 public class FeatureUpdate {
     private final short maxVersionLevel;
-    private final boolean allowDowngrade;
+    private final UpgradeType upgradeType;
+
+    public enum UpgradeType {
+        UNKNOWN(0),
+        UPGRADE(1),
+        SAFE_DOWNGRADE(2),
+        UNSAFE_DOWNGRADE(3);
+
+        private final byte code;
+
+        UpgradeType(int code) {
+            this.code = (byte) code;
+        }
+
+        public byte code() {
+            return code;
+        }
+
+        public static UpgradeType fromCode(int code) {
+            if (code == 1) {
+                return UPGRADE;
+            } else if (code == 2) {
+                return SAFE_DOWNGRADE;
+            } else if (code == 3) {
+                return UNSAFE_DOWNGRADE;
+            } else {
+                return UNKNOWN;
+            }
+        }
+    }
 
     /**
      * @param maxVersionLevel   the new maximum version level for the finalized feature.
-     *                          a value &lt; 1 is special and indicates that the update is intended to
+     *                          a value of zero is special and indicates that the update is intended to
      *                          delete the finalized feature, and should be accompanied by setting
      *                          the allowDowngrade flag to true.
      * @param allowDowngrade    - true, if this feature update was meant to downgrade the existing
-     *                            maximum version level of the finalized feature.
+     *                            maximum version level of the finalized feature. Only "safe" downgrades are
+     *                            enabled with this boolean. See {@link FeatureUpdate#FeatureUpdate(short, UpgradeType)}
      *                          - false, otherwise.
      */
+    @Deprecated
     public FeatureUpdate(final short maxVersionLevel, final boolean allowDowngrade) {
-        if (maxVersionLevel < 1 && !allowDowngrade) {
+        this(maxVersionLevel, allowDowngrade ? UpgradeType.SAFE_DOWNGRADE : UpgradeType.UPGRADE);
+    }
+
+    /**
+     * @param maxVersionLevel   The new maximum version level for the finalized feature.
+     *                          a value of zero is special and indicates that the update is intended to
+     *                          delete the finalized feature, and should be accompanied by setting
+     *                          the upgradeType to safe or unsafe.
+     * @param upgradeType     Indicate what kind of upgrade should be performed in this operation.
+     *                          - UPGRADE: upgrading the feature level
+     *                          - SAFE_DOWNGRADE: only downgrades which do not result in metadata loss are permitted
+     *                          - UNSAFE_DOWNGRADE: any downgrade, including those which may result in metadata loss, are permitted
+     */
+    public FeatureUpdate(final short maxVersionLevel, final UpgradeType upgradeType) {
+        if (maxVersionLevel == 0 && upgradeType.equals(UpgradeType.UPGRADE)) {
             throw new IllegalArgumentException(String.format(
-                "The allowDowngrade flag should be set when the provided maxVersionLevel:%d is < 1.",
-                maxVersionLevel));
+                    "The downgradeType flag should be set to SAFE or UNSAFE when the provided maxVersionLevel:%d is < 1.",
+                    maxVersionLevel));
+        }
+        if (maxVersionLevel < 0) {
+            throw new IllegalArgumentException("Cannot specify a negative version level.");
         }
         this.maxVersionLevel = maxVersionLevel;
-        this.allowDowngrade = allowDowngrade;
+        this.upgradeType = upgradeType;
     }
 
     public short maxVersionLevel() {
         return maxVersionLevel;
     }
 
+    @Deprecated
     public boolean allowDowngrade() {
-        return allowDowngrade;
+        return upgradeType != UpgradeType.UPGRADE;
+    }
+
+    public UpgradeType upgradeType() {
+        return upgradeType;
     }
 
     @Override
@@ -63,16 +116,16 @@ public boolean equals(Object other) {
         }
 
         final FeatureUpdate that = (FeatureUpdate) other;
-        return this.maxVersionLevel == that.maxVersionLevel && this.allowDowngrade == that.allowDowngrade;
+        return this.maxVersionLevel == that.maxVersionLevel && this.upgradeType.equals(that.upgradeType);
     }
 
     @Override
     public int hashCode() {
-        return Objects.hash(maxVersionLevel, allowDowngrade);
+        return Objects.hash(maxVersionLevel, upgradeType);
     }
 
     @Override
     public String toString() {
-        return String.format("FeatureUpdate{maxVersionLevel:%d, allowDowngrade:%s}", maxVersionLevel, allowDowngrade);
+        return String.format("FeatureUpdate{maxVersionLevel:%d, downgradeType:%s}", maxVersionLevel, upgradeType);
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java b/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java
index 03322fdcf1dc8..41eb27a1ddad8 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java
@@ -135,6 +135,7 @@
 import org.apache.kafka.common.message.DescribeLogDirsRequestData;
 import org.apache.kafka.common.message.DescribeLogDirsRequestData.DescribableLogDirTopic;
 import org.apache.kafka.common.message.DescribeLogDirsResponseData;
+import org.apache.kafka.common.message.DescribeQuorumResponseData;
 import org.apache.kafka.common.message.DescribeUserScramCredentialsRequestData;
 import org.apache.kafka.common.message.DescribeUserScramCredentialsRequestData.UserName;
 import org.apache.kafka.common.message.DescribeUserScramCredentialsResponseData;
@@ -208,12 +209,16 @@
 import org.apache.kafka.common.requests.DescribeLogDirsResponse;
 import org.apache.kafka.common.requests.DescribeUserScramCredentialsRequest;
 import org.apache.kafka.common.requests.DescribeUserScramCredentialsResponse;
+import org.apache.kafka.common.requests.DescribeQuorumRequest;
+import org.apache.kafka.common.requests.DescribeQuorumRequest.Builder;
+import org.apache.kafka.common.requests.DescribeQuorumResponse;
 import org.apache.kafka.common.requests.ElectLeadersRequest;
 import org.apache.kafka.common.requests.ElectLeadersResponse;
 import org.apache.kafka.common.requests.ExpireDelegationTokenRequest;
 import org.apache.kafka.common.requests.ExpireDelegationTokenResponse;
 import org.apache.kafka.common.requests.IncrementalAlterConfigsRequest;
 import org.apache.kafka.common.requests.IncrementalAlterConfigsResponse;
+import org.apache.kafka.common.requests.JoinGroupRequest;
 import org.apache.kafka.common.requests.ListGroupsRequest;
 import org.apache.kafka.common.requests.ListGroupsResponse;
 import org.apache.kafka.common.requests.ListOffsetsRequest;
@@ -257,6 +262,7 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.OptionalLong;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
@@ -268,6 +274,8 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import static org.apache.kafka.common.internals.Topic.METADATA_TOPIC_NAME;
+import static org.apache.kafka.common.internals.Topic.METADATA_TOPIC_PARTITION;
 import static org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignablePartition;
 import static org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignablePartitionResponse;
 import static org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignableTopicResponse;
@@ -306,9 +314,9 @@ public class KafkaAdminClient extends AdminClient {
     private static final long INVALID_SHUTDOWN_TIME = -1;
 
     /**
-     * The base reason for a LeaveGroupRequest
+     * The default reason for a LeaveGroupRequest.
      */
-    static final String LEAVE_GROUP_REASON = "member was removed by an admin";
+    static final String DEFAULT_LEAVE_GROUP_REASON = "member was removed by an admin";
 
     /**
      * Thread name prefix for admin client network thread
@@ -2716,7 +2724,11 @@ private static Map<String, LogDirDescription> logDirDescriptions(DescribeLogDirs
                             new ReplicaInfo(p.partitionSize(), p.offsetLag(), p.isFutureKey()));
                 }
             }
-            result.put(logDirResult.logDir(), new LogDirDescription(Errors.forCode(logDirResult.errorCode()).exception(), replicaInfoMap));
+            result.put(logDirResult.logDir(), new LogDirDescription(
+                    Errors.forCode(logDirResult.errorCode()).exception(),
+                    replicaInfoMap,
+                    logDirResult.totalBytes(),
+                    logDirResult.usableBytes()));
         }
         return result;
     }
@@ -3055,10 +3067,14 @@ public CreateDelegationTokenResult createDelegationToken(final CreateDelegationT
 
             @Override
             CreateDelegationTokenRequest.Builder createRequest(int timeoutMs) {
-                return new CreateDelegationTokenRequest.Builder(
-                        new CreateDelegationTokenRequestData()
-                            .setRenewers(renewers)
-                            .setMaxLifetimeMs(options.maxlifeTimeMs()));
+                CreateDelegationTokenRequestData data = new CreateDelegationTokenRequestData()
+                    .setRenewers(renewers)
+                    .setMaxLifetimeMs(options.maxlifeTimeMs());
+                if (options.owner().isPresent()) {
+                    data.setOwnerPrincipalName(options.owner().get().getName());
+                    data.setOwnerPrincipalType(options.owner().get().getPrincipalType());
+                }
+                return new CreateDelegationTokenRequest.Builder(data);
             }
 
             @Override
@@ -3069,6 +3085,7 @@ void handleResponse(AbstractResponse abstractResponse) {
                 } else {
                     CreateDelegationTokenResponseData data = response.data();
                     TokenInformation tokenInfo =  new TokenInformation(data.tokenId(), new KafkaPrincipal(data.principalType(), data.principalName()),
+                        new KafkaPrincipal(data.tokenRequesterPrincipalType(), data.tokenRequesterPrincipalName()),
                         options.renewers(), data.issueTimestampMs(), data.maxTimestampMs(), data.expiryTimestampMs());
                     DelegationToken token = new DelegationToken(tokenInfo, data.hmac());
                     delegationTokenFuture.complete(token);
@@ -3384,13 +3401,14 @@ void handleFailure(Throwable throwable) {
     }
 
     @Override
-    public ListConsumerGroupOffsetsResult listConsumerGroupOffsets(final String groupId,
-                                                                   final ListConsumerGroupOffsetsOptions options) {
+    public ListConsumerGroupOffsetsResult listConsumerGroupOffsets(Map<String, ListConsumerGroupOffsetsSpec> groupSpecs,
+                                                                   ListConsumerGroupOffsetsOptions options) {
         SimpleAdminApiFuture<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> future =
-                ListConsumerGroupOffsetsHandler.newFuture(groupId);
-        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(groupId, options.topicPartitions(), logContext);
+                ListConsumerGroupOffsetsHandler.newFuture(groupSpecs.keySet());
+        ListConsumerGroupOffsetsHandler handler =
+            new ListConsumerGroupOffsetsHandler(groupSpecs, options.requireStable(), logContext);
         invokeDriver(handler, future, options.timeoutMs);
-        return new ListConsumerGroupOffsetsResult(future.get(CoordinatorKey.byGroupId(groupId)));
+        return new ListConsumerGroupOffsetsResult(future.all());
     }
 
     @Override
@@ -3713,7 +3731,7 @@ private Integer nodeFor(ConfigResource resource) {
         }
     }
 
-    private List<MemberIdentity> getMembersFromGroup(String groupId) {
+    private List<MemberIdentity> getMembersFromGroup(String groupId, String reason) {
         Collection<MemberDescription> members;
         try {
             members = describeConsumerGroups(Collections.singleton(groupId)).describedGroups().get(groupId).get().members();
@@ -3723,11 +3741,15 @@ private List<MemberIdentity> getMembersFromGroup(String groupId) {
 
         List<MemberIdentity> membersToRemove = new ArrayList<>();
         for (final MemberDescription member : members) {
+            MemberIdentity memberIdentity = new MemberIdentity().setReason(reason);
+
             if (member.groupInstanceId().isPresent()) {
-                membersToRemove.add(new MemberIdentity().setGroupInstanceId(member.groupInstanceId().get()));
+                memberIdentity.setGroupInstanceId(member.groupInstanceId().get());
             } else {
-                membersToRemove.add(new MemberIdentity().setMemberId(member.consumerId()));
+                memberIdentity.setMemberId(member.consumerId());
             }
+
+            membersToRemove.add(memberIdentity);
         }
         return membersToRemove;
     }
@@ -3735,15 +3757,17 @@ private List<MemberIdentity> getMembersFromGroup(String groupId) {
     @Override
     public RemoveMembersFromConsumerGroupResult removeMembersFromConsumerGroup(String groupId,
                                                                                RemoveMembersFromConsumerGroupOptions options) {
+        String reason = options.reason() == null || options.reason().isEmpty() ?
+            DEFAULT_LEAVE_GROUP_REASON : JoinGroupRequest.maybeTruncateReason(options.reason());
+
         List<MemberIdentity> members;
         if (options.removeAll()) {
-            members = getMembersFromGroup(groupId);
+            members = getMembersFromGroup(groupId, reason);
         } else {
-            members = options.members().stream().map(MemberToRemove::toMemberIdentity).collect(Collectors.toList());
+            members = options.members().stream()
+                .map(m -> m.toMemberIdentity().setReason(reason))
+                .collect(Collectors.toList());
         }
-        
-        String reason = options.reason() == null ? LEAVE_GROUP_REASON : LEAVE_GROUP_REASON + ": " + options.reason();
-        members.forEach(member -> member.setReason(reason));
 
         SimpleAdminApiFuture<CoordinatorKey, Map<MemberIdentity, Errors>> future =
                 RemoveMembersFromConsumerGroupHandler.newFuture(groupId);
@@ -4259,12 +4283,13 @@ UpdateFeaturesRequest.Builder createRequest(int timeoutMs) {
                         new UpdateFeaturesRequestData.FeatureUpdateKey();
                     requestItem.setFeature(feature);
                     requestItem.setMaxVersionLevel(update.maxVersionLevel());
-                    requestItem.setAllowDowngrade(update.allowDowngrade());
+                    requestItem.setUpgradeType(update.upgradeType().code());
                     featureUpdatesRequestData.add(requestItem);
                 }
                 return new UpdateFeaturesRequest.Builder(
                     new UpdateFeaturesRequestData()
                         .setTimeoutMs(timeoutMs)
+                        .setValidateOnly(options.validateOnly())
                         .setFeatureUpdates(featureUpdatesRequestData));
             }
 
@@ -4314,6 +4339,84 @@ void handleFailure(Throwable throwable) {
         return new UpdateFeaturesResult(new HashMap<>(updateFutures));
     }
 
+    @Override
+    public DescribeMetadataQuorumResult describeMetadataQuorum(DescribeMetadataQuorumOptions options) {
+        NodeProvider provider = new LeastLoadedNodeProvider();
+
+        final KafkaFutureImpl<QuorumInfo> future = new KafkaFutureImpl<>();
+        final long now = time.milliseconds();
+        final Call call = new Call(
+                "describeMetadataQuorum", calcDeadlineMs(now, options.timeoutMs()), provider) {
+
+            private QuorumInfo.ReplicaState translateReplicaState(DescribeQuorumResponseData.ReplicaState replica) {
+                return new QuorumInfo.ReplicaState(
+                        replica.replicaId(),
+                        replica.logEndOffset(),
+                        replica.lastFetchTimestamp() == -1 ? OptionalLong.empty() : OptionalLong.of(replica.lastFetchTimestamp()),
+                        replica.lastCaughtUpTimestamp() == -1 ? OptionalLong.empty() : OptionalLong.of(replica.lastCaughtUpTimestamp()));
+            }
+
+            private QuorumInfo createQuorumResult(final DescribeQuorumResponseData.PartitionData partition) {
+                return new QuorumInfo(
+                        partition.leaderId(),
+                        partition.currentVoters().stream().map(v -> translateReplicaState(v)).collect(Collectors.toList()),
+                        partition.observers().stream().map(o -> translateReplicaState(o)).collect(Collectors.toList()));
+            }
+
+            @Override
+            DescribeQuorumRequest.Builder createRequest(int timeoutMs) {
+                return new Builder(DescribeQuorumRequest.singletonRequest(
+                        new TopicPartition(METADATA_TOPIC_NAME, METADATA_TOPIC_PARTITION.partition())));
+            }
+
+            @Override
+            void handleResponse(AbstractResponse response) {
+                final DescribeQuorumResponse quorumResponse = (DescribeQuorumResponse) response;
+                if (quorumResponse.data().errorCode() != Errors.NONE.code()) {
+                    throw Errors.forCode(quorumResponse.data().errorCode()).exception();
+                }
+                if (quorumResponse.data().topics().size() != 1) {
+                    String msg = String.format("DescribeMetadataQuorum received %d topics when 1 was expected",
+                            quorumResponse.data().topics().size());
+                    log.debug(msg);
+                    throw new UnknownServerException(msg);
+                }
+                DescribeQuorumResponseData.TopicData topic = quorumResponse.data().topics().get(0);
+                if (!topic.topicName().equals(METADATA_TOPIC_NAME)) {
+                    String msg = String.format("DescribeMetadataQuorum received a topic with name %s when %s was expected",
+                            topic.topicName(), METADATA_TOPIC_NAME);
+                    log.debug(msg);
+                    throw new UnknownServerException(msg);
+                }
+                if (topic.partitions().size() != 1) {
+                    String msg = String.format("DescribeMetadataQuorum received a topic %s with %d partitions when 1 was expected",
+                            topic.topicName(), topic.partitions().size());
+                    log.debug(msg);
+                    throw new UnknownServerException(msg);
+                }
+                DescribeQuorumResponseData.PartitionData partition = topic.partitions().get(0);
+                if (partition.partitionIndex() != METADATA_TOPIC_PARTITION.partition()) {
+                    String msg = String.format("DescribeMetadataQuorum received a single partition with index %d when %d was expected",
+                            partition.partitionIndex(), METADATA_TOPIC_PARTITION.partition());
+                    log.debug(msg);
+                    throw new UnknownServerException(msg);
+                }
+                if (partition.errorCode() != Errors.NONE.code()) {
+                    throw Errors.forCode(partition.errorCode()).exception();
+                }
+                future.complete(createQuorumResult(partition));
+            }
+
+            @Override
+            void handleFailure(Throwable throwable) {
+                future.completeExceptionally(throwable);
+            }
+        };
+
+        runnable.call(call, now);
+        return new DescribeMetadataQuorumResult(future);
+    }
+
     @Override
     public UnregisterBrokerResult unregisterBroker(int brokerId, UnregisterBrokerOptions options) {
         final KafkaFutureImpl<Void> future = new KafkaFutureImpl<>();
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsOptions.java b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsOptions.java
index af738ca209fb9..44d3a407327e1 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsOptions.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsOptions.java
@@ -23,31 +23,54 @@
 import java.util.List;
 
 /**
- * Options for {@link Admin#listConsumerGroupOffsets(String)}.
+ * Options for {@link Admin#listConsumerGroupOffsets(java.util.Map)} and {@link Admin#listConsumerGroupOffsets(String)}.
  * <p>
  * The API of this class is evolving, see {@link Admin} for details.
  */
 @InterfaceStability.Evolving
 public class ListConsumerGroupOffsetsOptions extends AbstractOptions<ListConsumerGroupOffsetsOptions> {
 
-    private List<TopicPartition> topicPartitions = null;
+    private List<TopicPartition> topicPartitions;
+    private boolean requireStable = false;
 
     /**
      * Set the topic partitions to list as part of the result.
      * {@code null} includes all topic partitions.
+     * <p>
+     * @deprecated Since 3.3.
+     * Use {@link Admin#listConsumerGroupOffsets(java.util.Map, ListConsumerGroupOffsetsOptions)}
+     * to specify topic partitions.
      *
      * @param topicPartitions List of topic partitions to include
      * @return This ListGroupOffsetsOptions
      */
+    @Deprecated
     public ListConsumerGroupOffsetsOptions topicPartitions(List<TopicPartition> topicPartitions) {
         this.topicPartitions = topicPartitions;
         return this;
     }
 
+    /**
+     * Sets an optional requireStable flag.
+     */
+    public ListConsumerGroupOffsetsOptions requireStable(final boolean requireStable) {
+        this.requireStable = requireStable;
+        return this;
+    }
+
     /**
      * Returns a list of topic partitions to add as part of the result.
+     * <p>
+     * @deprecated Since 3.3.
+     * Use {@link Admin#listConsumerGroupOffsets(java.util.Map, ListConsumerGroupOffsetsOptions)}
+     * to specify topic partitions.
      */
+    @Deprecated
     public List<TopicPartition> topicPartitions() {
         return topicPartitions;
     }
+
+    public boolean requireStable() {
+        return requireStable;
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsResult.java b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsResult.java
index 48f4531418110..2136e33a401e1 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsResult.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsResult.java
@@ -17,25 +17,32 @@
 
 package org.apache.kafka.clients.admin;
 
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.ExecutionException;
+import java.util.stream.Collectors;
+
+import org.apache.kafka.clients.admin.internals.CoordinatorKey;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.annotation.InterfaceStability;
 
-import java.util.Map;
-
 /**
- * The result of the {@link Admin#listConsumerGroupOffsets(String)} call.
+ * The result of the {@link Admin#listConsumerGroupOffsets(Map)} and
+ * {@link Admin#listConsumerGroupOffsets(String)} call.
  * <p>
  * The API of this class is evolving, see {@link Admin} for details.
  */
 @InterfaceStability.Evolving
 public class ListConsumerGroupOffsetsResult {
 
-    final KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> future;
+    final Map<String, KafkaFuture<Map<TopicPartition, OffsetAndMetadata>>> futures;
 
-    ListConsumerGroupOffsetsResult(KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> future) {
-        this.future = future;
+    ListConsumerGroupOffsetsResult(final Map<CoordinatorKey, KafkaFuture<Map<TopicPartition, OffsetAndMetadata>>> futures) {
+        this.futures = futures.entrySet().stream()
+                .collect(Collectors.toMap(e -> e.getKey().idValue, Entry::getValue));
     }
 
     /**
@@ -43,7 +50,42 @@ public class ListConsumerGroupOffsetsResult {
      * If the group does not have a committed offset for this partition, the corresponding value in the returned map will be null.
      */
     public KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> partitionsToOffsetAndMetadata() {
-        return future;
+        if (futures.size() != 1) {
+            throw new IllegalStateException("Offsets from multiple consumer groups were requested. " +
+                    "Use partitionsToOffsetAndMetadata(groupId) instead to get future for a specific group.");
+        }
+        return futures.values().iterator().next();
     }
 
+    /**
+     * Return a future which yields a map of topic partitions to OffsetAndMetadata objects for
+     * the specified group. If the group doesn't have a committed offset for a specific
+     * partition, the corresponding value in the returned map will be null.
+     */
+    public KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> partitionsToOffsetAndMetadata(String groupId) {
+        if (!futures.containsKey(groupId))
+            throw new IllegalArgumentException("Offsets for consumer group '" + groupId + "' were not requested.");
+        return futures.get(groupId);
+    }
+
+    /**
+     * Return a future which yields all Map<String, Map<TopicPartition, OffsetAndMetadata> objects,
+     * if requests for all the groups succeed.
+     */
+    public KafkaFuture<Map<String, Map<TopicPartition, OffsetAndMetadata>>> all() {
+        return KafkaFuture.allOf(futures.values().toArray(new KafkaFuture[0])).thenApply(
+            nil -> {
+                Map<String, Map<TopicPartition, OffsetAndMetadata>> listedConsumerGroupOffsets = new HashMap<>(futures.size());
+                futures.forEach((key, future) -> {
+                    try {
+                        listedConsumerGroupOffsets.put(key, future.get());
+                    } catch (InterruptedException | ExecutionException e) {
+                        // This should be unreachable, since the KafkaFuture#allOf already ensured
+                        // that all of the futures completed successfully.
+                        throw new RuntimeException(e);
+                    }
+                });
+                return listedConsumerGroupOffsets;
+            });
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsSpec.java b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsSpec.java
new file mode 100644
index 0000000000000..83858e49c8170
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/ListConsumerGroupOffsetsSpec.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.clients.admin;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.annotation.InterfaceStability;
+
+import java.util.Collection;
+import java.util.Objects;
+
+/**
+ * Specification of consumer group offsets to list using {@link Admin#listConsumerGroupOffsets(java.util.Map)}.
+ *
+ * The API of this class is evolving, see {@link Admin} for details.
+ */
+@InterfaceStability.Evolving
+public class ListConsumerGroupOffsetsSpec {
+
+    private Collection<TopicPartition> topicPartitions;
+
+    /**
+     * Set the topic partitions whose offsets are to be listed for a consumer group.
+     * {@code null} includes all topic partitions.
+     *
+     * @param topicPartitions List of topic partitions to include
+     * @return This ListConsumerGroupOffsetSpec
+     */
+    public ListConsumerGroupOffsetsSpec topicPartitions(Collection<TopicPartition> topicPartitions) {
+        this.topicPartitions = topicPartitions;
+        return this;
+    }
+
+    /**
+     * Returns the topic partitions whose offsets are to be listed for a consumer group.
+     * {@code null} indicates that offsets of all partitions of the group are to be listed.
+     */
+    public Collection<TopicPartition> topicPartitions() {
+        return topicPartitions;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof ListConsumerGroupOffsetsSpec)) {
+            return false;
+        }
+        ListConsumerGroupOffsetsSpec that = (ListConsumerGroupOffsetsSpec) o;
+        return Objects.equals(topicPartitions, that.topicPartitions);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(topicPartitions);
+    }
+
+    @Override
+    public String toString() {
+        return "ListConsumerGroupOffsetsSpec(" +
+                "topicPartitions=" + topicPartitions +
+                ')';
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/LogDirDescription.java b/clients/src/main/java/org/apache/kafka/clients/admin/LogDirDescription.java
index 1c326ec43b926..665c86649ba37 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/LogDirDescription.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/LogDirDescription.java
@@ -20,8 +20,10 @@
 import org.apache.kafka.common.errors.ApiException;
 
 import java.util.Map;
+import java.util.OptionalLong;
 
 import static java.util.Collections.unmodifiableMap;
+import static org.apache.kafka.common.requests.DescribeLogDirsResponse.UNKNOWN_VOLUME_BYTES;
 
 /**
  * A description of a log directory on a particular broker.
@@ -29,10 +31,18 @@
 public class LogDirDescription {
     private final Map<TopicPartition, ReplicaInfo> replicaInfos;
     private final ApiException error;
+    private final OptionalLong totalBytes;
+    private final OptionalLong usableBytes;
 
     public LogDirDescription(ApiException error, Map<TopicPartition, ReplicaInfo> replicaInfos) {
+        this(error, replicaInfos, UNKNOWN_VOLUME_BYTES, UNKNOWN_VOLUME_BYTES);
+    }
+
+    public LogDirDescription(ApiException error, Map<TopicPartition, ReplicaInfo> replicaInfos, long totalBytes, long usableBytes) {
         this.error = error;
         this.replicaInfos = replicaInfos;
+        this.totalBytes = (totalBytes == UNKNOWN_VOLUME_BYTES) ? OptionalLong.empty() : OptionalLong.of(totalBytes);
+        this.usableBytes = (usableBytes == UNKNOWN_VOLUME_BYTES) ? OptionalLong.empty() : OptionalLong.of(usableBytes);
     }
 
     /**
@@ -54,11 +64,29 @@ public Map<TopicPartition, ReplicaInfo> replicaInfos() {
         return unmodifiableMap(replicaInfos);
     }
 
+    /**
+     * The total size of the volume this log directory is on or empty if the broker did not return a value.
+     * For volumes larger than Long.MAX_VALUE, Long.MAX_VALUE is returned.
+     */
+    public OptionalLong totalBytes() {
+        return totalBytes;
+    }
+
+    /**
+     * The usable size on the volume this log directory is on or empty if the broker did not return a value.
+     * For usable sizes larger than Long.MAX_VALUE, Long.MAX_VALUE is returned.
+     */
+    public OptionalLong usableBytes() {
+        return usableBytes;
+    }
+
     @Override
     public String toString() {
         return "LogDirDescription(" +
                 "replicaInfos=" + replicaInfos +
                 ", error=" + error +
+                ", totalBytes=" + totalBytes +
+                ", usableBytes=" + usableBytes +
                 ')';
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/MemberToRemove.java b/clients/src/main/java/org/apache/kafka/clients/admin/MemberToRemove.java
index 4c7b16b1da650..5ca5463d3f285 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/MemberToRemove.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/MemberToRemove.java
@@ -48,8 +48,8 @@ public int hashCode() {
 
     MemberIdentity toMemberIdentity() {
         return new MemberIdentity()
-                   .setGroupInstanceId(groupInstanceId)
-                   .setMemberId(JoinGroupRequest.UNKNOWN_MEMBER_ID);
+            .setGroupInstanceId(groupInstanceId)
+            .setMemberId(JoinGroupRequest.UNKNOWN_MEMBER_ID);
     }
 
     public String groupInstanceId() {
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/QuorumInfo.java b/clients/src/main/java/org/apache/kafka/clients/admin/QuorumInfo.java
new file mode 100644
index 0000000000000..75476d77dcff1
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/QuorumInfo.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.admin;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.OptionalLong;
+
+/**
+ * This class is used to describe the state of the quorum received in DescribeQuorumResponse.
+ */
+public class QuorumInfo {
+    private final Integer leaderId;
+    private final List<ReplicaState> voters;
+    private final List<ReplicaState> observers;
+
+    QuorumInfo(Integer leaderId, List<ReplicaState> voters, List<ReplicaState> observers) {
+        this.leaderId = leaderId;
+        this.voters = voters;
+        this.observers = observers;
+    }
+
+    public Integer leaderId() {
+        return leaderId;
+    }
+
+    public List<ReplicaState> voters() {
+        return voters;
+    }
+
+    public List<ReplicaState> observers() {
+        return observers;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        QuorumInfo that = (QuorumInfo) o;
+        return leaderId.equals(that.leaderId)
+            && voters.equals(that.voters)
+            && observers.equals(that.observers);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(leaderId, voters, observers);
+    }
+
+    @Override
+    public String toString() {
+        return "QuorumInfo(" +
+            "leaderId=" + leaderId +
+            ", voters=" + voters +
+            ", observers=" + observers +
+            ')';
+    }
+
+    public static class ReplicaState {
+        private final int replicaId;
+        private final long logEndOffset;
+        private final OptionalLong lastFetchTimeMs;
+        private final OptionalLong lastCaughtUpTimeMs;
+
+        ReplicaState() {
+            this(0, 0, OptionalLong.empty(), OptionalLong.empty());
+        }
+
+        ReplicaState(
+            int replicaId,
+            long logEndOffset,
+            OptionalLong lastFetchTimeMs,
+            OptionalLong lastCaughtUpTimeMs
+        ) {
+            this.replicaId = replicaId;
+            this.logEndOffset = logEndOffset;
+            this.lastFetchTimeMs = lastFetchTimeMs;
+            this.lastCaughtUpTimeMs = lastCaughtUpTimeMs;
+        }
+
+        /**
+         * Return the ID for this replica.
+         * @return The ID for this replica
+         */
+        public int replicaId() {
+            return replicaId;
+        }
+
+        /**
+         * Return the logEndOffset known by the leader for this replica.
+         * @return The logEndOffset for this replica
+         */
+        public long logEndOffset() {
+            return logEndOffset;
+        }
+
+        /**
+         * Return the lastFetchTime in milliseconds for this replica.
+         * @return The value of the lastFetchTime if known, empty otherwise
+         */
+        public OptionalLong lastFetchTimeMs() {
+            return lastFetchTimeMs;
+        }
+
+        /**
+         * Return the lastCaughtUpTime in milliseconds for this replica.
+         * @return The value of the lastCaughtUpTime if known, empty otherwise
+         */
+        public OptionalLong lastCaughtUpTimeMs() {
+            return lastCaughtUpTimeMs;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            ReplicaState that = (ReplicaState) o;
+            return replicaId == that.replicaId
+                && logEndOffset == that.logEndOffset
+                && lastFetchTimeMs.equals(that.lastFetchTimeMs)
+                && lastCaughtUpTimeMs.equals(that.lastCaughtUpTimeMs);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(replicaId, logEndOffset, lastFetchTimeMs, lastCaughtUpTimeMs);
+        }
+
+        @Override
+        public String toString() {
+            return "ReplicaState(" +
+                "replicaId=" + replicaId +
+                ", logEndOffset=" + logEndOffset +
+                ", lastFetchTimeMs=" + lastFetchTimeMs +
+                ", lastCaughtUpTimeMs=" + lastCaughtUpTimeMs +
+                ')';
+        }
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/UpdateFeaturesOptions.java b/clients/src/main/java/org/apache/kafka/clients/admin/UpdateFeaturesOptions.java
index 7a9f2141b2ab1..455f2b87d1573 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/UpdateFeaturesOptions.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/UpdateFeaturesOptions.java
@@ -26,4 +26,24 @@
  */
 @InterfaceStability.Evolving
 public class UpdateFeaturesOptions extends AbstractOptions<UpdateFeaturesOptions> {
+    private boolean validateOnly = false;
+
+    @Deprecated
+    public boolean dryRun() {
+        return validateOnly;
+    }
+
+    public boolean validateOnly() {
+        return validateOnly;
+    }
+
+    @Deprecated
+    public UpdateFeaturesOptions dryRun(boolean dryRun) {
+        return validateOnly(dryRun);
+    }
+
+    public UpdateFeaturesOptions validateOnly(boolean validateOnly) {
+        this.validateOnly = validateOnly;
+        return this;
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/internals/AdminApiDriver.java b/clients/src/main/java/org/apache/kafka/clients/admin/internals/AdminApiDriver.java
index d00db4b18c694..0e1b03d964fea 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/internals/AdminApiDriver.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/internals/AdminApiDriver.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.common.requests.AbstractRequest;
 import org.apache.kafka.common.requests.AbstractResponse;
 import org.apache.kafka.common.requests.FindCoordinatorRequest.NoBatchedFindCoordinatorsException;
+import org.apache.kafka.common.requests.OffsetFetchRequest.NoBatchedOffsetFetchRequestException;
 import org.apache.kafka.common.utils.LogContext;
 import org.slf4j.Logger;
 
@@ -253,7 +254,7 @@ public void onFailure(
                 .collect(Collectors.toSet());
             retryLookup(keysToUnmap);
 
-        } else if (t instanceof NoBatchedFindCoordinatorsException) {
+        } else if (t instanceof NoBatchedFindCoordinatorsException || t instanceof NoBatchedOffsetFetchRequestException) {
             ((CoordinatorStrategy) handler.lookupStrategy()).disableBatch();
             Set<K> keysToUnmap = spec.keys.stream()
                 .filter(future.lookupKeys()::contains)
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/internals/AlterConsumerGroupOffsetsHandler.java b/clients/src/main/java/org/apache/kafka/clients/admin/internals/AlterConsumerGroupOffsetsHandler.java
index eab2e2bb73a40..425ed66bd29a2 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/internals/AlterConsumerGroupOffsetsHandler.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/internals/AlterConsumerGroupOffsetsHandler.java
@@ -179,6 +179,8 @@ private void handleError(
             case INVALID_GROUP_ID:
             case INVALID_COMMIT_OFFSET_SIZE:
             case GROUP_AUTHORIZATION_FAILED:
+            // Member level errors.
+            case UNKNOWN_MEMBER_ID:
                 log.debug("OffsetCommit request for group id {} failed due to error {}.",
                     groupId.idValue, error);
                 partitionResults.put(topicPartition, error);
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/internals/CoordinatorStrategy.java b/clients/src/main/java/org/apache/kafka/clients/admin/internals/CoordinatorStrategy.java
index e6fc0d624a0cd..02b68527c3c7e 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/internals/CoordinatorStrategy.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/internals/CoordinatorStrategy.java
@@ -120,6 +120,10 @@ public void disableBatch() {
         batch = false;
     }
 
+    public boolean batch() {
+        return batch;
+    }
+
     private CoordinatorKey requireSingletonAndType(Set<CoordinatorKey> keys) {
         if (keys.size() != 1) {
             throw new IllegalArgumentException("Unexpected size of key set: expected 1, but got " + keys.size());
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/internals/FenceProducersHandler.java b/clients/src/main/java/org/apache/kafka/clients/admin/internals/FenceProducersHandler.java
index 225c6f4e75139..23572dd4419ca 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/internals/FenceProducersHandler.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/internals/FenceProducersHandler.java
@@ -47,15 +47,15 @@ public FenceProducersHandler(
     }
 
     public static AdminApiFuture.SimpleAdminApiFuture<CoordinatorKey, ProducerIdAndEpoch> newFuture(
-            Collection<String> transactionalIds
+        Collection<String> transactionalIds
     ) {
         return AdminApiFuture.forKeys(buildKeySet(transactionalIds));
     }
 
     private static Set<CoordinatorKey> buildKeySet(Collection<String> transactionalIds) {
         return transactionalIds.stream()
-                .map(CoordinatorKey::byTransactionalId)
-                .collect(Collectors.toSet());
+            .map(CoordinatorKey::byTransactionalId)
+            .collect(Collectors.toSet());
     }
 
     @Override
@@ -75,24 +75,24 @@ InitProducerIdRequest.Builder buildSingleRequest(int brokerId, CoordinatorKey ke
                     " when building `InitProducerId` request");
         }
         InitProducerIdRequestData data = new InitProducerIdRequestData()
-                // Because we never include a producer epoch or ID in this request, we expect that some errors
-                // (such as PRODUCER_FENCED) will never be returned in the corresponding broker response.
-                // If we ever modify this logic to include an epoch or producer ID, we will need to update the
-                // error handling logic for this handler to accommodate these new errors.
-                .setProducerEpoch(ProducerIdAndEpoch.NONE.epoch)
-                .setProducerId(ProducerIdAndEpoch.NONE.producerId)
-                .setTransactionalId(key.idValue)
-                // Set transaction timeout to 1 since it's only being initialized to fence out older producers with the same transactional ID,
-                // and shouldn't be used for any actual record writes
-                .setTransactionTimeoutMs(1);
+            // Because we never include a producer epoch or ID in this request, we expect that some errors
+            // (such as PRODUCER_FENCED) will never be returned in the corresponding broker response.
+            // If we ever modify this logic to include an epoch or producer ID, we will need to update the
+            // error handling logic for this handler to accommodate these new errors.
+            .setProducerEpoch(ProducerIdAndEpoch.NONE.epoch)
+            .setProducerId(ProducerIdAndEpoch.NONE.producerId)
+            .setTransactionalId(key.idValue)
+            // Set transaction timeout to 1 since it's only being initialized to fence out older producers with the same transactional ID,
+            // and shouldn't be used for any actual record writes
+            .setTransactionTimeoutMs(1);
         return new InitProducerIdRequest.Builder(data);
     }
 
     @Override
     public ApiResult<CoordinatorKey, ProducerIdAndEpoch> handleSingleResponse(
-            Node broker,
-            CoordinatorKey key,
-            AbstractResponse abstractResponse
+        Node broker,
+        CoordinatorKey key,
+        AbstractResponse abstractResponse
     ) {
         InitProducerIdResponse response = (InitProducerIdResponse) abstractResponse;
 
@@ -102,14 +102,17 @@ public ApiResult<CoordinatorKey, ProducerIdAndEpoch> handleSingleResponse(
         }
 
         Map<CoordinatorKey, ProducerIdAndEpoch> completed = Collections.singletonMap(key, new ProducerIdAndEpoch(
-                response.data().producerId(),
-                response.data().producerEpoch()
+            response.data().producerId(),
+            response.data().producerEpoch()
         ));
 
         return new ApiResult<>(completed, Collections.emptyMap(), Collections.emptyList());
     }
 
-    private ApiResult<CoordinatorKey, ProducerIdAndEpoch> handleError(CoordinatorKey transactionalIdKey, Errors error) {
+    private ApiResult<CoordinatorKey, ProducerIdAndEpoch> handleError(
+        CoordinatorKey transactionalIdKey,
+        Errors error
+    ) {
         switch (error) {
             case CLUSTER_AUTHORIZATION_FAILED:
                 return ApiResult.failed(transactionalIdKey, new ClusterAuthorizationException(
diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandler.java b/clients/src/main/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandler.java
index b591548954b96..21c7d8d488f3d 100644
--- a/clients/src/main/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandler.java
+++ b/clients/src/main/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandler.java
@@ -17,14 +17,16 @@
 package org.apache.kafka.clients.admin.internals;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsSpec;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.TopicPartition;
@@ -36,28 +38,26 @@
 import org.apache.kafka.common.utils.LogContext;
 import org.slf4j.Logger;
 
-public class ListConsumerGroupOffsetsHandler extends AdminApiHandler.Batched<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> {
+public class ListConsumerGroupOffsetsHandler implements AdminApiHandler<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> {
 
-    private final CoordinatorKey groupId;
-    private final List<TopicPartition> partitions;
+    private final boolean requireStable;
+    private final Map<String, ListConsumerGroupOffsetsSpec> groupSpecs;
     private final Logger log;
-    private final AdminApiLookupStrategy<CoordinatorKey> lookupStrategy;
+    private final CoordinatorStrategy lookupStrategy;
 
     public ListConsumerGroupOffsetsHandler(
-        String groupId,
-        List<TopicPartition> partitions,
+        Map<String, ListConsumerGroupOffsetsSpec> groupSpecs,
+        boolean requireStable,
         LogContext logContext
     ) {
-        this.groupId = CoordinatorKey.byGroupId(groupId);
-        this.partitions = partitions;
         this.log = logContext.logger(ListConsumerGroupOffsetsHandler.class);
         this.lookupStrategy = new CoordinatorStrategy(CoordinatorType.GROUP, logContext);
+        this.groupSpecs = groupSpecs;
+        this.requireStable = requireStable;
     }
 
-    public static AdminApiFuture.SimpleAdminApiFuture<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> newFuture(
-        String groupId
-    ) {
-        return AdminApiFuture.forKeys(Collections.singleton(CoordinatorKey.byGroupId(groupId)));
+    public static AdminApiFuture.SimpleAdminApiFuture<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> newFuture(Collection<String> groupIds) {
+        return AdminApiFuture.forKeys(coordinatorKeys(groupIds));
     }
 
     @Override
@@ -71,18 +71,45 @@ public AdminApiLookupStrategy<CoordinatorKey> lookupStrategy() {
     }
 
     private void validateKeys(Set<CoordinatorKey> groupIds) {
-        if (!groupIds.equals(Collections.singleton(groupId))) {
+        Set<CoordinatorKey> keys = coordinatorKeys(groupSpecs.keySet());
+        if (!keys.containsAll(groupIds)) {
             throw new IllegalArgumentException("Received unexpected group ids " + groupIds +
-                " (expected only " + Collections.singleton(groupId) + ")");
+                    " (expected one of " + keys + ")");
         }
     }
 
+    private static Set<CoordinatorKey> coordinatorKeys(Collection<String> groupIds) {
+        return groupIds.stream()
+           .map(CoordinatorKey::byGroupId)
+           .collect(Collectors.toSet());
+    }
+
+    public OffsetFetchRequest.Builder buildBatchedRequest(Set<CoordinatorKey> groupIds) {
+        // Create a map that only contains the consumer groups owned by the coordinator.
+        Map<String, List<TopicPartition>> coordinatorGroupIdToTopicPartitions = new HashMap<>(groupIds.size());
+        groupIds.forEach(g -> {
+            ListConsumerGroupOffsetsSpec spec = groupSpecs.get(g.idValue);
+            List<TopicPartition> partitions = spec.topicPartitions() != null ? new ArrayList<>(spec.topicPartitions()) : null;
+            coordinatorGroupIdToTopicPartitions.put(g.idValue, partitions);
+        });
+
+        return new OffsetFetchRequest.Builder(coordinatorGroupIdToTopicPartitions, requireStable, false);
+    }
+
     @Override
-    public OffsetFetchRequest.Builder buildBatchedRequest(int coordinatorId, Set<CoordinatorKey> groupIds) {
+    public Collection<RequestAndKeys<CoordinatorKey>> buildRequest(int brokerId, Set<CoordinatorKey> groupIds) {
         validateKeys(groupIds);
-        // Set the flag to false as for admin client request,
-        // we don't need to wait for any pending offset state to clear.
-        return new OffsetFetchRequest.Builder(groupId.idValue, false, partitions, false);
+
+        // When the OffsetFetchRequest fails with NoBatchedOffsetFetchRequestException, we completely disable
+        // the batching end-to-end, including the FindCoordinatorRequest.
+        if (lookupStrategy.batch()) {
+            return Collections.singletonList(new RequestAndKeys<>(buildBatchedRequest(groupIds), groupIds));
+        } else {
+            return groupIds.stream().map(groupId -> {
+                Set<CoordinatorKey> keys = Collections.singleton(groupId);
+                return new RequestAndKeys<>(buildBatchedRequest(keys), keys);
+            }).collect(Collectors.toList());
+        }
     }
 
     @Override
@@ -95,44 +122,46 @@ public ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> handleR
 
         final OffsetFetchResponse response = (OffsetFetchResponse) abstractResponse;
 
-        // the groupError will contain the group level error for v0-v8 OffsetFetchResponse
-        Errors groupError = response.groupLevelError(groupId.idValue);
-        if (groupError != Errors.NONE) {
-            final Map<CoordinatorKey, Throwable> failed = new HashMap<>();
-            final Set<CoordinatorKey> groupsToUnmap = new HashSet<>();
-
-            handleGroupError(groupId, groupError, failed, groupsToUnmap);
-
-            return new ApiResult<>(Collections.emptyMap(), failed, new ArrayList<>(groupsToUnmap));
-        } else {
-            final Map<TopicPartition, OffsetAndMetadata> groupOffsetsListing = new HashMap<>();
-
-            response.partitionDataMap(groupId.idValue).forEach((topicPartition, partitionData) -> {
-                final Errors error = partitionData.error;
-                if (error == Errors.NONE) {
-                    final long offset = partitionData.offset;
-                    final String metadata = partitionData.metadata;
-                    final Optional<Integer> leaderEpoch = partitionData.leaderEpoch;
-                    // Negative offset indicates that the group has no committed offset for this partition
-                    if (offset < 0) {
-                        groupOffsetsListing.put(topicPartition, null);
+        Map<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> completed = new HashMap<>();
+        Map<CoordinatorKey, Throwable> failed = new HashMap<>();
+        List<CoordinatorKey> unmapped = new ArrayList<>();
+        for (CoordinatorKey coordinatorKey : groupIds) {
+            String group = coordinatorKey.idValue;
+            if (response.groupHasError(group)) {
+                handleGroupError(CoordinatorKey.byGroupId(group), response.groupLevelError(group), failed, unmapped);
+            } else {
+                final Map<TopicPartition, OffsetAndMetadata> groupOffsetsListing = new HashMap<>();
+                Map<TopicPartition, OffsetFetchResponse.PartitionData> responseData = response.partitionDataMap(group);
+                for (Map.Entry<TopicPartition, OffsetFetchResponse.PartitionData> partitionEntry : responseData.entrySet()) {
+                    final TopicPartition topicPartition = partitionEntry.getKey();
+                    OffsetFetchResponse.PartitionData partitionData = partitionEntry.getValue();
+                    final Errors error = partitionData.error;
+
+                    if (error == Errors.NONE) {
+                        final long offset = partitionData.offset;
+                        final String metadata = partitionData.metadata;
+                        final Optional<Integer> leaderEpoch = partitionData.leaderEpoch;
+                        // Negative offset indicates that the group has no committed offset for this partition
+                        if (offset < 0) {
+                            groupOffsetsListing.put(topicPartition, null);
+                        } else {
+                            groupOffsetsListing.put(topicPartition, new OffsetAndMetadata(offset, leaderEpoch, metadata));
+                        }
                     } else {
-                        groupOffsetsListing.put(topicPartition, new OffsetAndMetadata(offset, leaderEpoch, metadata));
+                        log.warn("Skipping return offset for {} due to error {}.", topicPartition, error);
                     }
-                } else {
-                    log.warn("Skipping return offset for {} due to error {}.", topicPartition, error);
                 }
-            });
-
-            return ApiResult.completed(groupId, groupOffsetsListing);
+                completed.put(CoordinatorKey.byGroupId(group), groupOffsetsListing);
+            }
         }
+        return new ApiResult<>(completed, failed, unmapped);
     }
 
     private void handleGroupError(
         CoordinatorKey groupId,
         Errors error,
         Map<CoordinatorKey, Throwable> failed,
-        Set<CoordinatorKey> groupsToUnmap
+        List<CoordinatorKey> groupsToUnmap
     ) {
         switch (error) {
             case GROUP_AUTHORIZATION_FAILED:
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java
index ca24c281dc0df..5a2177052900b 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerConfig.java
@@ -23,11 +23,14 @@
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigDef.Importance;
 import org.apache.kafka.common.config.ConfigDef.Type;
+import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.SecurityConfig;
 import org.apache.kafka.common.errors.InvalidConfigurationException;
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.requests.JoinGroupRequest;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.utils.Utils;
 
 import java.util.Arrays;
 import java.util.Collections;
@@ -312,7 +315,7 @@ public class ConsumerConfig extends AbstractConfig {
             " <code>read_committed</code> mode, consumer.poll() will only return messages up to the last stable offset (LSO), which is the one less than the offset of the first open transaction." +
             " In particular any messages appearing after messages belonging to ongoing transactions will be withheld until the relevant transaction has been completed. As a result, <code>read_committed</code>" +
             " consumers will not be able to read up to the high watermark when there are in flight transactions.</p><p> Further, when in <code>read_committed</code> the seekToEnd method will" +
-            " return the LSO";
+            " return the LSO</p>";
 
     public static final String DEFAULT_ISOLATION_LEVEL = IsolationLevel.READ_UNCOMMITTED.toString().toLowerCase(Locale.ROOT);
 
@@ -350,6 +353,7 @@ public class ConsumerConfig extends AbstractConfig {
                                 .define(GROUP_INSTANCE_ID_CONFIG,
                                         Type.STRING,
                                         null,
+                                        new ConfigDef.NonEmptyString(),
                                         Importance.MEDIUM,
                                         GROUP_INSTANCE_ID_DOC)
                                 .define(SESSION_TIMEOUT_MS_CONFIG,
@@ -451,8 +455,8 @@ public class ConsumerConfig extends AbstractConfig {
                                         CommonClientConfigs.RETRY_BACKOFF_MS_DOC)
                                 .define(AUTO_OFFSET_RESET_CONFIG,
                                         Type.STRING,
-                                        "latest",
-                                        in("latest", "earliest", "none"),
+                                        OffsetResetStrategy.LATEST.toString(),
+                                        in(Utils.enumOptions(OffsetResetStrategy.class)),
                                         Importance.MEDIUM,
                                         AUTO_OFFSET_RESET_DOC)
                                 .define(CHECK_CRCS_CONFIG,
@@ -571,6 +575,7 @@ public class ConsumerConfig extends AbstractConfig {
                                 .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                                         Type.STRING,
                                         CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                                        in(Utils.enumOptions(SecurityProtocol.class)),
                                         Importance.MEDIUM,
                                         CommonClientConfigs.SECURITY_PROTOCOL_DOC)
                                 .withClientSslSupport()
@@ -579,6 +584,7 @@ public class ConsumerConfig extends AbstractConfig {
 
     @Override
     protected Map<String, Object> postProcessParsedConfig(final Map<String, Object> parsedValues) {
+        CommonClientConfigs.postValidateSaslMechanismConfig(this);
         Map<String, Object> refinedConfigs = CommonClientConfigs.postProcessReconnectBackoffConfigs(this, parsedValues);
         maybeOverrideClientId(refinedConfigs);
         return refinedConfigs;
@@ -601,11 +607,16 @@ private void maybeOverrideClientId(Map<String, Object> configs) {
     protected static Map<String, Object> appendDeserializerToConfig(Map<String, Object> configs,
                                                                     Deserializer<?> keyDeserializer,
                                                                     Deserializer<?> valueDeserializer) {
+        // validate deserializer configuration, if the passed deserializer instance is null, the user must explicitly set a valid deserializer configuration value
         Map<String, Object> newConfigs = new HashMap<>(configs);
         if (keyDeserializer != null)
             newConfigs.put(KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializer.getClass());
+        else if (newConfigs.get(KEY_DESERIALIZER_CLASS_CONFIG) == null)
+            throw new ConfigException(KEY_DESERIALIZER_CLASS_CONFIG, null, "must be non-null.");
         if (valueDeserializer != null)
             newConfigs.put(VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializer.getClass());
+        else if (newConfigs.get(VALUE_DESERIALIZER_CLASS_CONFIG) == null)
+            throw new ConfigException(VALUE_DESERIALIZER_CLASS_CONFIG, null, "must be non-null.");
         return newConfigs;
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java
index 0fd4ea9cf4213..6ffb772915d6a 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/KafkaConsumer.java
@@ -563,7 +563,7 @@ public class KafkaConsumer<K, V> implements Consumer<K, V> {
     private static final long NO_CURRENT_THREAD = -1L;
     private static final String JMX_PREFIX = "kafka.consumer";
     static final long DEFAULT_CLOSE_TIMEOUT_MS = 30 * 1000;
-    private static final String DEFAULT_REASON = "rebalance enforced by user";
+    static final String DEFAULT_REASON = "rebalance enforced by user";
 
     // Visible for testing
     final Metrics metrics;
@@ -774,8 +774,12 @@ public KafkaConsumer(Map<String, Object> configs,
             );
 
             // no coordinator will be constructed for the default (null) group id
-            this.coordinator = !groupId.isPresent() ? null :
-                new ConsumerCoordinator(groupRebalanceConfig,
+            if (!groupId.isPresent()) {
+                config.ignore(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG);
+                config.ignore(ConsumerConfig.THROW_ON_FETCH_STABLE_OFFSET_UNSUPPORTED);
+                this.coordinator = null;
+            } else {
+                this.coordinator = new ConsumerCoordinator(groupRebalanceConfig,
                         logContext,
                         this.client,
                         assignors,
@@ -788,6 +792,7 @@ public KafkaConsumer(Map<String, Object> configs,
                         config.getInt(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG),
                         this.interceptors,
                         config.getBoolean(ConsumerConfig.THROW_ON_FETCH_STABLE_OFFSET_UNSUPPORTED));
+            }
             this.fetcher = new Fetcher<>(
                     logContext,
                     this.client,
@@ -1120,7 +1125,7 @@ public void assign(Collection<TopicPartition> partitions) {
                 if (coordinator != null)
                     this.coordinator.maybeAutoCommitOffsetsAsync(time.milliseconds());
 
-                log.info("Subscribed to partition(s): {}", Utils.join(partitions, ", "));
+                log.info("Assigned to partition(s): {}", Utils.join(partitions, ", "));
                 if (this.subscriptions.assignFromUser(new HashSet<>(partitions)))
                     metadata.requestUpdateForNewTopics();
             }
@@ -2322,7 +2327,7 @@ public void enforceRebalance(final String reason) {
             if (coordinator == null) {
                 throw new IllegalStateException("Tried to force a rebalance but consumer does not have a group.");
             }
-            coordinator.requestRejoin(reason == null ? DEFAULT_REASON : DEFAULT_REASON + ": " + reason);
+            coordinator.requestRejoin(reason == null || reason.isEmpty() ? DEFAULT_REASON : reason);
         } finally {
             release();
         }
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetResetStrategy.java b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetResetStrategy.java
index 6d742b850a134..8b2297c96865e 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetResetStrategy.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/OffsetResetStrategy.java
@@ -16,6 +16,13 @@
  */
 package org.apache.kafka.clients.consumer;
 
+import java.util.Locale;
+
 public enum OffsetResetStrategy {
-    LATEST, EARLIEST, NONE
+    LATEST, EARLIEST, NONE;
+
+    @Override
+    public String toString() {
+        return super.toString().toLowerCase(Locale.ROOT);
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java
index 5b9712f34667e..d2ece9efc587c 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinator.java
@@ -187,11 +187,12 @@ public AbstractCoordinator(GroupRebalanceConfig rebalanceConfig,
     /**
      * Invoked prior to each group join or rejoin. This is typically used to perform any
      * cleanup from the previous generation (such as committing offsets for the consumer)
+     * @param timer Timer bounding how long this method can block
      * @param generation The previous generation or -1 if there was none
      * @param memberId The identifier of this member in the previous group or "" if there was none
      * @return true If onJoinPrepare async commit succeeded, false otherwise
      */
-    protected abstract boolean onJoinPrepare(int generation, String memberId);
+    protected abstract boolean onJoinPrepare(Timer timer, int generation, String memberId);
 
     /**
      * Invoked when the leader is elected. This is used by the leader to perform the assignment
@@ -426,7 +427,7 @@ boolean joinGroupIfNeeded(final Timer timer) {
                 // exception, in which case upon retry we should not retry onJoinPrepare either.
                 needsJoinPrepare = false;
                 // return false when onJoinPrepare is waiting for committing offset
-                if (!onJoinPrepare(generation.generationId, generation.memberId)) {
+                if (!onJoinPrepare(timer, generation.generationId, generation.memberId)) {
                     needsJoinPrepare = true;
                     //should not initiateJoinGroup if needsJoinPrepare still is true
                     return false;
@@ -478,8 +479,12 @@ boolean joinGroupIfNeeded(final Timer timer) {
 
                 resetJoinGroupFuture();
                 synchronized (AbstractCoordinator.this) {
-                    rejoinReason = String.format("rebalance failed due to '%s' (%s)", exception.getMessage(), exception.getClass().getSimpleName());
-                    rejoinNeeded = true;
+                    final String simpleName = exception.getClass().getSimpleName();
+                    final String shortReason = String.format("rebalance failed due to %s", simpleName);
+                    final String fullReason = String.format("rebalance failed due to '%s' (%s)",
+                        exception.getMessage(),
+                        simpleName);
+                    requestRejoin(shortReason, fullReason);
                 }
 
                 if (exception instanceof UnknownMemberIdException ||
@@ -555,7 +560,7 @@ RequestFuture<ByteBuffer> sendJoinGroupRequest() {
                         .setProtocolType(protocolType())
                         .setProtocols(metadata())
                         .setRebalanceTimeoutMs(this.rebalanceConfig.rebalanceTimeoutMs)
-                        .setReason(this.rejoinReason)
+                        .setReason(JoinGroupRequest.maybeTruncateReason(this.rejoinReason))
         );
 
         log.debug("Sending JoinGroup ({}) to coordinator {}", requestBuilder, this.coordinator);
@@ -807,6 +812,9 @@ public void handle(SyncGroupResponse syncResponse,
                 } else if (error == Errors.REBALANCE_IN_PROGRESS) {
                     log.info("SyncGroup failed: The group began another rebalance. Need to re-join the group. " +
                                  "Sent generation was {}", sentGeneration);
+                    // consumer didn't get assignment in this generation, so we need to reset generation
+                    // to avoid joinGroup with out-of-data ownedPartitions in cooperative rebalance
+                    resetStateOnResponseError(ApiKeys.SYNC_GROUP, error, false);
                     future.raise(error);
                 } else if (error == Errors.FENCED_INSTANCE_ID) {
                     // for sync-group request, even if the generation has changed we would not expect the instance id
@@ -939,7 +947,7 @@ protected synchronized void markCoordinatorUnknown(String cause) {
 
     protected synchronized void markCoordinatorUnknown(boolean isDisconnected, String cause) {
         if (this.coordinator != null) {
-            log.info("Group coordinator {} is unavailable or invalid due to cause: {}."
+            log.info("Group coordinator {} is unavailable or invalid due to cause: {}. "
                     + "isDisconnected: {}. Rediscovery will be attempted.", this.coordinator,
                     cause, isDisconnected);
             Node oldCoordinator = this.coordinator;
@@ -1022,15 +1030,28 @@ synchronized void resetGenerationOnLeaveGroup() {
         resetStateAndRejoin("consumer pro-actively leaving the group", true);
     }
 
-    public synchronized void requestRejoinIfNecessary(final String reason) {
+    public synchronized void requestRejoinIfNecessary(final String shortReason,
+                                                      final String fullReason) {
         if (!this.rejoinNeeded) {
-            requestRejoin(reason);
+            requestRejoin(shortReason, fullReason);
         }
     }
 
-    public synchronized void requestRejoin(final String reason) {
-        log.info("Request joining group due to: {}", reason);
-        this.rejoinReason = reason;
+    public synchronized void requestRejoin(final String shortReason) {
+        requestRejoin(shortReason, shortReason);
+    }
+
+    /**
+     * Request to rejoin the group.
+     *
+     * @param shortReason This is the reason passed up to the group coordinator. It must be
+     *                    reasonably small.
+     * @param fullReason This is the reason logged locally.
+     */
+    public synchronized void requestRejoin(final String shortReason,
+                                           final String fullReason) {
+        log.info("Request joining group due to: {}", fullReason);
+        this.rejoinReason = shortReason;
         this.rejoinNeeded = true;
     }
 
@@ -1094,7 +1115,7 @@ public synchronized RequestFuture<Void> maybeLeaveGroup(String leaveReason) {
                 generation.memberId, coordinator, leaveReason);
             LeaveGroupRequest.Builder request = new LeaveGroupRequest.Builder(
                 rebalanceConfig.groupId,
-                Collections.singletonList(new MemberIdentity().setMemberId(generation.memberId).setReason(leaveReason))
+                Collections.singletonList(new MemberIdentity().setMemberId(generation.memberId).setReason(JoinGroupRequest.maybeTruncateReason(leaveReason)))
             );
 
             future = client.send(coordinator, request).compose(new LeaveGroupResponseHandler(generation));
@@ -1425,12 +1446,11 @@ public void run() {
                                 // clear the future so that after the backoff, if the hb still sees coordinator unknown in
                                 // the next iteration it will try to re-discover the coordinator in case the main thread cannot
                                 clearFindCoordinatorFuture();
-
-                                // backoff properly
-                                AbstractCoordinator.this.wait(rebalanceConfig.retryBackoffMs);
                             } else {
                                 lookupCoordinator();
                             }
+                            // backoff properly
+                            AbstractCoordinator.this.wait(rebalanceConfig.retryBackoffMs);
                         } else if (heartbeat.sessionTimeoutExpired(now)) {
                             // the session timeout has expired without seeing a successful heartbeat, so we should
                             // probably make sure the coordinator is still healthy.
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java
index 84cf822a43a3e..9838e7dc8fe4b 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.clients.consumer.internals;
 
+import java.time.Duration;
 import java.util.SortedSet;
 import java.util.TreeSet;
 import org.apache.kafka.clients.GroupRebalanceConfig;
@@ -140,6 +141,12 @@ private boolean sameRequest(final Set<TopicPartition> currentRequest, final Gene
     }
 
     private final RebalanceProtocol protocol;
+    // pending commit offset request in onJoinPrepare
+    private RequestFuture<Void> autoCommitOffsetRequestFuture = null;
+    // a timer for join prepare to know when to stop.
+    // it'll set to rebalance timeout so that the member can join the group successfully
+    // even though offset commit failed.
+    private Timer joinPrepareTimer = null;
 
     /**
      * Initialize the coordination manager.
@@ -401,10 +408,10 @@ protected void onJoinComplete(int generation,
         assignedPartitions.addAll(assignment.partitions());
 
         if (!subscriptions.checkAssignmentMatchedSubscription(assignedPartitions)) {
-            final String reason = String.format("received assignment %s does not match the current subscription %s; " +
+            final String fullReason = String.format("received assignment %s does not match the current subscription %s; " +
                     "it is likely that the subscription has changed since we joined the group, will re-join with current subscription",
                     assignment.partitions(), subscriptions.prettyString());
-            requestRejoin(reason);
+            requestRejoin("received assignment does not match the current subscription", fullReason);
 
             return;
         }
@@ -437,9 +444,9 @@ protected void onJoinComplete(int generation,
                 firstException.compareAndSet(null, invokePartitionsRevoked(revokedPartitions));
 
                 // If revoked any partitions, need to re-join the group afterwards
-                final String reason = String.format("need to revoke partitions %s as indicated " +
+                final String fullReason = String.format("need to revoke partitions %s as indicated " +
                         "by the current assignment and re-join", revokedPartitions);
-                requestRejoin(reason);
+                requestRejoin("need to revoke partitions and re-join", fullReason);
             }
         }
 
@@ -548,14 +555,18 @@ public boolean poll(Timer timer, boolean waitForJoinGroup) {
                 }
             }
         } else {
-            // For manually assigned partitions, if coordinator is unknown, make sure we lookup one and await metadata.
+            // For manually assigned partitions, we do not try to pro-actively lookup coordinator;
+            // instead we only try to refresh metadata when necessary.
             // If connections to all nodes fail, wakeups triggered while attempting to send fetch
             // requests result in polls returning immediately, causing a tight loop of polls. Without
             // the wakeup, poll() with no channels would block for the timeout, delaying re-connection.
             // awaitMetadataUpdate() in ensureCoordinatorReady initiates new connections with configured backoff and avoids the busy loop.
-            if (coordinatorUnknownAndUnready(timer)) {
-                return false;
+            if (metadata.updateRequested() && !client.hasReadyNodes(timer.currentTimeMs())) {
+                client.awaitMetadataUpdate(timer);
             }
+
+            // if there is pending coordinator requests, ensure they have a chance to be transmitted.
+            client.pollNoWakeup();
         }
 
         maybeAutoCommitOffsetsAsync(timer.currentTimeMs());
@@ -735,24 +746,58 @@ private void validateCooperativeAssignment(final Map<String, List<TopicPartition
     }
 
     @Override
-    protected boolean onJoinPrepare(int generation, String memberId) {
+    protected boolean onJoinPrepare(Timer timer, int generation, String memberId) {
         log.debug("Executing onJoinPrepare with generation {} and memberId {}", generation, memberId);
-        boolean onJoinPrepareAsyncCommitCompleted = false;
+        if (joinPrepareTimer == null) {
+            // We should complete onJoinPrepare before rebalanceTimeout,
+            // and continue to join group to avoid member got kicked out from group
+            joinPrepareTimer = time.timer(rebalanceConfig.rebalanceTimeoutMs);
+        } else {
+            joinPrepareTimer.update();
+        }
+
         // async commit offsets prior to rebalance if auto-commit enabled
-        RequestFuture<Void> future = maybeAutoCommitOffsetsAsync();
-        // return true when
-        // 1. future is null, which means no commit request sent, so it is still considered completed
-        // 2. offset commit completed
-        // 3. offset commit failed with non-retriable exception
-        if (future == null)
-            onJoinPrepareAsyncCommitCompleted = true;
-        else if (future.succeeded())
-            onJoinPrepareAsyncCommitCompleted = true;
-        else if (future.failed() && !future.isRetriable()) {
-            log.error("Asynchronous auto-commit of offsets failed: {}", future.exception().getMessage());
-            onJoinPrepareAsyncCommitCompleted = true;
+        // and there is no in-flight offset commit request
+        if (autoCommitEnabled && autoCommitOffsetRequestFuture == null) {
+            autoCommitOffsetRequestFuture = maybeAutoCommitOffsetsAsync();
         }
 
+        // wait for commit offset response before timer expired
+        if (autoCommitOffsetRequestFuture != null) {
+            Timer pollTimer = timer.remainingMs() < joinPrepareTimer.remainingMs() ?
+                    timer : joinPrepareTimer;
+            client.poll(autoCommitOffsetRequestFuture, pollTimer);
+            joinPrepareTimer.update();
+
+            // Keep retrying/waiting the offset commit when:
+            // 1. offset commit haven't done (and joinPrepareTimer not expired)
+            // 2. failed with retryable exception (and joinPrepareTimer not expired)
+            // Otherwise, continue to revoke partitions, ex:
+            // 1. if joinPrepareTime has expired
+            // 2. if offset commit failed with no-retryable exception
+            // 3. if offset commit success
+            boolean onJoinPrepareAsyncCommitCompleted = true;
+            if (joinPrepareTimer.isExpired()) {
+                log.error("Asynchronous auto-commit of offsets failed: joinPrepare timeout. Will continue to join group");
+            } else if (!autoCommitOffsetRequestFuture.isDone()) {
+                onJoinPrepareAsyncCommitCompleted = false;
+            } else if (autoCommitOffsetRequestFuture.failed() && autoCommitOffsetRequestFuture.isRetriable()) {
+                log.debug("Asynchronous auto-commit of offsets failed with retryable error: {}. Will retry it.",
+                        autoCommitOffsetRequestFuture.exception().getMessage());
+                onJoinPrepareAsyncCommitCompleted = false;
+            } else if (autoCommitOffsetRequestFuture.failed() && !autoCommitOffsetRequestFuture.isRetriable()) {
+                log.error("Asynchronous auto-commit of offsets failed: {}. Will continue to join group.",
+                        autoCommitOffsetRequestFuture.exception().getMessage());
+            }
+            if (autoCommitOffsetRequestFuture.isDone()) {
+                autoCommitOffsetRequestFuture = null;
+            }
+            if (!onJoinPrepareAsyncCommitCompleted) {
+                pollTimer.sleep(Math.min(pollTimer.remainingMs(), rebalanceConfig.retryBackoffMs));
+                timer.update();
+                return false;
+            }
+        }
 
         // the generation / member-id can possibly be reset by the heartbeat thread
         // upon getting errors or heartbeat timeouts; in this case whatever is previously
@@ -804,11 +849,14 @@ else if (future.failed() && !future.isRetriable()) {
 
         isLeader = false;
         subscriptions.resetGroupSubscription();
+        joinPrepareTimer = null;
+        autoCommitOffsetRequestFuture = null;
+        timer.update();
 
         if (exception != null) {
             throw new KafkaException("User rebalance callback throws an error", exception);
         }
-        return onJoinPrepareAsyncCommitCompleted;
+        return true;
     }
 
     @Override
@@ -851,17 +899,17 @@ public boolean rejoinNeededOrPending() {
         // we need to rejoin if we performed the assignment and metadata has changed;
         // also for those owned-but-no-longer-existed partitions we should drop them as lost
         if (assignmentSnapshot != null && !assignmentSnapshot.matches(metadataSnapshot)) {
-            final String reason = String.format("cached metadata has changed from %s at the beginning of the rebalance to %s",
+            final String fullReason = String.format("cached metadata has changed from %s at the beginning of the rebalance to %s",
                 assignmentSnapshot, metadataSnapshot);
-            requestRejoinIfNecessary(reason);
+            requestRejoinIfNecessary("cached metadata has changed", fullReason);
             return true;
         }
 
         // we need to join if our subscription has changed since the last join
         if (joinedSubscription != null && !joinedSubscription.equals(subscriptions.subscription())) {
-            final String reason = String.format("subscription has changed from %s at the beginning of the rebalance to %s",
+            final String fullReason = String.format("subscription has changed from %s at the beginning of the rebalance to %s",
                 joinedSubscription, subscriptions.subscription());
-            requestRejoinIfNecessary(reason);
+            requestRejoinIfNecessary("subscription has changed", fullReason);
             return true;
         }
 
@@ -970,7 +1018,7 @@ public void close(final Timer timer) {
         // we do not need to re-enable wakeups since we are closing already
         client.disableWakeups();
         try {
-            maybeAutoCommitOffsetsAsync();
+            maybeAutoCommitOffsetsSync(timer);
             while (pendingAsyncCommits.get() > 0 && timer.notExpired()) {
                 ensureCoordinatorReady(timer);
                 client.poll(timer);
@@ -1004,7 +1052,17 @@ public RequestFuture<Void> commitOffsetsAsync(final Map<TopicPartition, OffsetAn
         if (offsets.isEmpty()) {
             // No need to check coordinator if offsets is empty since commit of empty offsets is completed locally.
             future = doCommitOffsetsAsync(offsets, callback);
-        } else if (!coordinatorUnknown()) {
+        } else if (!coordinatorUnknownAndUnready(time.timer(Duration.ZERO))) {
+            // we need to make sure coordinator is ready before committing, since
+            // this is for async committing we do not try to block, but just try once to
+            // clear the previous discover-coordinator future, resend, or get responses;
+            // if the coordinator is not ready yet then we would just proceed and put that into the
+            // pending requests, and future poll calls would still try to complete them.
+            //
+            // the key here though is that we have to try sending the discover-coordinator if
+            // it's not known or ready, since this is the only place we can send such request
+            // under manual assignment (there we would not have heartbeat thread trying to auto-rediscover
+            // the coordinator).
             future = doCommitOffsetsAsync(offsets, callback);
         } else {
             // we don't know the current coordinator, so try to find it and then send the commit
@@ -1110,6 +1168,24 @@ public boolean commitOffsetsSync(Map<TopicPartition, OffsetAndMetadata> offsets,
         return false;
     }
 
+    private void maybeAutoCommitOffsetsSync(Timer timer) {
+        if (autoCommitEnabled) {
+            Map<TopicPartition, OffsetAndMetadata> allConsumedOffsets = subscriptions.allConsumed();
+            try {
+                log.debug("Sending synchronous auto-commit of offsets {}", allConsumedOffsets);
+                if (!commitOffsetsSync(allConsumedOffsets, timer))
+                    log.debug("Auto-commit of offsets {} timed out before completion", allConsumedOffsets);
+            } catch (WakeupException | InterruptException e) {
+                log.debug("Auto-commit of offsets {} was interrupted before completion", allConsumedOffsets);
+                // rethrow wakeups since they are triggered by the user
+                throw e;
+            } catch (Exception e) {
+                // consistent with async auto-commit failures, we do not propagate the exception
+                log.warn("Synchronous auto-commit of offsets {} failed: {}", allConsumedOffsets, e.getMessage());
+            }
+        }
+    }
+
     public void maybeAutoCommitOffsetsAsync(long now) {
         if (autoCommitEnabled) {
             nextAutoCommitTimer.update(now);
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java
index 54f70cafd72c8..73ffd217efe21 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/Fetcher.java
@@ -1201,7 +1201,7 @@ private Map<Node, FetchSessionHandler.FetchRequestData> prepareFetchRequests() {
                 continue;
             }
 
-            // Use the preferred read replica if set, otherwise the position's leader
+            // Use the preferred read replica if set, otherwise the partition's leader
             Node node = selectReadReplica(partition, leaderOpt.get(), currentTimeMs);
             if (client.isUnavailable(node)) {
                 client.maybeThrowAuthFailure(node);
@@ -1866,12 +1866,11 @@ private void maybeUpdateAssignment(SubscriptionState subscription) {
                 for (TopicPartition tp : newAssignedPartitions) {
                     if (!this.assignedPartitions.contains(tp)) {
                         MetricName metricName = partitionPreferredReadReplicaMetricName(tp);
-                        if (metrics.metric(metricName) == null) {
-                            metrics.addMetric(
-                                metricName,
-                                (Gauge<Integer>) (config, now) -> subscription.preferredReadReplica(tp, 0L).orElse(-1)
-                            );
-                        }
+                        metrics.addMetricIfAbsent(
+                            metricName,
+                            null,
+                            (Gauge<Integer>) (config, now) -> subscription.preferredReadReplica(tp, 0L).orElse(-1)
+                        );
                     }
                 }
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/FetcherMetricsRegistry.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/FetcherMetricsRegistry.java
index 501ffe9a88da8..f76a92462d5ae 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/FetcherMetricsRegistry.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/FetcherMetricsRegistry.java
@@ -96,7 +96,7 @@ public FetcherMetricsRegistry(Set<String> tags, String metricGrpPrefix) {
                 "The total number of fetch requests.", tags);
 
         this.recordsLagMax = new MetricNameTemplate("records-lag-max", groupName,
-                "The maximum lag in terms of number of records for any partition in this window", tags);
+                "The maximum lag in terms of number of records for any partition in this window. NOTE: This is based on current offset and not committed offset", tags);
         this.recordsLeadMin = new MetricNameTemplate("records-lead-min", groupName,
                 "The minimum lead in terms of number of records for any partition in this window", tags);
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
index 416468d945fcb..0225822a19466 100644
--- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
+++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/SubscriptionState.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.clients.consumer.internals;
 
-import java.util.TreeSet;
 import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.clients.Metadata;
 import org.apache.kafka.clients.NodeApiVersions;
@@ -43,6 +42,7 @@
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
+import java.util.TreeSet;
 import java.util.function.LongSupplier;
 import java.util.function.Predicate;
 import java.util.regex.Pattern;
@@ -617,7 +617,7 @@ public synchronized Optional<Integer> preferredReadReplica(TopicPartition tp, lo
      * Unset the preferred read replica. This causes the fetcher to go back to the leader for fetches.
      *
      * @param tp The topic partition
-     * @return true if the preferred read replica was set, false otherwise.
+     * @return the removed preferred read replica if set, None otherwise.
      */
     public synchronized Optional<Integer> clearPreferredReadReplica(TopicPartition tp) {
         return assignedState(tp).clearPreferredReadReplica();
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
index f739336b85692..2d5c8994b4df9 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/KafkaProducer.java
@@ -26,6 +26,7 @@
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.clients.consumer.OffsetCommitCallback;
 import org.apache.kafka.clients.producer.internals.BufferPool;
+import org.apache.kafka.clients.producer.internals.BuiltInPartitioner;
 import org.apache.kafka.clients.producer.internals.KafkaProducerMetrics;
 import org.apache.kafka.clients.producer.internals.ProducerInterceptors;
 import org.apache.kafka.clients.producer.internals.ProducerMetadata;
@@ -254,6 +255,7 @@ public class KafkaProducer<K, V> implements Producer<K, V> {
     private final Serializer<V> valueSerializer;
     private final ProducerConfig producerConfig;
     private final long maxBlockTimeMs;
+    private final boolean partitionerIgnoreKeys;
     private final ProducerInterceptors<K, V> interceptors;
     private final ApiVersions apiVersions;
     private final TransactionManager transactionManager;
@@ -316,6 +318,23 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
         this(Utils.propsToMap(properties), keySerializer, valueSerializer);
     }
 
+    /**
+     * Check if partitioner is deprecated and log a warning if it is.
+     */
+    @SuppressWarnings("deprecation")
+    private void warnIfPartitionerDeprecated() {
+        // Using DefaultPartitioner and UniformStickyPartitioner is deprecated, see KIP-794.
+        if (partitioner instanceof org.apache.kafka.clients.producer.internals.DefaultPartitioner) {
+            log.warn("DefaultPartitioner is deprecated.  Please clear " + ProducerConfig.PARTITIONER_CLASS_CONFIG
+                    + " configuration setting to get the default partitioning behavior");
+        }
+        if (partitioner instanceof org.apache.kafka.clients.producer.UniformStickyPartitioner) {
+            log.warn("UniformStickyPartitioner is deprecated.  Please clear " + ProducerConfig.PARTITIONER_CLASS_CONFIG
+                    + " configuration setting and set " + ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG
+                    + " to 'true' to get the uniform sticky partitioning behavior");
+        }
+    }
+
     // visible for testing
     @SuppressWarnings("unchecked")
     KafkaProducer(ProducerConfig config,
@@ -360,6 +379,8 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
                     ProducerConfig.PARTITIONER_CLASS_CONFIG,
                     Partitioner.class,
                     Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
+            warnIfPartitionerDeprecated();
+            this.partitionerIgnoreKeys = config.getBoolean(ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG);
             long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
             if (keySerializer == null) {
                 this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
@@ -397,12 +418,20 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
 
             this.apiVersions = new ApiVersions();
             this.transactionManager = configureTransactionState(config, logContext);
+            // There is no need to do work required for adaptive partitioning, if we use a custom partitioner.
+            boolean enableAdaptivePartitioning = partitioner == null &&
+                config.getBoolean(ProducerConfig.PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG);
+            RecordAccumulator.PartitionerConfig partitionerConfig = new RecordAccumulator.PartitionerConfig(
+                enableAdaptivePartitioning,
+                config.getLong(ProducerConfig.PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG)
+            );
             this.accumulator = new RecordAccumulator(logContext,
                     config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
                     this.compressionType,
                     lingerMs(config),
                     retryBackoffMs,
                     deliveryTimeoutMs,
+                    partitionerConfig,
                     metrics,
                     PRODUCER_METRIC_GROUP_NAME,
                     time,
@@ -440,6 +469,44 @@ public KafkaProducer(Properties properties, Serializer<K> keySerializer, Seriali
         }
     }
 
+    // visible for testing
+    KafkaProducer(ProducerConfig config,
+                  LogContext logContext,
+                  Metrics metrics,
+                  Serializer<K> keySerializer,
+                  Serializer<V> valueSerializer,
+                  ProducerMetadata metadata,
+                  RecordAccumulator accumulator,
+                  TransactionManager transactionManager,
+                  Sender sender,
+                  ProducerInterceptors<K, V> interceptors,
+                  Partitioner partitioner,
+                  Time time,
+                  KafkaThread ioThread) {
+        this.producerConfig = config;
+        this.time = time;
+        this.clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
+        this.log = logContext.logger(KafkaProducer.class);
+        this.metrics = metrics;
+        this.producerMetrics = new KafkaProducerMetrics(metrics);
+        this.partitioner = partitioner;
+        this.keySerializer = keySerializer;
+        this.valueSerializer = valueSerializer;
+        this.interceptors = interceptors;
+        this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
+        this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
+        this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
+        this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
+        this.partitionerIgnoreKeys = config.getBoolean(ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG);
+        this.apiVersions = new ApiVersions();
+        this.transactionManager = transactionManager;
+        this.accumulator = accumulator;
+        this.errors = this.metrics.sensor("errors");
+        this.metadata = metadata;
+        this.sender = sender;
+        this.ioThread = ioThread;
+    }
+
     // visible for testing
     Sender newSender(LogContext logContext, KafkaClient kafkaClient, ProducerMetadata metadata) {
         int maxInflightRequests = producerConfig.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION);
@@ -530,6 +597,9 @@ private TransactionManager configureTransactionState(ProducerConfig config,
                 log.info("Instantiated a transactional producer.");
             else
                 log.info("Instantiated an idempotent producer.");
+        } else {
+            // ignore unretrieved configurations related to producer transaction
+            config.ignore(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG);
         }
         return transactionManager;
     }
@@ -685,11 +755,15 @@ public void sendOffsetsToTransaction(Map<TopicPartition, OffsetAndMetadata> offs
 
     /**
      * Commits the ongoing transaction. This method will flush any unsent records before actually committing the transaction.
-     *
+     * <p>
      * Further, if any of the {@link #send(ProducerRecord)} calls which were part of the transaction hit irrecoverable
      * errors, this method will throw the last received exception immediately and the transaction will not be committed.
      * So all {@link #send(ProducerRecord)} calls in a transaction must succeed in order for this method to succeed.
-     *
+     * <p>
+     * If the transaction is committed successfully and this method returns without throwing an exception, it is guaranteed
+     * that all {@link Callback callbacks} for records in the transaction will have been invoked and completed.
+     * Note that exceptions thrown by callbacks are ignored; the producer proceeds to commit the transaction in any case.
+     * <p>
      * Note that this method will raise {@link TimeoutException} if the transaction cannot be committed before expiration
      * of {@code max.block.ms}. Additionally, it will raise {@link InterruptException} if interrupted.
      * It is safe to retry in either case, but it is not possible to attempt a different operation (such as abortTransaction)
@@ -768,8 +842,9 @@ public Future<RecordMetadata> send(ProducerRecord<K, V> record) {
      * response after each one.
      * <p>
      * The result of the send is a {@link RecordMetadata} specifying the partition the record was sent to, the offset
-     * it was assigned and the timestamp of the record. If
-     * {@link org.apache.kafka.common.record.TimestampType#CREATE_TIME CreateTime} is used by the topic, the timestamp
+     * it was assigned and the timestamp of the record. If the producer is configured with acks = 0, the {@link RecordMetadata}
+     * will have offset = -1 because the producer does not wait for the acknowledgement from the broker.
+     * If {@link org.apache.kafka.common.record.TimestampType#CREATE_TIME CreateTime} is used by the topic, the timestamp
      * will be the user provided timestamp or the record send time if the user did not specify a timestamp for the
      * record. If {@link org.apache.kafka.common.record.TimestampType#LOG_APPEND_TIME LogAppendTime} is used for the
      * topic, the timestamp will be the Kafka broker local time when the message is appended.
@@ -881,11 +956,24 @@ private void throwIfProducerClosed() {
             throw new IllegalStateException("Cannot perform operation after producer has been closed");
     }
 
+    /**
+     * Call deprecated {@link Partitioner#onNewBatch}
+     */
+    @SuppressWarnings("deprecation")
+    private void onNewBatch(String topic, Cluster cluster, int prevPartition) {
+        assert partitioner != null;
+        partitioner.onNewBatch(topic, cluster, prevPartition);
+    }
+
     /**
      * Implementation of asynchronously send a record to a topic.
      */
     private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
-        TopicPartition tp = null;
+        // Append callback takes care of the following:
+        //  - call interceptors and user callback on completion
+        //  - remember partition that is calculated in RecordAccumulator.append
+        AppendCallbacks<K, V> appendCallbacks = new AppendCallbacks<K, V>(callback, this.interceptors, record);
+
         try {
             throwIfProducerClosed();
             // first make sure the metadata for the topic is available
@@ -917,8 +1005,11 @@ private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback call
                         " to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
                         " specified in value.serializer", cce);
             }
+
+            // Try to calculate partition, but note that after this call it can be RecordMetadata.UNKNOWN_PARTITION,
+            // which means that the RecordAccumulator would pick a partition using built-in logic (which may
+            // take into account broker load, the amount of data produced to each partition, etc.).
             int partition = partition(record, serializedKey, serializedValue, cluster);
-            tp = new TopicPartition(record.topic(), partition);
 
             setReadOnly(record.headers());
             Header[] headers = record.headers().toArray();
@@ -927,36 +1018,38 @@ private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback call
                     compressionType, serializedKey, serializedValue, headers);
             ensureValidRecordSize(serializedSize);
             long timestamp = record.timestamp() == null ? nowMs : record.timestamp();
-            if (log.isTraceEnabled()) {
-                log.trace("Attempting to append record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition);
-            }
-            // producer callback will make sure to call both 'callback' and interceptor callback
-            Callback interceptCallback = new InterceptorCallback<>(callback, this.interceptors, tp);
 
-            if (transactionManager != null) {
-                transactionManager.maybeAddPartition(tp);
-            }
+            // A custom partitioner may take advantage on the onNewBatch callback.
+            boolean abortOnNewBatch = partitioner != null;
 
-            RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey,
-                    serializedValue, headers, interceptCallback, remainingWaitMs, true, nowMs);
+            // Append the record to the accumulator.  Note, that the actual partition may be
+            // calculated there and can be accessed via appendCallbacks.topicPartition.
+            RecordAccumulator.RecordAppendResult result = accumulator.append(record.topic(), partition, timestamp, serializedKey,
+                    serializedValue, headers, appendCallbacks, remainingWaitMs, abortOnNewBatch, nowMs, cluster);
+            assert appendCallbacks.getPartition() != RecordMetadata.UNKNOWN_PARTITION;
 
             if (result.abortForNewBatch) {
                 int prevPartition = partition;
-                partitioner.onNewBatch(record.topic(), cluster, prevPartition);
+                onNewBatch(record.topic(), cluster, prevPartition);
                 partition = partition(record, serializedKey, serializedValue, cluster);
-                tp = new TopicPartition(record.topic(), partition);
                 if (log.isTraceEnabled()) {
                     log.trace("Retrying append due to new batch creation for topic {} partition {}. The old partition was {}", record.topic(), partition, prevPartition);
                 }
-                // producer callback will make sure to call both 'callback' and interceptor callback
-                interceptCallback = new InterceptorCallback<>(callback, this.interceptors, tp);
+                result = accumulator.append(record.topic(), partition, timestamp, serializedKey,
+                    serializedValue, headers, appendCallbacks, remainingWaitMs, false, nowMs, cluster);
+            }
 
-                result = accumulator.append(tp, timestamp, serializedKey,
-                    serializedValue, headers, interceptCallback, remainingWaitMs, false, nowMs);
+            // Add the partition to the transaction (if in progress) after it has been successfully
+            // appended to the accumulator. We cannot do it before because the partition may be
+            // unknown or the initially selected partition may be changed when the batch is closed
+            // (as indicated by `abortForNewBatch`). Note that the `Sender` will refuse to dequeue
+            // batches from the accumulator until they have been added to the transaction.
+            if (transactionManager != null) {
+                transactionManager.maybeAddPartition(appendCallbacks.topicPartition());
             }
 
             if (result.batchIsFull || result.newBatchCreated) {
-                log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
+                log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), appendCallbacks.getPartition());
                 this.sender.wakeup();
             }
             return result.future;
@@ -965,34 +1058,28 @@ private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback call
             // for other exceptions throw directly
         } catch (ApiException e) {
             log.debug("Exception occurred during message send:", e);
-            // producer callback will make sure to call both 'callback' and interceptor callback
-            if (tp == null) {
-                // set topicPartition to -1 when null
-                tp = ProducerInterceptors.extractTopicPartition(record);
+            if (callback != null) {
+                TopicPartition tp = appendCallbacks.topicPartition();
+                RecordMetadata nullMetadata = new RecordMetadata(tp, -1, -1, RecordBatch.NO_TIMESTAMP, -1, -1);
+                callback.onCompletion(nullMetadata, e);
             }
-
-            Callback interceptCallback = new InterceptorCallback<>(callback, this.interceptors, tp);
-
-            // The onCompletion callback does expect a non-null metadata, but one will be created inside
-            // the interceptor's onCompletion implementation before the user's callback is invoked.
-            interceptCallback.onCompletion(null, e);
             this.errors.record();
-            this.interceptors.onSendError(record, tp, e);
+            this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
             if (transactionManager != null) {
                 transactionManager.maybeTransitionToErrorState(e);
             }
             return new FutureFailure(e);
         } catch (InterruptedException e) {
             this.errors.record();
-            this.interceptors.onSendError(record, tp, e);
+            this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
             throw new InterruptException(e);
         } catch (KafkaException e) {
             this.errors.record();
-            this.interceptors.onSendError(record, tp, e);
+            this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
             throw e;
         } catch (Exception e) {
             // we notify interceptor about all exceptions, since onSend is called before anything else in this method
-            this.interceptors.onSendError(record, tp, e);
+            this.interceptors.onSendError(record, appendCallbacks.topicPartition(), e);
             throw e;
         }
     }
@@ -1033,6 +1120,7 @@ private ClusterAndWaitTime waitOnMetadata(String topic, Integer partition, long
         // Issue metadata requests until we have metadata for the topic and the requested partition,
         // or until maxWaitTimeMs is exceeded. This is necessary in case the metadata
         // is stale and the number of partitions for this topic has increased in the meantime.
+        long nowNanos = time.nanoseconds();
         do {
             if (partition != null) {
                 log.trace("Requesting metadata update for partition {} of topic {}.", partition, topic);
@@ -1064,6 +1152,8 @@ private ClusterAndWaitTime waitOnMetadata(String topic, Integer partition, long
             partitionsCount = cluster.partitionCountForTopic(topic);
         } while (partitionsCount == null || (partition != null && partition >= partitionsCount));
 
+        producerMetrics.recordMetadataWait(time.nanoseconds() - nowNanos);
+
         return new ClusterAndWaitTime(cluster, elapsed);
     }
 
@@ -1273,21 +1363,33 @@ private ClusterResourceListeners configureClusterResourceListeners(Serializer<K>
     /**
      * computes partition for given record.
      * if the record has partition returns the value otherwise
-     * calls configured partitioner class to compute the partition.
+     * if custom partitioner is specified, call it to compute partition
+     * otherwise try to calculate partition based on key.
+     * If there is no key or key should be ignored return
+     * RecordMetadata.UNKNOWN_PARTITION to indicate any partition
+     * can be used (the partition is then calculated by built-in
+     * partitioning logic).
      */
     private int partition(ProducerRecord<K, V> record, byte[] serializedKey, byte[] serializedValue, Cluster cluster) {
-        Integer partition = record.partition();
-        if (partition != null) {
-            return partition;
-        }
+        if (record.partition() != null)
+            return record.partition();
 
-        int customPartition = partitioner.partition(
+        if (partitioner != null) {
+            int customPartition = partitioner.partition(
                 record.topic(), record.key(), serializedKey, record.value(), serializedValue, cluster);
-        if (customPartition < 0) {
-            throw new IllegalArgumentException(String.format(
+            if (customPartition < 0) {
+                throw new IllegalArgumentException(String.format(
                     "The partitioner generated an invalid partition number: %d. Partition number should always be non-negative.", customPartition));
+            }
+            return customPartition;
+        }
+
+        if (serializedKey != null && !partitionerIgnoreKeys) {
+            // hash the keyBytes to choose a partition
+            return BuiltInPartitioner.partitionForKey(serializedKey, cluster.partitionsForTopic(record.topic()).size());
+        } else {
+            return RecordMetadata.UNKNOWN_PARTITION;
         }
-        return customPartition;
     }
 
     private void throwIfInvalidGroupMetadata(ConsumerGroupMetadata groupMetadata) {
@@ -1355,25 +1457,66 @@ public boolean isDone() {
     }
 
     /**
-     * A callback called when producer request is complete. It in turn calls user-supplied callback (if given) and
-     * notifies producer interceptors about the request completion.
+     * Callbacks that are called by the RecordAccumulator append functions:
+     *  - user callback
+     *  - interceptor callbacks
+     *  - partition callback
      */
-    private static class InterceptorCallback<K, V> implements Callback {
+    private class AppendCallbacks<K, V> implements RecordAccumulator.AppendCallbacks {
         private final Callback userCallback;
         private final ProducerInterceptors<K, V> interceptors;
-        private final TopicPartition tp;
+        private final String topic;
+        private final Integer recordPartition;
+        private final String recordLogString;
+        private volatile int partition = RecordMetadata.UNKNOWN_PARTITION;
+        private volatile TopicPartition topicPartition;
 
-        private InterceptorCallback(Callback userCallback, ProducerInterceptors<K, V> interceptors, TopicPartition tp) {
+        private AppendCallbacks(Callback userCallback, ProducerInterceptors<K, V> interceptors, ProducerRecord<K, V> record) {
             this.userCallback = userCallback;
             this.interceptors = interceptors;
-            this.tp = tp;
+            // Extract record info as we don't want to keep a reference to the record during
+            // whole lifetime of the batch.
+            // We don't want to have an NPE here, because the interceptors would not be notified (see .doSend).
+            topic = record != null ? record.topic() : null;
+            recordPartition = record != null ? record.partition() : null;
+            recordLogString = log.isTraceEnabled() && record != null ? record.toString() : "";
         }
 
+        @Override
         public void onCompletion(RecordMetadata metadata, Exception exception) {
-            metadata = metadata != null ? metadata : new RecordMetadata(tp, -1, -1, RecordBatch.NO_TIMESTAMP, -1, -1);
+            if (metadata == null) {
+                metadata = new RecordMetadata(topicPartition(), -1, -1, RecordBatch.NO_TIMESTAMP, -1, -1);
+            }
             this.interceptors.onAcknowledgement(metadata, exception);
             if (this.userCallback != null)
                 this.userCallback.onCompletion(metadata, exception);
         }
+
+        @Override
+        public void setPartition(int partition) {
+            assert partition != RecordMetadata.UNKNOWN_PARTITION;
+            this.partition = partition;
+
+            if (log.isTraceEnabled()) {
+                // Log the message here, because we don't know the partition before that.
+                log.trace("Attempting to append record {} with callback {} to topic {} partition {}", recordLogString, userCallback, topic, partition);
+            }
+        }
+
+        public int getPartition() {
+            return partition;
+        }
+
+        public TopicPartition topicPartition() {
+            if (topicPartition == null && topic != null) {
+                if (partition != RecordMetadata.UNKNOWN_PARTITION)
+                    topicPartition = new TopicPartition(topic, partition);
+                else if (recordPartition != null)
+                    topicPartition = new TopicPartition(topic, recordPartition);
+                else
+                    topicPartition = new TopicPartition(topic, RecordMetadata.UNKNOWN_PARTITION);
+            }
+            return topicPartition;
+        }
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java b/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java
index 4fd540dceaa8a..3df73b20a4d6e 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/MockProducer.java
@@ -18,7 +18,6 @@
 
 import org.apache.kafka.clients.consumer.ConsumerGroupMetadata;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.clients.producer.internals.FutureRecordMetadata;
 import org.apache.kafka.clients.producer.internals.ProduceRequestResult;
 import org.apache.kafka.common.Cluster;
@@ -117,10 +116,24 @@ public MockProducer(final Cluster cluster,
      *
      * Equivalent to {@link #MockProducer(Cluster, boolean, Partitioner, Serializer, Serializer)} new MockProducer(Cluster.empty(), autoComplete, new DefaultPartitioner(), keySerializer, valueSerializer)}
      */
+    @SuppressWarnings("deprecation")
     public MockProducer(final boolean autoComplete,
                         final Serializer<K> keySerializer,
                         final Serializer<V> valueSerializer) {
-        this(Cluster.empty(), autoComplete, new DefaultPartitioner(), keySerializer, valueSerializer);
+        this(Cluster.empty(), autoComplete, new org.apache.kafka.clients.producer.internals.DefaultPartitioner(), keySerializer, valueSerializer);
+    }
+
+    /**
+     * Create a new mock producer with invented metadata the given autoComplete setting and key\value serializers.
+     *
+     * Equivalent to {@link #MockProducer(Cluster, boolean, Partitioner, Serializer, Serializer)} new MockProducer(cluster, autoComplete, new DefaultPartitioner(), keySerializer, valueSerializer)}
+     */
+    @SuppressWarnings("deprecation")
+    public MockProducer(final Cluster cluster,
+                        final boolean autoComplete,
+                        final Serializer<K> keySerializer,
+                        final Serializer<V> valueSerializer) {
+        this(cluster, autoComplete, new org.apache.kafka.clients.producer.internals.DefaultPartitioner(), keySerializer, valueSerializer);
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/Partitioner.java b/clients/src/main/java/org/apache/kafka/clients/producer/Partitioner.java
index 13eaa5aaea9af..eeafc73d662c3 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/Partitioner.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/Partitioner.java
@@ -44,12 +44,16 @@ public interface Partitioner extends Configurable, Closeable {
     void close();
 
     /**
+     * Note this method is only implemented in DefatultPartitioner and UniformStickyPartitioner which
+     * are now deprecated.  See KIP-794 for more info.
+     *
      * Notifies the partitioner a new batch is about to be created. When using the sticky partitioner,
-     * this method can change the chosen sticky partition for the new batch. 
+     * this method can change the chosen sticky partition for the new batch.
      * @param topic The topic name
      * @param cluster The current cluster metadata
      * @param prevPartition The partition previously selected for the record that triggered a new batch
      */
+    @Deprecated
     default void onNewBatch(String topic, Cluster cluster, int prevPartition) {
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
index afc1e55cdfdad..aff5e49cfcb1b 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerConfig.java
@@ -18,7 +18,6 @@
 
 import org.apache.kafka.clients.ClientDnsLookup;
 import org.apache.kafka.clients.CommonClientConfigs;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigDef.Importance;
@@ -26,7 +25,10 @@
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.SecurityConfig;
 import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.record.CompressionType;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.serialization.Serializer;
+import org.apache.kafka.common.utils.Utils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -91,6 +93,26 @@ public class ProducerConfig extends AbstractConfig {
                                                  + "This <code>linger.ms</code> setting defaults to 0, which means we'll immediately send out a record even the accumulated "
                                                  + "batch size is under this <code>batch.size</code> setting.";
 
+    /** <code>partitioner.adaptive.partitioning.enable</code> */
+    public static final String PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG = "partitioner.adaptive.partitioning.enable";
+    private static final String PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_DOC =
+            "When set to 'true', the producer will try to adapt to broker performance and produce more messages to partitions hosted on faster brokers. "
+            + "If 'false', producer will try to distribute messages uniformly. Note: this setting has no effect if a custom partitioner is used";
+
+    /** <code>partitioner.availability.timeout.ms</code> */
+    public static final String PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG = "partitioner.availability.timeout.ms";
+    private static final String PARTITIONER_AVAILABILITY_TIMEOUT_MS_DOC =
+            "If a broker cannot process produce requests from a partition for <code>" + PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG + "</code> time, "
+            + "the partitioner treats that partition as not available.  If the value is 0, this logic is disabled. "
+            + "Note: this setting has no effect if a custom partitioner is used or <code>" + PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG
+            + "<code/> is set to 'false'";
+
+    /** <code>partitioner.ignore.keys</code> */
+    public static final String PARTITIONER_IGNORE_KEYS_CONFIG = "partitioner.ignore.keys";
+    private static final String PARTITIONER_IGNORE_KEYS_DOC = "When set to 'true' the producer won't use record keys to choose a partition. "
+            + "If 'false', producer would choose a partition based on a hash of the key when a key is present. "
+            + "Note: this setting has no effect if a custom partitioner is used.";
+
     /** <code>acks</code> */
     public static final String ACKS_CONFIG = "acks";
     private static final String ACKS_DOC = "The number of acknowledgments the producer requires the leader to have received before considering a request complete. This controls the "
@@ -214,10 +236,11 @@ public class ProducerConfig extends AbstractConfig {
     /** <code>max.in.flight.requests.per.connection</code> */
     public static final String MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION = "max.in.flight.requests.per.connection";
     private static final String MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION_DOC = "The maximum number of unacknowledged requests the client will send on a single connection before blocking."
-                                                                            + " Note that if this config is set to be greater than 1 and <code>enable.idempotence</code> is set to false, there is a risk of"
-                                                                            + " message re-ordering after a failed send due to retries (i.e., if retries are enabled)."
-                                                                            + " Additionally, enabling idempotence requires this config value to be less than or equal to " + MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION_FOR_IDEMPOTENCE + "."
-                                                                            + " If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled.";
+                                                                            + " Note that if this configuration is set to be greater than 1 and <code>enable.idempotence</code> is set to false, there is a risk of"
+                                                                            + " message reordering after a failed send due to retries (i.e., if retries are enabled); "
+                                                                            + " if retries are disabled or if <code>enable.idempotence</code> is set to true, ordering will be preserved."
+                                                                            + " Additionally, enabling idempotence requires the value of this configuration to be less than or equal to " + MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION_FOR_IDEMPOTENCE + "."
+                                                                            + " If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled. ";
 
     /** <code>retries</code> */
     public static final String RETRIES_CONFIG = CommonClientConfigs.RETRIES_CONFIG;
@@ -256,11 +279,11 @@ public class ProducerConfig extends AbstractConfig {
     public static final String PARTITIONER_CLASS_CONFIG = "partitioner.class";
     private static final String PARTITIONER_CLASS_DOC = "A class to use to determine which partition to be send to when produce the records. Available options are:" +
         "<ul>" +
-            "<li><code>org.apache.kafka.clients.producer.internals.DefaultPartitioner</code>: The default partitioner. " +
-        "This strategy will try sticking to a partition until the batch is full, or <code>linger.ms</code> is up. It works with the strategy:" +
+            "<li>If not set, the default partitioning logic is used. " +
+        "This strategy will try sticking to a partition until " + BATCH_SIZE_CONFIG + " bytes is produced to the partition. It works with the strategy:" +
                 "<ul>" +
                     "<li>If no partition is specified but a key is present, choose a partition based on a hash of the key</li>" +
-                    "<li>If no partition or key is present, choose the sticky partition that changes when the batch is full, or <code>linger.ms</code> is up.</li>" +
+                    "<li>If no partition or key is present, choose the sticky partition that changes when " + BATCH_SIZE_CONFIG + " bytes are produced to the partition.</li>" +
                 "</ul>" +
             "</li>" +
             "<li><code>org.apache.kafka.clients.producer.RoundRobinPartitioner</code>: This partitioning strategy is that " +
@@ -268,9 +291,6 @@ public class ProducerConfig extends AbstractConfig {
         "until we run out of partitions and start over again. Note: There's a known issue that will cause uneven distribution when new batch is created. " +
         "Please check KAFKA-9965 for more detail." +
             "</li>" +
-            "<li><code>org.apache.kafka.clients.producer.UniformStickyPartitioner</code>: This partitioning strategy will " +
-        "try sticking to a partition(no matter if the 'key' is provided or not) until the batch is full, or <code>linger.ms</code> is up." +
-            "</li>" +
         "</ul>" +
         "<p>Implementing the <code>org.apache.kafka.clients.producer.Partitioner</code> interface allows you to plug in a custom partitioner.";
 
@@ -329,8 +349,11 @@ public class ProducerConfig extends AbstractConfig {
                                         in("all", "-1", "0", "1"),
                                         Importance.LOW,
                                         ACKS_DOC)
-                                .define(COMPRESSION_TYPE_CONFIG, Type.STRING, "none", Importance.HIGH, COMPRESSION_TYPE_DOC)
+                                .define(COMPRESSION_TYPE_CONFIG, Type.STRING, CompressionType.NONE.name, in(Utils.enumOptions(CompressionType.class)), Importance.HIGH, COMPRESSION_TYPE_DOC)
                                 .define(BATCH_SIZE_CONFIG, Type.INT, 16384, atLeast(0), Importance.MEDIUM, BATCH_SIZE_DOC)
+                                .define(PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG, Type.BOOLEAN, true, Importance.LOW, PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_DOC)
+                                .define(PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG, Type.LONG, 0, atLeast(0), Importance.LOW, PARTITIONER_AVAILABILITY_TIMEOUT_MS_DOC)
+                                .define(PARTITIONER_IGNORE_KEYS_CONFIG, Type.BOOLEAN, false, Importance.MEDIUM, PARTITIONER_IGNORE_KEYS_DOC)
                                 .define(LINGER_MS_CONFIG, Type.LONG, 0, atLeast(0), Importance.MEDIUM, LINGER_MS_DOC)
                                 .define(DELIVERY_TIMEOUT_MS_CONFIG, Type.INT, 120 * 1000, atLeast(0), Importance.MEDIUM, DELIVERY_TIMEOUT_MS_DOC)
                                 .define(CLIENT_ID_CONFIG, Type.STRING, "", Importance.MEDIUM, CommonClientConfigs.CLIENT_ID_DOC)
@@ -415,7 +438,7 @@ public class ProducerConfig extends AbstractConfig {
                                         CommonClientConfigs.CONNECTIONS_MAX_IDLE_MS_DOC)
                                 .define(PARTITIONER_CLASS_CONFIG,
                                         Type.CLASS,
-                                        DefaultPartitioner.class,
+                                        null,
                                         Importance.MEDIUM, PARTITIONER_CLASS_DOC)
                                 .define(INTERCEPTOR_CLASSES_CONFIG,
                                         Type.LIST,
@@ -426,6 +449,7 @@ public class ProducerConfig extends AbstractConfig {
                                 .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                                         Type.STRING,
                                         CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                                        in(Utils.enumOptions(SecurityProtocol.class)),
                                         Importance.MEDIUM,
                                         CommonClientConfigs.SECURITY_PROTOCOL_DOC)
                                 .define(SECURITY_PROVIDERS_CONFIG,
@@ -455,6 +479,7 @@ public class ProducerConfig extends AbstractConfig {
 
     @Override
     protected Map<String, Object> postProcessParsedConfig(final Map<String, Object> parsedValues) {
+        CommonClientConfigs.postValidateSaslMechanismConfig(this);
         Map<String, Object> refinedConfigs = CommonClientConfigs.postProcessReconnectBackoffConfigs(this, parsedValues);
         postProcessAndValidateIdempotenceConfigs(refinedConfigs);
         maybeOverrideClientId(refinedConfigs);
@@ -488,11 +513,11 @@ private void postProcessAndValidateIdempotenceConfigs(final Map<String, Object>
                 if (userConfiguredIdempotence) {
                     throw new ConfigException("Must set " + RETRIES_CONFIG + " to non-zero when using the idempotent producer.");
                 }
-                log.info("Idempotence will be disabled because {} is set to 0.", RETRIES_CONFIG, retries);
+                log.info("Idempotence will be disabled because {} is set to 0.", RETRIES_CONFIG);
                 shouldDisableIdempotence = true;
             }
 
-            final short acks = Short.valueOf(acksStr);
+            final short acks = Short.parseShort(acksStr);
             if (acks != (short) -1) {
                 if (userConfiguredIdempotence) {
                     throw new ConfigException("Must set " + ACKS_CONFIG + " to all in order to use the idempotent " +
@@ -537,11 +562,16 @@ private static String parseAcks(String acksString) {
     static Map<String, Object> appendSerializerToConfig(Map<String, Object> configs,
             Serializer<?> keySerializer,
             Serializer<?> valueSerializer) {
+        // validate serializer configuration, if the passed serializer instance is null, the user must explicitly set a valid serializer configuration value
         Map<String, Object> newConfigs = new HashMap<>(configs);
         if (keySerializer != null)
             newConfigs.put(KEY_SERIALIZER_CLASS_CONFIG, keySerializer.getClass());
+        else if (newConfigs.get(KEY_SERIALIZER_CLASS_CONFIG) == null)
+            throw new ConfigException(KEY_SERIALIZER_CLASS_CONFIG, null, "must be non-null.");
         if (valueSerializer != null)
             newConfigs.put(VALUE_SERIALIZER_CLASS_CONFIG, valueSerializer.getClass());
+        else if (newConfigs.get(VALUE_SERIALIZER_CLASS_CONFIG) == null)
+            throw new ConfigException(VALUE_SERIALIZER_CLASS_CONFIG, null, "must be non-null.");
         return newConfigs;
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerInterceptor.java b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerInterceptor.java
index 8f89d6faa9ab1..48caf98d44a3c 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/ProducerInterceptor.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/ProducerInterceptor.java
@@ -34,7 +34,7 @@
  * <p>
  * Implement {@link org.apache.kafka.common.ClusterResourceListener} to receive cluster metadata once it's available. Please see the class documentation for ClusterResourceListener for more information.
  */
-public interface ProducerInterceptor<K, V> extends Configurable {
+public interface ProducerInterceptor<K, V> extends Configurable, AutoCloseable {
     /**
      * This is called from {@link org.apache.kafka.clients.producer.KafkaProducer#send(ProducerRecord)} and
      * {@link org.apache.kafka.clients.producer.KafkaProducer#send(ProducerRecord, Callback)} methods, before key and value
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/UniformStickyPartitioner.java b/clients/src/main/java/org/apache/kafka/clients/producer/UniformStickyPartitioner.java
index be11d0b662445..6e4fe420df259 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/UniformStickyPartitioner.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/UniformStickyPartitioner.java
@@ -23,6 +23,10 @@
 
 
 /**
+ * NOTE this partitioner is deprecated and shouldn't be used.  To use default partitioning logic
+ * remove partitioner.class configuration setting and set partitioner.ignore.keys=true.
+ * See KIP-794 for more info.
+ *
  * The partitioning strategy:
  * <ul>
  * <li>If a partition is specified in the record, use it
@@ -33,6 +37,7 @@
  * 
  * See KIP-480 for details about sticky partitioning.
  */
+@Deprecated
 public class UniformStickyPartitioner implements Partitioner {
 
     private final StickyPartitionCache stickyPartitionCache = new StickyPartitionCache();
@@ -59,6 +64,7 @@ public void close() {}
      * If a batch completed for the current sticky partition, change the sticky partition. 
      * Alternately, if no sticky partition has been determined, set one.
      */
+    @SuppressWarnings("deprecation")
     public void onNewBatch(String topic, Cluster cluster, int prevPartition) {
         stickyPartitionCache.nextPartition(topic, cluster, prevPartition);
     }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
index 210911ada38cf..67cf485f81a55 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BufferPool.java
@@ -279,7 +279,8 @@ public void deallocate(ByteBuffer buffer, int size) {
     }
 
     public void deallocate(ByteBuffer buffer) {
-        deallocate(buffer, buffer.capacity());
+        if (buffer != null)
+            deallocate(buffer, buffer.capacity());
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java
new file mode 100644
index 0000000000000..a5805df56b78d
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.common.utils.Utils;
+import org.slf4j.Logger;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
+
+/**
+ * Built-in default partitioner.  Note, that this is just a utility class that is used directly from
+ * RecordAccumulator, it does not implement the Partitioner interface.
+ *
+ * The class keeps track of various bookkeeping information required for adaptive sticky partitioning
+ * (described in detail in KIP-794).  There is one partitioner object per topic.
+ */
+public class BuiltInPartitioner {
+    private final Logger log;
+    private final String topic;
+    private final int stickyBatchSize;
+
+    private volatile PartitionLoadStats partitionLoadStats = null;
+    private final AtomicReference<StickyPartitionInfo> stickyPartitionInfo = new AtomicReference<>();
+
+    // Visible and used for testing only.
+    static volatile public Supplier<Integer> mockRandom = null;
+
+    /**
+     * BuiltInPartitioner constructor.
+     *
+     * @param topic The topic
+     * @param stickyBatchSize How much to produce to partition before switch
+     */
+    public BuiltInPartitioner(LogContext logContext, String topic, int stickyBatchSize) {
+        this.log = logContext.logger(BuiltInPartitioner.class);
+        this.topic = topic;
+        this.stickyBatchSize = stickyBatchSize;
+    }
+
+    /**
+     * Calculate the next partition for the topic based on the partition load stats.
+     */
+    private int nextPartition(Cluster cluster) {
+        int random = mockRandom != null ? mockRandom.get() : Utils.toPositive(ThreadLocalRandom.current().nextInt());
+
+        // Cache volatile variable in local variable.
+        PartitionLoadStats partitionLoadStats = this.partitionLoadStats;
+        int partition;
+
+        if (partitionLoadStats == null) {
+            // We don't have stats to do adaptive partitioning (or it's disabled), just switch to the next
+            // partition based on uniform distribution.
+            List<PartitionInfo> availablePartitions = cluster.availablePartitionsForTopic(topic);
+            if (availablePartitions.size() > 0) {
+                partition = availablePartitions.get(random % availablePartitions.size()).partition();
+            } else {
+                // We don't have available partitions, just pick one among all partitions.
+                List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
+                partition = random % partitions.size();
+            }
+        } else {
+            // Calculate next partition based on load distribution.
+            // Note that partitions without leader are excluded from the partitionLoadStats.
+            assert partitionLoadStats.length > 0;
+
+            int[] cumulativeFrequencyTable = partitionLoadStats.cumulativeFrequencyTable;
+            int weightedRandom = random % cumulativeFrequencyTable[partitionLoadStats.length - 1];
+
+            // By construction, the cumulative frequency table is sorted, so we can use binary
+            // search to find the desired index.
+            int searchResult = Arrays.binarySearch(cumulativeFrequencyTable, 0, partitionLoadStats.length, weightedRandom);
+
+            // binarySearch results the index of the found element, or -(insertion_point) - 1
+            // (where insertion_point is the index of the first element greater than the key).
+            // We need to get the index of the first value that is strictly greater, which
+            // would be the insertion point, except if we found the element that's equal to
+            // the searched value (in this case we need to get next).  For example, if we have
+            //  4 5 8
+            // and we're looking for 3, then we'd get the insertion_point = 0, and the function
+            // would return -0 - 1 = -1, by adding 1 we'd get 0.  If we're looking for 4, we'd
+            // get 0, and we need the next one, so adding 1 works here as well.
+            int partitionIndex = Math.abs(searchResult + 1);
+            assert partitionIndex < partitionLoadStats.length;
+            partition = partitionLoadStats.partitionIds[partitionIndex];
+        }
+
+        log.trace("Switching to partition {} in topic {}", partition, topic);
+        return partition;
+    }
+
+    /**
+     * Test-only function.  When partition load stats are defined, return the end of range for the
+     * random number.
+     */
+    public int loadStatsRangeEnd() {
+        assert partitionLoadStats != null;
+        assert partitionLoadStats.length > 0;
+        return partitionLoadStats.cumulativeFrequencyTable[partitionLoadStats.length - 1];
+    }
+
+    /**
+     * Peek currently chosen sticky partition.  This method works in conjunction with {@link #isPartitionChanged}
+     * and {@link #updatePartitionInfo}.  The workflow is the following:
+     *
+     * 1. peekCurrentPartitionInfo is called to know which partition to lock.
+     * 2. Lock partition's batch queue.
+     * 3. isPartitionChanged under lock to make sure that nobody raced us.
+     * 4. Append data to buffer.
+     * 5. updatePartitionInfo to update produced bytes and maybe switch partition.
+     *
+     *  It's important that steps 3-5 are under partition's batch queue lock.
+     *
+     * @param cluster The cluster information (needed if there is no current partition)
+     * @return sticky partition info object
+     */
+    StickyPartitionInfo peekCurrentPartitionInfo(Cluster cluster) {
+        StickyPartitionInfo partitionInfo = stickyPartitionInfo.get();
+        if (partitionInfo != null)
+            return partitionInfo;
+
+        // We're the first to create it.
+        partitionInfo = new StickyPartitionInfo(nextPartition(cluster));
+        if (stickyPartitionInfo.compareAndSet(null, partitionInfo))
+            return partitionInfo;
+
+        // Someone has raced us.
+        return stickyPartitionInfo.get();
+    }
+
+    /**
+     * Check if partition is changed by a concurrent thread.  NOTE this function needs to be called under
+     * the partition's batch queue lock.
+     *
+     * @param partitionInfo The sticky partition info object returned by peekCurrentPartitionInfo
+     * @return true if sticky partition object is changed (race condition)
+     */
+    boolean isPartitionChanged(StickyPartitionInfo partitionInfo) {
+        // partitionInfo may be null if the caller didn't use built-in partitioner.
+        return partitionInfo != null && stickyPartitionInfo.get() != partitionInfo;
+    }
+
+    /**
+     * Update partition info with the number of bytes appended and maybe switch partition.
+     * NOTE this function needs to be called under the partition's batch queue lock.
+     *
+     * @param partitionInfo The sticky partition info object returned by peekCurrentPartitionInfo
+     * @param appendedBytes The number of bytes appended to this partition
+     * @param cluster The cluster information
+     */
+    void updatePartitionInfo(StickyPartitionInfo partitionInfo, int appendedBytes, Cluster cluster) {
+        // partitionInfo may be null if the caller didn't use built-in partitioner.
+        if (partitionInfo == null)
+            return;
+
+        assert partitionInfo == stickyPartitionInfo.get();
+        int producedBytes = partitionInfo.producedBytes.addAndGet(appendedBytes);
+        if (producedBytes >= stickyBatchSize) {
+            // We've produced enough to this partition, switch to next.
+            StickyPartitionInfo newPartitionInfo = new StickyPartitionInfo(nextPartition(cluster));
+            stickyPartitionInfo.set(newPartitionInfo);
+        }
+    }
+
+    /**
+     * Update partition load stats from the queue sizes of each partition
+     * NOTE: queueSizes are modified in place to avoid allocations
+     *
+     * @param queueSizes The queue sizes, partitions without leaders are excluded
+     * @param partitionIds The partition ids for the queues, partitions without leaders are excluded
+     * @param length The logical length of the arrays (could be less): we may eliminate some partitions
+     *               based on latency, but to avoid reallocation of the arrays, we just decrement
+     *               logical length
+     * Visible for testing
+     */
+    public void updatePartitionLoadStats(int[] queueSizes, int[] partitionIds, int length) {
+        if (queueSizes == null) {
+            log.trace("No load stats for topic {}, not using adaptive", topic);
+            partitionLoadStats = null;
+            return;
+        }
+        assert queueSizes.length == partitionIds.length;
+        assert length <= queueSizes.length;
+
+        // The queueSizes.length represents the number of all partitions in the topic and if we have
+        // less than 2 partitions, there is no need to do adaptive logic.
+        // If partitioner.availability.timeout.ms != 0, then partitions that experience high latencies
+        // (greater than partitioner.availability.timeout.ms) may be excluded, the length represents
+        // partitions that are not excluded.  If some partitions were excluded, we'd still want to
+        // go through adaptive logic, even if we have one partition.
+        // See also RecordAccumulator#partitionReady where the queueSizes are built.
+        if (length < 1 || queueSizes.length < 2) {
+            log.trace("The number of partitions is too small: available={}, all={}, not using adaptive for topic {}",
+                    length, queueSizes.length, topic);
+            partitionLoadStats = null;
+            return;
+        }
+
+        // We build cumulative frequency table from the queue sizes in place.  At the beginning
+        // each entry contains queue size, then we invert it (so it represents the frequency)
+        // and convert to a running sum.  Then a uniformly distributed random variable
+        // in the range [0..last) would map to a partition with weighted probability.
+        // Example: suppose we have 3 partitions with the corresponding queue sizes:
+        //  0 3 1
+        // Then we can invert them by subtracting the queue size from the max queue size + 1 = 4:
+        //  4 1 3
+        // Then we can convert it into a running sum (next value adds previous value):
+        //  4 5 8
+        // Now if we get a random number in the range [0..8) and find the first value that
+        // is strictly greater than the number (e.g. for 4 it would be 5), then the index of
+        // the value is the index of the partition we're looking for.  In this example
+        // random numbers 0, 1, 2, 3 would map to partition[0], 4 would map to partition[1]
+        // and 5, 6, 7 would map to partition[2].
+
+        // Calculate max queue size + 1 and check if all sizes are the same.
+        int maxSizePlus1 = queueSizes[0];
+        boolean allEqual = true;
+        for (int i = 1; i < length; i++) {
+            if (queueSizes[i] != maxSizePlus1)
+                allEqual = false;
+            if (queueSizes[i] > maxSizePlus1)
+                maxSizePlus1 = queueSizes[i];
+        }
+        ++maxSizePlus1;
+
+        if (allEqual && length == queueSizes.length) {
+            // No need to have complex probability logic when all queue sizes are the same,
+            // and we didn't exclude partitions that experience high latencies (greater than
+            // partitioner.availability.timeout.ms).
+            log.trace("All queue lengths are the same, not using adaptive for topic {}", topic);
+            partitionLoadStats = null;
+            return;
+        }
+
+        // Invert and fold the queue size, so that they become separator values in the CFT.
+        queueSizes[0] = maxSizePlus1 - queueSizes[0];
+        for (int i = 1; i < length; i++) {
+            queueSizes[i] = maxSizePlus1 - queueSizes[i] + queueSizes[i - 1];
+        }
+        log.trace("Partition load stats for topic {}: CFT={}, IDs={}, length={}",
+                topic, queueSizes, partitionIds, length);
+        partitionLoadStats = new PartitionLoadStats(queueSizes, partitionIds, length);
+    }
+
+    /**
+     * Info for the current sticky partition.
+     */
+    public static class StickyPartitionInfo {
+        private final int index;
+        private final AtomicInteger producedBytes = new AtomicInteger();
+
+        StickyPartitionInfo(int index) {
+            this.index = index;
+        }
+
+        public int partition() {
+            return index;
+        }
+    }
+
+    /*
+     * Default hashing function to choose a partition from the serialized key bytes
+     */
+    public static int partitionForKey(final byte[] serializedKey, final int numPartitions) {
+        return Utils.toPositive(Utils.murmur2(serializedKey)) % numPartitions;
+    }
+
+    /**
+     * The partition load stats for each topic that are used for adaptive partition distribution.
+     */
+    private final static class PartitionLoadStats {
+        public final int[] cumulativeFrequencyTable;
+        public final int[] partitionIds;
+        public final int length;
+        public PartitionLoadStats(int[] cumulativeFrequencyTable, int[] partitionIds, int length) {
+            assert cumulativeFrequencyTable.length == partitionIds.length;
+            assert length <= cumulativeFrequencyTable.length;
+            this.cumulativeFrequencyTable = cumulativeFrequencyTable;
+            this.partitionIds = partitionIds;
+            this.length = length;
+        }
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/DefaultPartitioner.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/DefaultPartitioner.java
index cf765d1eee6aa..716773626c8c1 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/DefaultPartitioner.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/DefaultPartitioner.java
@@ -18,11 +18,13 @@
 
 import org.apache.kafka.clients.producer.Partitioner;
 import org.apache.kafka.common.Cluster;
-import org.apache.kafka.common.utils.Utils;
 
 import java.util.Map;
 
 /**
+ * NOTE this partitioner is deprecated and shouldn't be used.  To use default partitioning logic
+ * remove partitioner.class configuration setting.  See KIP-794 for more info.
+ *
  * The default partitioning strategy:
  * <ul>
  * <li>If a partition is specified in the record, use it
@@ -31,6 +33,7 @@
  * 
  * See KIP-480 for details about sticky partitioning.
  */
+@Deprecated
 public class DefaultPartitioner implements Partitioner {
 
     private final StickyPartitionCache stickyPartitionCache = new StickyPartitionCache();
@@ -67,8 +70,7 @@ public int partition(String topic, Object key, byte[] keyBytes, Object value, by
         if (keyBytes == null) {
             return stickyPartitionCache.partition(topic, cluster);
         }
-        // hash the keyBytes to choose a partition
-        return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
+        return BuiltInPartitioner.partitionForKey(keyBytes, numPartitions);
     }
 
     public void close() {}
@@ -77,6 +79,7 @@ public void close() {}
      * If a batch completed for the current sticky partition, change the sticky partition. 
      * Alternately, if no sticky partition has been determined, set one.
      */
+    @SuppressWarnings("deprecation")
     public void onNewBatch(String topic, Cluster cluster, int prevPartition) {
         stickyPartitionCache.nextPartition(topic, cluster, prevPartition);
     }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetrics.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetrics.java
index 3c6fe2691e2a8..7d942d572cfd5 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetrics.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetrics.java
@@ -34,6 +34,7 @@ public class KafkaProducerMetrics implements AutoCloseable {
     private static final String TXN_COMMIT = "txn-commit";
     private static final String TXN_ABORT = "txn-abort";
     private static final String TOTAL_TIME_SUFFIX = "-time-ns-total";
+    private static final String METADATA_WAIT = "metadata-wait";
 
     private final Map<String, String> tags;
     private final Metrics metrics;
@@ -43,6 +44,7 @@ public class KafkaProducerMetrics implements AutoCloseable {
     private final Sensor sendOffsetsSensor;
     private final Sensor commitTxnSensor;
     private final Sensor abortTxnSensor;
+    private final Sensor metadataWaitSensor;
 
     public KafkaProducerMetrics(Metrics metrics) {
         this.metrics = metrics;
@@ -71,6 +73,10 @@ public KafkaProducerMetrics(Metrics metrics) {
             TXN_ABORT,
             "Total time producer has spent in abortTransaction in nanoseconds."
         );
+        metadataWaitSensor = newLatencySensor(
+            METADATA_WAIT,
+            "Total time producer has spent waiting on topic metadata in nanoseconds."
+        );
     }
 
     @Override
@@ -81,6 +87,7 @@ public void close() {
         removeMetric(TXN_SEND_OFFSETS);
         removeMetric(TXN_COMMIT);
         removeMetric(TXN_ABORT);
+        removeMetric(METADATA_WAIT);
     }
 
     public void recordFlush(long duration) {
@@ -107,6 +114,10 @@ public void recordAbortTxn(long duration) {
         abortTxnSensor.record(duration);
     }
 
+    public void recordMetadataWait(long duration) {
+        metadataWaitSensor.record(duration);
+    }
+
     private Sensor newLatencySensor(String name, String description) {
         Sensor sensor = metrics.sensor(name + TOTAL_TIME_SUFFIX);
         sensor.add(metricName(name, description), new CumulativeSum());
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
index a47c9d38ad32d..a1f684ac95cd4 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/RecordAccumulator.java
@@ -29,19 +29,18 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
+
 import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.RecordMetadata;
 import org.apache.kafka.common.utils.ProducerIdAndEpoch;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.KafkaException;
-import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.PartitionInfo;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.header.Header;
-import org.apache.kafka.common.metrics.Measurable;
-import org.apache.kafka.common.metrics.MetricConfig;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.record.AbstractRecords;
 import org.apache.kafka.common.record.CompressionRatioEstimator;
@@ -63,8 +62,9 @@
  * The accumulator uses a bounded amount of memory and append calls will block when that memory is exhausted, unless
  * this behavior is explicitly disabled.
  */
-public final class RecordAccumulator {
+public class RecordAccumulator {
 
+    private final LogContext logContext;
     private final Logger log;
     private volatile boolean closed;
     private final AtomicInteger flushesInProgress;
@@ -74,14 +74,17 @@ public final class RecordAccumulator {
     private final int lingerMs;
     private final long retryBackoffMs;
     private final int deliveryTimeoutMs;
+    private final long partitionAvailabilityTimeoutMs;  // latency threshold for marking partition temporary unavailable
+    private final boolean enableAdaptivePartitioning;
     private final BufferPool free;
     private final Time time;
     private final ApiVersions apiVersions;
-    private final ConcurrentMap<TopicPartition, Deque<ProducerBatch>> batches;
+    private final ConcurrentMap<String /*topic*/, TopicInfo> topicInfoMap = new CopyOnWriteMap<>();
+    private final ConcurrentMap<Integer /*nodeId*/, NodeLatencyStats> nodeStats = new CopyOnWriteMap<>();
     private final IncompleteBatches incomplete;
     // The following variables are only accessed by the sender thread, so we don't need to protect them.
     private final Set<TopicPartition> muted;
-    private int drainIndex;
+    private final Map<String, Integer> nodesDrainIndex;
     private final TransactionManager transactionManager;
     private long nextBatchExpiryTimeMs = Long.MAX_VALUE; // the earliest time (absolute) a batch will expire.
 
@@ -96,11 +99,15 @@ public final class RecordAccumulator {
      *        latency for potentially better throughput due to more batching (and hence fewer, larger requests).
      * @param retryBackoffMs An artificial delay time to retry the produce request upon receiving an error. This avoids
      *        exhausting all retries in a short period of time.
+     * @param deliveryTimeoutMs An upper bound on the time to report success or failure on record delivery
+     * @param partitionerConfig Partitioner config
      * @param metrics The metrics
+     * @param metricGrpName The metric group name
      * @param time The time instance to use
      * @param apiVersions Request API versions for current connected brokers
      * @param transactionManager The shared transaction state object which tracks producer IDs, epochs, and sequence
      *                           numbers per partition.
+     * @param bufferPool The buffer pool
      */
     public RecordAccumulator(LogContext logContext,
                              int batchSize,
@@ -108,14 +115,15 @@ public RecordAccumulator(LogContext logContext,
                              int lingerMs,
                              long retryBackoffMs,
                              int deliveryTimeoutMs,
+                             PartitionerConfig partitionerConfig,
                              Metrics metrics,
                              String metricGrpName,
                              Time time,
                              ApiVersions apiVersions,
                              TransactionManager transactionManager,
                              BufferPool bufferPool) {
+        this.logContext = logContext;
         this.log = logContext.logger(RecordAccumulator.class);
-        this.drainIndex = 0;
         this.closed = false;
         this.flushesInProgress = new AtomicInteger(0);
         this.appendsInProgress = new AtomicInteger(0);
@@ -124,40 +132,85 @@ public RecordAccumulator(LogContext logContext,
         this.lingerMs = lingerMs;
         this.retryBackoffMs = retryBackoffMs;
         this.deliveryTimeoutMs = deliveryTimeoutMs;
-        this.batches = new CopyOnWriteMap<>();
+        this.enableAdaptivePartitioning = partitionerConfig.enableAdaptivePartitioning;
+        this.partitionAvailabilityTimeoutMs = partitionerConfig.partitionAvailabilityTimeoutMs;
         this.free = bufferPool;
         this.incomplete = new IncompleteBatches();
         this.muted = new HashSet<>();
         this.time = time;
         this.apiVersions = apiVersions;
+        nodesDrainIndex = new HashMap<>();
         this.transactionManager = transactionManager;
         registerMetrics(metrics, metricGrpName);
     }
 
-    private void registerMetrics(Metrics metrics, String metricGrpName) {
-        MetricName metricName = metrics.metricName("waiting-threads", metricGrpName, "The number of user threads blocked waiting for buffer memory to enqueue their records");
-        Measurable waitingThreads = new Measurable() {
-            public double measure(MetricConfig config, long now) {
-                return free.queued();
-            }
-        };
-        metrics.addMetric(metricName, waitingThreads);
+    /**
+     * Create a new record accumulator with default partitioner config
+     *
+     * @param logContext The log context used for logging
+     * @param batchSize The size to use when allocating {@link MemoryRecords} instances
+     * @param compression The compression codec for the records
+     * @param lingerMs An artificial delay time to add before declaring a records instance that isn't full ready for
+     *        sending. This allows time for more records to arrive. Setting a non-zero lingerMs will trade off some
+     *        latency for potentially better throughput due to more batching (and hence fewer, larger requests).
+     * @param retryBackoffMs An artificial delay time to retry the produce request upon receiving an error. This avoids
+     *        exhausting all retries in a short period of time.
+     * @param deliveryTimeoutMs An upper bound on the time to report success or failure on record delivery
+     * @param metrics The metrics
+     * @param metricGrpName The metric group name
+     * @param time The time instance to use
+     * @param apiVersions Request API versions for current connected brokers
+     * @param transactionManager The shared transaction state object which tracks producer IDs, epochs, and sequence
+     *                           numbers per partition.
+     * @param bufferPool The buffer pool
+     */
+    public RecordAccumulator(LogContext logContext,
+                             int batchSize,
+                             CompressionType compression,
+                             int lingerMs,
+                             long retryBackoffMs,
+                             int deliveryTimeoutMs,
+                             Metrics metrics,
+                             String metricGrpName,
+                             Time time,
+                             ApiVersions apiVersions,
+                             TransactionManager transactionManager,
+                             BufferPool bufferPool) {
+        this(logContext,
+            batchSize,
+            compression,
+            lingerMs,
+            retryBackoffMs,
+            deliveryTimeoutMs,
+            new PartitionerConfig(),
+            metrics,
+            metricGrpName,
+            time,
+            apiVersions,
+            transactionManager,
+            bufferPool);
+    }
 
-        metricName = metrics.metricName("buffer-total-bytes", metricGrpName, "The maximum amount of buffer memory the client can use (whether or not it is currently used).");
-        Measurable totalBytes = new Measurable() {
-            public double measure(MetricConfig config, long now) {
-                return free.totalMemory();
-            }
-        };
-        metrics.addMetric(metricName, totalBytes);
+    private void registerMetrics(Metrics metrics, String metricGrpName) {
+        metrics.addMetric(
+            metrics.metricName("waiting-threads", metricGrpName,
+                "The number of user threads blocked waiting for buffer memory to enqueue their records"),
+            (config, now) -> free.queued());
+
+        metrics.addMetric(
+            metrics.metricName("buffer-total-bytes", metricGrpName,
+                "The maximum amount of buffer memory the client can use (whether or not it is currently used)."),
+            (config, now) -> free.totalMemory());
+
+        metrics.addMetric(
+            metrics.metricName("buffer-available-bytes", metricGrpName,
+                "The total amount of buffer memory that is not being used (either unallocated or in the free list)."),
+            (config, now) -> free.availableMemory());
+    }
 
-        metricName = metrics.metricName("buffer-available-bytes", metricGrpName, "The total amount of buffer memory that is not being used (either unallocated or in the free list).");
-        Measurable availableBytes = new Measurable() {
-            public double measure(MetricConfig config, long now) {
-                return free.availableMemory();
-            }
-        };
-        metrics.addMetric(metricName, availableBytes);
+    private void setPartition(AppendCallbacks callbacks, int partition) {
+        if (callbacks != null)
+            callbacks.setPartition(partition);
     }
 
     /**
@@ -166,85 +219,156 @@ public double measure(MetricConfig config, long now) {
      * The append result will contain the future metadata, and flag for whether the appended batch is full or a new batch is created
      * <p>
      *
-     * @param tp The topic/partition to which this record is being sent
+     * @param topic The topic to which this record is being sent
+     * @param partition The partition to which this record is being sent or RecordMetadata.UNKNOWN_PARTITION
+     *                  if any partition could be used
      * @param timestamp The timestamp of the record
      * @param key The key for the record
      * @param value The value for the record
      * @param headers the Headers for the record
-     * @param callback The user-supplied callback to execute when the request is complete
+     * @param callbacks The callbacks to execute
      * @param maxTimeToBlock The maximum time in milliseconds to block for buffer memory to be available
      * @param abortOnNewBatch A boolean that indicates returning before a new batch is created and
      *                        running the partitioner's onNewBatch method before trying to append again
      * @param nowMs The current time, in milliseconds
+     * @param cluster The cluster metadata
      */
-    public RecordAppendResult append(TopicPartition tp,
+    public RecordAppendResult append(String topic,
+                                     int partition,
                                      long timestamp,
                                      byte[] key,
                                      byte[] value,
                                      Header[] headers,
-                                     Callback callback,
+                                     AppendCallbacks callbacks,
                                      long maxTimeToBlock,
                                      boolean abortOnNewBatch,
-                                     long nowMs) throws InterruptedException {
+                                     long nowMs,
+                                     Cluster cluster) throws InterruptedException {
+        TopicInfo topicInfo = topicInfoMap.computeIfAbsent(topic, k -> new TopicInfo(logContext, k, batchSize));
+
         // We keep track of the number of appending thread to make sure we do not miss batches in
         // abortIncompleteBatches().
         appendsInProgress.incrementAndGet();
         ByteBuffer buffer = null;
         if (headers == null) headers = Record.EMPTY_HEADERS;
         try {
-            // check if we have an in-progress batch
-            Deque<ProducerBatch> dq = getOrCreateDeque(tp);
-            synchronized (dq) {
-                if (closed)
-                    throw new KafkaException("Producer closed while send in progress");
-                RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callback, dq, nowMs);
-                if (appendResult != null)
-                    return appendResult;
-            }
+            // Loop to retry in case we encounter partitioner's race conditions.
+            while (true) {
+                // If the message doesn't have any partition affinity, so we pick a partition based on the broker
+                // availability and performance.  Note, that here we peek current partition before we hold the
+                // deque lock, so we'll need to make sure that it's not changed while we were waiting for the
+                // deque lock.
+                final BuiltInPartitioner.StickyPartitionInfo partitionInfo;
+                final int effectivePartition;
+                if (partition == RecordMetadata.UNKNOWN_PARTITION) {
+                    partitionInfo = topicInfo.builtInPartitioner.peekCurrentPartitionInfo(cluster);
+                    effectivePartition = partitionInfo.partition();
+                } else {
+                    partitionInfo = null;
+                    effectivePartition = partition;
+                }
 
-            // we don't have an in-progress record batch try to allocate a new batch
-            if (abortOnNewBatch) {
-                // Return a result that will cause another call to append.
-                return new RecordAppendResult(null, false, false, true);
-            }
+                // Now that we know the effective partition, let the caller know.
+                setPartition(callbacks, effectivePartition);
+
+                // check if we have an in-progress batch
+                Deque<ProducerBatch> dq = topicInfo.batches.computeIfAbsent(effectivePartition, k -> new ArrayDeque<>());
+                synchronized (dq) {
+                    // After taking the lock, validate that the partition hasn't changed and retry.
+                    if (topicInfo.builtInPartitioner.isPartitionChanged(partitionInfo)) {
+                        log.trace("Partition {} for topic {} switched by a concurrent append, retrying",
+                                partitionInfo.partition(), topic);
+                        continue;
+                    }
+                    RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
+                    if (appendResult != null) {
+                        topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster);
+                        return appendResult;
+                    }
+                }
 
-            byte maxUsableMagic = apiVersions.maxUsableProduceMagic();
-            int size = Math.max(this.batchSize, AbstractRecords.estimateSizeInBytesUpperBound(maxUsableMagic, compression, key, value, headers));
-            log.trace("Allocating a new {} byte message buffer for topic {} partition {} with remaining timeout {}ms", size, tp.topic(), tp.partition(), maxTimeToBlock);
-            buffer = free.allocate(size, maxTimeToBlock);
+                // we don't have an in-progress record batch try to allocate a new batch
+                if (abortOnNewBatch) {
+                    // Return a result that will cause another call to append.
+                    return new RecordAppendResult(null, false, false, true, 0);
+                }
 
-            // Update the current time in case the buffer allocation blocked above.
-            nowMs = time.milliseconds();
-            synchronized (dq) {
-                // Need to check if producer is closed again after grabbing the dequeue lock.
-                if (closed)
-                    throw new KafkaException("Producer closed while send in progress");
+                if (buffer == null) {
+                    byte maxUsableMagic = apiVersions.maxUsableProduceMagic();
+                    int size = Math.max(this.batchSize, AbstractRecords.estimateSizeInBytesUpperBound(maxUsableMagic, compression, key, value, headers));
+                    log.trace("Allocating a new {} byte message buffer for topic {} partition {} with remaining timeout {}ms", size, topic, partition, maxTimeToBlock);
+                    // This call may block if we exhausted buffer space.
+                    buffer = free.allocate(size, maxTimeToBlock);
+                    // Update the current time in case the buffer allocation blocked above.
+                    // NOTE: getting time may be expensive, so calling it under a lock
+                    // should be avoided.
+                    nowMs = time.milliseconds();
+                }
 
-                RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callback, dq, nowMs);
-                if (appendResult != null) {
-                    // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often...
+                synchronized (dq) {
+                    // After taking the lock, validate that the partition hasn't changed and retry.
+                    if (topicInfo.builtInPartitioner.isPartitionChanged(partitionInfo)) {
+                        log.trace("Partition {} for topic {} switched by a concurrent append, retrying",
+                                partitionInfo.partition(), topic);
+                        continue;
+                    }
+                    RecordAppendResult appendResult = appendNewBatch(topic, effectivePartition, dq, timestamp, key, value, headers, callbacks, buffer, nowMs);
+                    // Set buffer to null, so that deallocate doesn't return it back to free pool, since it's used in the batch.
+                    if (appendResult.newBatchCreated)
+                        buffer = null;
+                    topicInfo.builtInPartitioner.updatePartitionInfo(partitionInfo, appendResult.appendedBytes, cluster);
                     return appendResult;
                 }
-
-                MemoryRecordsBuilder recordsBuilder = recordsBuilder(buffer, maxUsableMagic);
-                ProducerBatch batch = new ProducerBatch(tp, recordsBuilder, nowMs);
-                FutureRecordMetadata future = Objects.requireNonNull(batch.tryAppend(timestamp, key, value, headers,
-                        callback, nowMs));
-
-                dq.addLast(batch);
-                incomplete.add(batch);
-
-                // Don't deallocate this buffer in the finally block as it's being used in the record batch
-                buffer = null;
-                return new RecordAppendResult(future, dq.size() > 1 || batch.isFull(), true, false);
             }
         } finally {
-            if (buffer != null)
-                free.deallocate(buffer);
+            free.deallocate(buffer);
             appendsInProgress.decrementAndGet();
         }
     }
 
+    /**
+     * Append a new batch to the queue
+     *
+     * @param topic The topic
+     * @param partition The partition (cannot be RecordMetadata.UNKNOWN_PARTITION)
+     * @param dq The queue
+     * @param timestamp The timestamp of the record
+     * @param key The key for the record
+     * @param value The value for the record
+     * @param headers the Headers for the record
+     * @param callbacks The callbacks to execute
+     * @param buffer The buffer for the new batch
+     * @param nowMs The current time, in milliseconds
+     */
+    private RecordAppendResult appendNewBatch(String topic,
+                                              int partition,
+                                              Deque<ProducerBatch> dq,
+                                              long timestamp,
+                                              byte[] key,
+                                              byte[] value,
+                                              Header[] headers,
+                                              AppendCallbacks callbacks,
+                                              ByteBuffer buffer,
+                                              long nowMs) {
+        assert partition != RecordMetadata.UNKNOWN_PARTITION;
+
+        RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callbacks, dq, nowMs);
+        if (appendResult != null) {
+            // Somebody else found us a batch, return the one we waited for! Hopefully this doesn't happen often...
+            return appendResult;
+        }
+
+        MemoryRecordsBuilder recordsBuilder = recordsBuilder(buffer, apiVersions.maxUsableProduceMagic());
+        ProducerBatch batch = new ProducerBatch(new TopicPartition(topic, partition), recordsBuilder, nowMs);
+        FutureRecordMetadata future = Objects.requireNonNull(batch.tryAppend(timestamp, key, value, headers,
+                callbacks, nowMs));
+
+        dq.addLast(batch);
+        incomplete.add(batch);
+
+        return new RecordAppendResult(future, dq.size() > 1 || batch.isFull(), true, false, batch.estimatedSizeInBytes());
+    }
+
     private MemoryRecordsBuilder recordsBuilder(ByteBuffer buffer, byte maxUsableMagic) {
         if (transactionManager != null && maxUsableMagic < RecordBatch.MAGIC_VALUE_V2) {
             throw new UnsupportedVersionException("Attempting to use idempotence with a broker which does not " +
@@ -263,13 +387,18 @@ private MemoryRecordsBuilder recordsBuilder(ByteBuffer buffer, byte maxUsableMag
      */
     private RecordAppendResult tryAppend(long timestamp, byte[] key, byte[] value, Header[] headers,
                                          Callback callback, Deque<ProducerBatch> deque, long nowMs) {
+        if (closed)
+            throw new KafkaException("Producer closed while send in progress");
         ProducerBatch last = deque.peekLast();
         if (last != null) {
+            int initialBytes = last.estimatedSizeInBytes();
             FutureRecordMetadata future = last.tryAppend(timestamp, key, value, headers, callback, nowMs);
-            if (future == null)
+            if (future == null) {
                 last.closeForRecordAppends();
-            else
-                return new RecordAppendResult(future, deque.size() > 1 || last.isFull(), false, false);
+            } else {
+                int appendedBytes = last.estimatedSizeInBytes() - initialBytes;
+                return new RecordAppendResult(future, deque.size() > 1 || last.isFull(), false, false, appendedBytes);
+            }
         }
         return null;
     }
@@ -298,19 +427,20 @@ public void maybeUpdateNextBatchExpiryTime(ProducerBatch batch) {
      */
     public List<ProducerBatch> expiredBatches(long now) {
         List<ProducerBatch> expiredBatches = new ArrayList<>();
-        for (Map.Entry<TopicPartition, Deque<ProducerBatch>> entry : this.batches.entrySet()) {
-            // expire the batches in the order of sending
-            Deque<ProducerBatch> deque = entry.getValue();
-            synchronized (deque) {
-                while (!deque.isEmpty()) {
-                    ProducerBatch batch = deque.getFirst();
-                    if (batch.hasReachedDeliveryTimeout(deliveryTimeoutMs, now)) {
-                        deque.poll();
-                        batch.abortRecordAppends();
-                        expiredBatches.add(batch);
-                    } else {
-                        maybeUpdateNextBatchExpiryTime(batch);
-                        break;
+        for (TopicInfo topicInfo : topicInfoMap.values()) {
+            for (Deque<ProducerBatch> deque : topicInfo.batches.values()) {
+                // expire the batches in the order of sending
+                synchronized (deque) {
+                    while (!deque.isEmpty()) {
+                        ProducerBatch batch = deque.getFirst();
+                        if (batch.hasReachedDeliveryTimeout(deliveryTimeoutMs, now)) {
+                            deque.poll();
+                            batch.abortRecordAppends();
+                            expiredBatches.add(batch);
+                        } else {
+                            maybeUpdateNextBatchExpiryTime(batch);
+                            break;
+                        }
                     }
                 }
             }
@@ -378,12 +508,12 @@ public int splitAndReenqueue(ProducerBatch bigBatch) {
     // producer id. We will not attempt to reorder messages if the producer id has changed, we will throw an
     // IllegalStateException instead.
     private void insertInSequenceOrder(Deque<ProducerBatch> deque, ProducerBatch batch) {
-        // When we are requeing and have enabled idempotence, the reenqueued batch must always have a sequence.
+        // When we are re-enqueueing and have enabled idempotence, the re-enqueued batch must always have a sequence.
         if (batch.baseSequence() == RecordBatch.NO_SEQUENCE)
             throw new IllegalStateException("Trying to re-enqueue a batch which doesn't have a sequence even " +
                 "though idempotency is enabled.");
 
-        if (transactionManager.nextBatchBySequence(batch.topicPartition) == null)
+        if (!transactionManager.hasInflightBatches(batch.topicPartition))
             throw new IllegalStateException("We are re-enqueueing a batch which is not tracked as part of the in flight " +
                 "requests. batch.topicPartition: " + batch.topicPartition + "; batch.baseSequence: " + batch.baseSequence());
 
@@ -420,38 +550,94 @@ private void insertInSequenceOrder(Deque<ProducerBatch> deque, ProducerBatch bat
     }
 
     /**
-     * Get a list of nodes whose partitions are ready to be sent, and the earliest time at which any non-sendable
-     * partition will be ready; Also return the flag for whether there are any unknown leaders for the accumulated
-     * partition batches.
-     * <p>
-     * A destination node is ready to send data if:
-     * <ol>
-     * <li>There is at least one partition that is not backing off its send
-     * <li><b>and</b> those partitions are not muted (to prevent reordering if
-     *   {@value org.apache.kafka.clients.producer.ProducerConfig#MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION}
-     *   is set to one)</li>
-     * <li><b>and <i>any</i></b> of the following are true</li>
-     * <ul>
-     *     <li>The record set is full</li>
-     *     <li>The record set has sat in the accumulator for at least lingerMs milliseconds</li>
-     *     <li>The accumulator is out of memory and threads are blocking waiting for data (in this case all partitions
-     *     are immediately considered ready).</li>
-     *     <li>The accumulator has been closed</li>
-     * </ul>
-     * </ol>
+     * Add the leader to the ready nodes if the batch is ready
+     *
+     * @param nowMs The current time
+     * @param exhausted 'true' is the buffer pool is exhausted
+     * @param part The partition
+     * @param leader The leader for the partition
+     * @param waitedTimeMs How long batch waited
+     * @param backingOff Is backing off
+     * @param full Is batch full
+     * @param nextReadyCheckDelayMs The delay for next check
+     * @param readyNodes The set of ready nodes (to be filled in)
+     * @return The delay for next check
      */
-    public ReadyCheckResult ready(Cluster cluster, long nowMs) {
-        Set<Node> readyNodes = new HashSet<>();
-        long nextReadyCheckDelayMs = Long.MAX_VALUE;
-        Set<String> unknownLeaderTopics = new HashSet<>();
+    private long batchReady(long nowMs, boolean exhausted, TopicPartition part, Node leader,
+                            long waitedTimeMs, boolean backingOff, boolean full,
+                            long nextReadyCheckDelayMs, Set<Node> readyNodes) {
+        if (!readyNodes.contains(leader) && !isMuted(part)) {
+            long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
+            boolean expired = waitedTimeMs >= timeToWaitMs;
+            boolean transactionCompleting = transactionManager != null && transactionManager.isCompleting();
+            boolean sendable = full
+                    || expired
+                    || exhausted
+                    || closed
+                    || flushInProgress()
+                    || transactionCompleting;
+            if (sendable && !backingOff) {
+                readyNodes.add(leader);
+            } else {
+                long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0);
+                // Note that this results in a conservative estimate since an un-sendable partition may have
+                // a leader that will later be found to have sendable data. However, this is good enough
+                // since we'll just wake up and then sleep again for the remaining time.
+                nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs);
+            }
+        }
+        return nextReadyCheckDelayMs;
+    }
 
+    /**
+     * Iterate over partitions to see which one have batches ready and collect leaders of those partitions
+     * into the set of ready nodes.  If partition has no leader, add the topic to the set of topics with
+     * no leader.  This function also calculates stats for adaptive partitioning.
+     *
+     * @param cluster The cluster metadata
+     * @param nowMs The current time
+     * @param topic The topic
+     * @param topicInfo The topic info
+     * @param nextReadyCheckDelayMs The delay for next check
+     * @param readyNodes The set of ready nodes (to be filled in)
+     * @param unknownLeaderTopics The set of topics with no leader (to be filled in)
+     * @return The delay for next check
+     */
+    private long partitionReady(Cluster cluster, long nowMs, String topic,
+                                TopicInfo topicInfo,
+                                long nextReadyCheckDelayMs, Set<Node> readyNodes, Set<String> unknownLeaderTopics) {
+        ConcurrentMap<Integer, Deque<ProducerBatch>> batches = topicInfo.batches;
+        // Collect the queue sizes for available partitions to be used in adaptive partitioning.
+        int[] queueSizes = null;
+        int[] partitionIds = null;
+        if (enableAdaptivePartitioning && batches.size() >= cluster.partitionsForTopic(topic).size()) {
+            // We don't do adaptive partitioning until we scheduled at least a batch for all
+            // partitions (i.e. we have the corresponding entries in the batches map), we just
+            // do uniform.  The reason is that we build queue sizes from the batches map,
+            // and if an entry is missing in the batches map, then adaptive partitioning logic
+            // won't know about it and won't switch to it.
+            queueSizes = new int[batches.size()];
+            partitionIds = new int[queueSizes.length];
+        }
+
+        int queueSizesIndex = -1;
         boolean exhausted = this.free.queued() > 0;
-        for (Map.Entry<TopicPartition, Deque<ProducerBatch>> entry : this.batches.entrySet()) {
+        for (Map.Entry<Integer, Deque<ProducerBatch>> entry : batches.entrySet()) {
+            TopicPartition part = new TopicPartition(topic, entry.getKey());
+            // Advance queueSizesIndex so that we properly index available
+            // partitions.  Do it here so that it's done for all code paths.
+            Node leader = cluster.leaderFor(part);
+            if (leader != null && queueSizes != null) {
+                ++queueSizesIndex;
+                assert queueSizesIndex < queueSizes.length;
+                partitionIds[queueSizesIndex] = part.partition();
+            }
+
             Deque<ProducerBatch> deque = entry.getValue();
 
-            final ProducerBatch batch;
             final long waitedTimeMs;
             final boolean backingOff;
+            final int dequeSize;
             final boolean full;
 
             // This loop is especially hot with large partition counts.
@@ -463,43 +649,81 @@ public ReadyCheckResult ready(Cluster cluster, long nowMs) {
             synchronized (deque) {
                 // Deques are often empty in this path, esp with large partition counts,
                 // so we exit early if we can.
-                batch = deque.peekFirst();
+                ProducerBatch batch = deque.peekFirst();
                 if (batch == null) {
                     continue;
                 }
 
                 waitedTimeMs = batch.waitedTimeMs(nowMs);
                 backingOff = batch.attempts() > 0 && waitedTimeMs < retryBackoffMs;
-                full = deque.size() > 1 || batch.isFull();
+                dequeSize = deque.size();
+                full = dequeSize > 1 || batch.isFull();
             }
 
-            TopicPartition part = entry.getKey();
-            Node leader = cluster.leaderFor(part);
             if (leader == null) {
                 // This is a partition for which leader is not known, but messages are available to send.
                 // Note that entries are currently not removed from batches when deque is empty.
                 unknownLeaderTopics.add(part.topic());
-            } else if (!readyNodes.contains(leader) && !isMuted(part)) {
-                long timeToWaitMs = backingOff ? retryBackoffMs : lingerMs;
-                boolean expired = waitedTimeMs >= timeToWaitMs;
-                boolean transactionCompleting = transactionManager != null && transactionManager.isCompleting();
-                boolean sendable = full
-                    || expired
-                    || exhausted
-                    || closed
-                    || flushInProgress()
-                    || transactionCompleting;
-                if (sendable && !backingOff) {
-                    readyNodes.add(leader);
-                } else {
-                    long timeLeftMs = Math.max(timeToWaitMs - waitedTimeMs, 0);
-                    // Note that this results in a conservative estimate since an un-sendable partition may have
-                    // a leader that will later be found to have sendable data. However, this is good enough
-                    // since we'll just wake up and then sleep again for the remaining time.
-                    nextReadyCheckDelayMs = Math.min(timeLeftMs, nextReadyCheckDelayMs);
+            } else {
+                if (queueSizes != null)
+                    queueSizes[queueSizesIndex] = dequeSize;
+                if (partitionAvailabilityTimeoutMs > 0) {
+                    // Check if we want to exclude the partition from the list of available partitions
+                    // if the broker hasn't responded for some time.
+                    NodeLatencyStats nodeLatencyStats = nodeStats.get(leader.id());
+                    if (nodeLatencyStats != null) {
+                        // NOTE: there is no synchronization between reading metrics,
+                        // so we read ready time first to avoid accidentally marking partition
+                        // unavailable if we read while the metrics are being updated.
+                        long readyTimeMs = nodeLatencyStats.readyTimeMs;
+                        if (readyTimeMs - nodeLatencyStats.drainTimeMs > partitionAvailabilityTimeoutMs)
+                            --queueSizesIndex;
+                    }
                 }
+
+                nextReadyCheckDelayMs = batchReady(nowMs, exhausted, part, leader, waitedTimeMs, backingOff,
+                    full, nextReadyCheckDelayMs, readyNodes);
             }
         }
+
+        // We've collected the queue sizes for partitions of this topic, now we can calculate
+        // load stats.  NOTE: the stats are calculated in place, modifying the
+        // queueSizes array.
+        topicInfo.builtInPartitioner.updatePartitionLoadStats(queueSizes, partitionIds, queueSizesIndex + 1);
+        return nextReadyCheckDelayMs;
+    }
+
+    /**
+     * Get a list of nodes whose partitions are ready to be sent, and the earliest time at which any non-sendable
+     * partition will be ready; Also return the flag for whether there are any unknown leaders for the accumulated
+     * partition batches.
+     * <p>
+     * A destination node is ready to send data if:
+     * <ol>
+     * <li>There is at least one partition that is not backing off its send
+     * <li><b>and</b> those partitions are not muted (to prevent reordering if
+     *   {@value org.apache.kafka.clients.producer.ProducerConfig#MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION}
+     *   is set to one)</li>
+     * <li><b>and <i>any</i></b> of the following are true</li>
+     * <ul>
+     *     <li>The record set is full</li>
+     *     <li>The record set has sat in the accumulator for at least lingerMs milliseconds</li>
+     *     <li>The accumulator is out of memory and threads are blocking waiting for data (in this case all partitions
+     *     are immediately considered ready).</li>
+     *     <li>The accumulator has been closed</li>
+     * </ul>
+     * </ol>
+     */
+    public ReadyCheckResult ready(Cluster cluster, long nowMs) {
+        Set<Node> readyNodes = new HashSet<>();
+        long nextReadyCheckDelayMs = Long.MAX_VALUE;
+        Set<String> unknownLeaderTopics = new HashSet<>();
+        // Go topic by topic so that we can get queue sizes for partitions in a topic and calculate
+        // cumulative frequency table (used in partitioner).
+        for (Map.Entry<String, TopicInfo> topicInfoEntry : this.topicInfoMap.entrySet()) {
+            final String topic = topicInfoEntry.getKey();
+            nextReadyCheckDelayMs = partitionReady(cluster, nowMs, topic, topicInfoEntry.getValue(), nextReadyCheckDelayMs, readyNodes, unknownLeaderTopics);
+        }
         return new ReadyCheckResult(readyNodes, nextReadyCheckDelayMs, unknownLeaderTopics);
     }
 
@@ -507,11 +731,12 @@ public ReadyCheckResult ready(Cluster cluster, long nowMs) {
      * Check whether there are any batches which haven't been drained
      */
     public boolean hasUndrained() {
-        for (Map.Entry<TopicPartition, Deque<ProducerBatch>> entry : this.batches.entrySet()) {
-            Deque<ProducerBatch> deque = entry.getValue();
-            synchronized (deque) {
-                if (!deque.isEmpty())
-                    return true;
+        for (TopicInfo topicInfo : topicInfoMap.values()) {
+            for (Deque<ProducerBatch> deque : topicInfo.batches.values()) {
+                synchronized (deque) {
+                    if (!deque.isEmpty())
+                        return true;
+                }
             }
         }
         return false;
@@ -559,13 +784,14 @@ private List<ProducerBatch> drainBatchesForOneNode(Cluster cluster, Node node, i
         int size = 0;
         List<PartitionInfo> parts = cluster.partitionsForNode(node.id());
         List<ProducerBatch> ready = new ArrayList<>();
-        /* to make starvation less likely this loop doesn't start at 0 */
+        /* to make starvation less likely each node has it's own drainIndex */
+        int drainIndex = getDrainIndex(node.idString());
         int start = drainIndex = drainIndex % parts.size();
         do {
             PartitionInfo part = parts.get(drainIndex);
             TopicPartition tp = new TopicPartition(part.topic(), part.partition());
-            this.drainIndex = (this.drainIndex + 1) % parts.size();
-
+            updateDrainIndex(node.idString(), drainIndex);
+            drainIndex = (drainIndex + 1) % parts.size();
             // Only proceed if the partition has no in-flight batches.
             if (isMuted(tp))
                 continue;
@@ -638,6 +864,14 @@ private List<ProducerBatch> drainBatchesForOneNode(Cluster cluster, Node node, i
         return ready;
     }
 
+    private int getDrainIndex(String idString) {
+        return nodesDrainIndex.computeIfAbsent(idString, s -> 0);
+    }
+
+    private void updateDrainIndex(String idString, int drainIndex) {
+        nodesDrainIndex.put(idString, drainIndex);
+    }
+
     /**
      * Drain all the data for the given nodes and collate them into a list of batches that will fit within the specified
      * size on a per-node basis. This method attempts to avoid choosing the same topic-node over and over.
@@ -660,6 +894,36 @@ public Map<Integer, List<ProducerBatch>> drain(Cluster cluster, Set<Node> nodes,
         return batches;
     }
 
+    public void updateNodeLatencyStats(Integer nodeId, long nowMs, boolean canDrain) {
+        // Don't bother with updating stats if the feature is turned off.
+        if (partitionAvailabilityTimeoutMs <= 0)
+            return;
+
+        // When the sender gets a node (returned by the ready() function) that has data to send
+        // but the node is not ready (and so we cannot drain the data), we only update the
+        // ready time, then the difference would reflect for how long a node wasn't ready
+        // to send the data.  Then we can temporarily remove partitions that are handled by the
+        // node from the list of available partitions so that the partitioner wouldn't pick
+        // this partition.
+        // NOTE: there is no synchronization for metric updates, so drainTimeMs is updated
+        // first to avoid accidentally marking a partition unavailable if the reader gets
+        // values between updates.
+        NodeLatencyStats nodeLatencyStats = nodeStats.computeIfAbsent(nodeId, id -> new NodeLatencyStats(nowMs));
+        if (canDrain)
+            nodeLatencyStats.drainTimeMs = nowMs;
+        nodeLatencyStats.readyTimeMs = nowMs;
+    }
+
+    /* Visible for testing */
+    public NodeLatencyStats getNodeLatencyStats(Integer nodeId) {
+        return nodeStats.get(nodeId);
+    }
+
+    /* Visible for testing */
+    public BuiltInPartitioner getBuiltInPartitioner(String topic) {
+        return topicInfoMap.get(topic).builtInPartitioner;
+    }
+
     /**
      * The earliest absolute time a batch will expire (in milliseconds)
      */
@@ -667,23 +931,20 @@ public long nextExpiryTimeMs() {
         return this.nextBatchExpiryTimeMs;
     }
 
-    private Deque<ProducerBatch> getDeque(TopicPartition tp) {
-        return batches.get(tp);
+      /* Visible for testing */
+    public Deque<ProducerBatch> getDeque(TopicPartition tp) {
+        TopicInfo topicInfo = topicInfoMap.get(tp.topic());
+        if (topicInfo == null)
+            return null;
+        return topicInfo.batches.get(tp.partition());
     }
 
     /**
      * Get the deque for the given topic-partition, creating it if necessary.
      */
     private Deque<ProducerBatch> getOrCreateDeque(TopicPartition tp) {
-        Deque<ProducerBatch> d = this.batches.get(tp);
-        if (d != null)
-            return d;
-        d = new ArrayDeque<>();
-        Deque<ProducerBatch> previous = this.batches.putIfAbsent(tp, d);
-        if (previous == null)
-            return d;
-        else
-            return previous;
+        TopicInfo topicInfo = topicInfoMap.computeIfAbsent(tp.topic(), k -> new TopicInfo(logContext, k, batchSize));
+        return topicInfo.batches.computeIfAbsent(tp.partition(), k -> new ArrayDeque<>());
     }
 
     /**
@@ -713,11 +974,6 @@ boolean flushInProgress() {
         return flushesInProgress.get() > 0;
     }
 
-    /* Visible for testing */
-    Map<TopicPartition, Deque<ProducerBatch>> batches() {
-        return Collections.unmodifiableMap(batches);
-    }
-
     /**
      * Initiate the flushing of data from the accumulator...this makes all requests immediately ready
      */
@@ -771,7 +1027,7 @@ public void abortIncompleteBatches() {
         // flag set. We need to do the last abort after no thread was appending in case there was a new
         // batch appended by the last appending thread.
         abortBatches();
-        this.batches.clear();
+        this.topicInfoMap.clear();
     }
 
     /**
@@ -833,6 +1089,32 @@ public void close() {
         this.free.close();
     }
 
+    /**
+     * Partitioner config for built-in partitioner
+     */
+    public static final class PartitionerConfig {
+        private final boolean enableAdaptivePartitioning;
+        private final long partitionAvailabilityTimeoutMs;
+
+        /**
+         * Partitioner config
+         *
+         * @param enableAdaptivePartitioning If it's true, partition switching adapts to broker load, otherwise partition
+         *        switching is random.
+         * @param partitionAvailabilityTimeoutMs If a broker cannot process produce requests from a partition
+         *        for the specified time, the partition is treated by the partitioner as not available.
+         *        If the timeout is 0, this logic is disabled.
+         */
+        public PartitionerConfig(boolean enableAdaptivePartitioning, long partitionAvailabilityTimeoutMs) {
+            this.enableAdaptivePartitioning = enableAdaptivePartitioning;
+            this.partitionAvailabilityTimeoutMs = partitionAvailabilityTimeoutMs;
+        }
+
+        public PartitionerConfig() {
+            this(false, 0);
+        }
+    }
+
     /*
      * Metadata about a record just appended to the record accumulator
      */
@@ -841,15 +1123,32 @@ public final static class RecordAppendResult {
         public final boolean batchIsFull;
         public final boolean newBatchCreated;
         public final boolean abortForNewBatch;
+        public final int appendedBytes;
 
-        public RecordAppendResult(FutureRecordMetadata future, boolean batchIsFull, boolean newBatchCreated, boolean abortForNewBatch) {
+        public RecordAppendResult(FutureRecordMetadata future,
+                                  boolean batchIsFull,
+                                  boolean newBatchCreated,
+                                  boolean abortForNewBatch,
+                                  int appendedBytes) {
             this.future = future;
             this.batchIsFull = batchIsFull;
             this.newBatchCreated = newBatchCreated;
             this.abortForNewBatch = abortForNewBatch;
+            this.appendedBytes = appendedBytes;
         }
     }
 
+    /*
+     * The callbacks passed into append
+     */
+    public interface AppendCallbacks extends Callback {
+        /**
+         * Called to set partition (when append is called, partition may not be calculated yet).
+         * @param partition The partition
+         */
+        void setPartition(int partition);
+    }
+
     /*
      * The set of nodes that have at least one complete record batch in the accumulator
      */
@@ -864,4 +1163,30 @@ public ReadyCheckResult(Set<Node> readyNodes, long nextReadyCheckDelayMs, Set<St
             this.unknownLeaderTopics = unknownLeaderTopics;
         }
     }
+
+    /**
+     * Per topic info.
+     */
+    private static class TopicInfo {
+        public final ConcurrentMap<Integer /*partition*/, Deque<ProducerBatch>> batches = new CopyOnWriteMap<>();
+        public final BuiltInPartitioner builtInPartitioner;
+
+        public TopicInfo(LogContext logContext, String topic, int stickyBatchSize) {
+            builtInPartitioner = new BuiltInPartitioner(logContext, topic, stickyBatchSize);
+        }
+    }
+
+    /**
+     * Node latency stats for each node that are used for adaptive partition distribution
+     * Visible for testing
+     */
+    public final static class NodeLatencyStats {
+        volatile public long readyTimeMs;  // last time the node had batches ready to send
+        volatile public long drainTimeMs;  // last time the node was able to drain batches
+
+        NodeLatencyStats(long nowMs) {
+            readyTimeMs = nowMs;
+            drainTimeMs = nowMs;
+        }
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java
index 2f55e62912d76..55eb6c7be2f54 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/Sender.java
@@ -352,8 +352,16 @@ private long sendProducerData(long now) {
         while (iter.hasNext()) {
             Node node = iter.next();
             if (!this.client.ready(node, now)) {
+                // Update just the readyTimeMs of the latency stats, so that it moves forward
+                // every time the batch is ready (then the difference between readyTimeMs and
+                // drainTimeMs would represent how long data is waiting for the node).
+                this.accumulator.updateNodeLatencyStats(node.id(), now, false);
                 iter.remove();
                 notReadyTimeout = Math.min(notReadyTimeout, this.client.pollDelayMs(node, now));
+            } else {
+                // Update both readyTimeMs and drainTimeMs, this would "reset" the node
+                // latency.
+                this.accumulator.updateNodeLatencyStats(node.id(), now, true);
             }
         }
 
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/TransactionManager.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TransactionManager.java
index be881a972551d..5aab62eaf225a 100644
--- a/clients/src/main/java/org/apache/kafka/clients/producer/internals/TransactionManager.java
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TransactionManager.java
@@ -68,7 +68,6 @@
 import org.apache.kafka.common.requests.TxnOffsetCommitRequest.CommittedOffset;
 import org.apache.kafka.common.requests.TxnOffsetCommitResponse;
 import org.apache.kafka.common.utils.LogContext;
-import org.apache.kafka.common.utils.PrimitiveRef;
 import org.slf4j.Logger;
 
 import java.util.ArrayList;
@@ -84,8 +83,6 @@
 import java.util.PriorityQueue;
 import java.util.Set;
 import java.util.SortedSet;
-import java.util.TreeSet;
-import java.util.function.Consumer;
 import java.util.function.Supplier;
 
 /**
@@ -93,121 +90,19 @@
  */
 public class TransactionManager {
     private static final int NO_INFLIGHT_REQUEST_CORRELATION_ID = -1;
-    private static final int NO_LAST_ACKED_SEQUENCE_NUMBER = -1;
+    static final int NO_LAST_ACKED_SEQUENCE_NUMBER = -1;
 
     private final Logger log;
     private final String transactionalId;
     private final int transactionTimeoutMs;
     private final ApiVersions apiVersions;
 
-    private static class TopicPartitionBookkeeper {
-
-        private final Map<TopicPartition, TopicPartitionEntry> topicPartitions = new HashMap<>();
-
-        private TopicPartitionEntry getPartition(TopicPartition topicPartition) {
-            TopicPartitionEntry ent = topicPartitions.get(topicPartition);
-            if (ent == null)
-                throw new IllegalStateException("Trying to get the sequence number for " + topicPartition +
-                        ", but the sequence number was never set for this partition.");
-            return ent;
-        }
-
-        private TopicPartitionEntry getOrCreatePartition(TopicPartition topicPartition) {
-            TopicPartitionEntry ent = topicPartitions.get(topicPartition);
-            if (ent == null) {
-                ent = new TopicPartitionEntry();
-                topicPartitions.put(topicPartition, ent);
-            }
-            return ent;
-        }
-
-        private void addPartition(TopicPartition topicPartition) {
-            this.topicPartitions.putIfAbsent(topicPartition, new TopicPartitionEntry());
-        }
-
-        private boolean contains(TopicPartition topicPartition) {
-            return topicPartitions.containsKey(topicPartition);
-        }
-
-        private void reset() {
-            topicPartitions.clear();
-        }
-
-        private OptionalLong lastAckedOffset(TopicPartition topicPartition) {
-            TopicPartitionEntry entry = topicPartitions.get(topicPartition);
-            if (entry != null && entry.lastAckedOffset != ProduceResponse.INVALID_OFFSET)
-                return OptionalLong.of(entry.lastAckedOffset);
-            else
-                return OptionalLong.empty();
-        }
-
-        private OptionalInt lastAckedSequence(TopicPartition topicPartition) {
-            TopicPartitionEntry entry = topicPartitions.get(topicPartition);
-            if (entry != null && entry.lastAckedSequence != NO_LAST_ACKED_SEQUENCE_NUMBER)
-                return OptionalInt.of(entry.lastAckedSequence);
-            else
-                return OptionalInt.empty();
-        }
-
-        private void startSequencesAtBeginning(TopicPartition topicPartition, ProducerIdAndEpoch newProducerIdAndEpoch) {
-            final PrimitiveRef.IntRef sequence = PrimitiveRef.ofInt(0);
-            TopicPartitionEntry topicPartitionEntry = getPartition(topicPartition);
-            topicPartitionEntry.resetSequenceNumbers(inFlightBatch -> {
-                inFlightBatch.resetProducerState(newProducerIdAndEpoch, sequence.value, inFlightBatch.isTransactional());
-                sequence.value += inFlightBatch.recordCount;
-            });
-            topicPartitionEntry.producerIdAndEpoch = newProducerIdAndEpoch;
-            topicPartitionEntry.nextSequence = sequence.value;
-            topicPartitionEntry.lastAckedSequence = NO_LAST_ACKED_SEQUENCE_NUMBER;
-        }
-    }
-
-    private static class TopicPartitionEntry {
-
-        // The producer id/epoch being used for a given partition.
-        private ProducerIdAndEpoch producerIdAndEpoch;
-
-        // The base sequence of the next batch bound for a given partition.
-        private int nextSequence;
-
-        // The sequence number of the last record of the last ack'd batch from the given partition. When there are no
-        // in flight requests for a partition, the lastAckedSequence(topicPartition) == nextSequence(topicPartition) - 1.
-        private int lastAckedSequence;
-
-        // Keep track of the in flight batches bound for a partition, ordered by sequence. This helps us to ensure that
-        // we continue to order batches by the sequence numbers even when the responses come back out of order during
-        // leader failover. We add a batch to the queue when it is drained, and remove it when the batch completes
-        // (either successfully or through a fatal failure).
-        private SortedSet<ProducerBatch> inflightBatchesBySequence;
-
-        // We keep track of the last acknowledged offset on a per partition basis in order to disambiguate UnknownProducer
-        // responses which are due to the retention period elapsing, and those which are due to actual lost data.
-        private long lastAckedOffset;
-
-        TopicPartitionEntry() {
-            this.producerIdAndEpoch = ProducerIdAndEpoch.NONE;
-            this.nextSequence = 0;
-            this.lastAckedSequence = NO_LAST_ACKED_SEQUENCE_NUMBER;
-            this.lastAckedOffset = ProduceResponse.INVALID_OFFSET;
-            this.inflightBatchesBySequence = new TreeSet<>(Comparator.comparingInt(ProducerBatch::baseSequence));
-        }
-
-        void resetSequenceNumbers(Consumer<ProducerBatch> resetSequence) {
-            TreeSet<ProducerBatch> newInflights = new TreeSet<>(Comparator.comparingInt(ProducerBatch::baseSequence));
-            for (ProducerBatch inflightBatch : inflightBatchesBySequence) {
-                resetSequence.accept(inflightBatch);
-                newInflights.add(inflightBatch);
-            }
-            inflightBatchesBySequence = newInflights;
-        }
-    }
-
-    private final TopicPartitionBookkeeper topicPartitionBookkeeper;
+    private final TxnPartitionMap txnPartitionMap;
 
     private final Map<TopicPartition, CommittedOffset> pendingTxnOffsetCommits;
 
-    // If a batch bound for a partition expired locally after being sent at least once, the partition has is considered
-    // to have an unresolved state. We keep track fo such partitions here, and cannot assign any more sequence numbers
+    // If a batch bound for a partition expired locally after being sent at least once, the partition is considered
+    // to have an unresolved state. We keep track of such partitions here, and cannot assign any more sequence numbers
     // for this partition until the unresolved state gets cleared. This may happen if other inflight batches returned
     // successfully (indicating that the expired batch actually made it to the broker). If we don't get any successful
     // responses for the partition once the inflight request count falls to zero, we reset the producer id and
@@ -320,7 +215,7 @@ public TransactionManager(final LogContext logContext,
         this.partitionsWithUnresolvedSequences = new HashMap<>();
         this.partitionsToRewriteSequences = new HashSet<>();
         this.retryBackoffMs = retryBackoffMs;
-        this.topicPartitionBookkeeper = new TopicPartitionBookkeeper();
+        this.txnPartitionMap = new TxnPartitionMap();
         this.apiVersions = apiVersions;
     }
 
@@ -444,7 +339,7 @@ public synchronized void maybeAddPartition(TopicPartition topicPartition) {
                 return;
             } else {
                 log.debug("Begin adding new partition {} to transaction", topicPartition);
-                topicPartitionBookkeeper.addPartition(topicPartition);
+                txnPartitionMap.getOrCreate(topicPartition);
                 newPartitionsInTransaction.add(topicPartition);
             }
         }
@@ -532,7 +427,7 @@ synchronized public void maybeUpdateProducerIdAndEpoch(TopicPartition topicParti
         if (hasStaleProducerIdAndEpoch(topicPartition) && !hasInflightBatches(topicPartition)) {
             // If the batch was on a different ID and/or epoch (due to an epoch bump) and all its in-flight batches
             // have completed, reset the partition sequence so that the next batch (with the new epoch) starts from 0
-            topicPartitionBookkeeper.startSequencesAtBeginning(topicPartition, this.producerIdAndEpoch);
+            txnPartitionMap.startSequencesAtBeginning(topicPartition, this.producerIdAndEpoch);
             log.debug("ProducerId of partition {} set to {} with epoch {}. Reinitialize sequence at beginning.",
                       topicPartition, producerIdAndEpoch.producerId, producerIdAndEpoch.epoch);
         }
@@ -561,12 +456,12 @@ private void resetIdempotentProducerId() {
     }
 
     private void resetSequenceForPartition(TopicPartition topicPartition) {
-        topicPartitionBookkeeper.topicPartitions.remove(topicPartition);
+        txnPartitionMap.topicPartitions.remove(topicPartition);
         this.partitionsWithUnresolvedSequences.remove(topicPartition);
     }
 
     private void resetSequenceNumbers() {
-        topicPartitionBookkeeper.reset();
+        txnPartitionMap.reset();
         this.partitionsWithUnresolvedSequences.clear();
     }
 
@@ -585,7 +480,7 @@ private void bumpIdempotentProducerEpoch() {
 
         // When the epoch is bumped, rewrite all in-flight sequences for the partition(s) that triggered the epoch bump
         for (TopicPartition topicPartition : this.partitionsToRewriteSequences) {
-            this.topicPartitionBookkeeper.startSequencesAtBeginning(topicPartition, this.producerIdAndEpoch);
+            this.txnPartitionMap.startSequencesAtBeginning(topicPartition, this.producerIdAndEpoch);
             this.partitionsWithUnresolvedSequences.remove(topicPartition);
         }
         this.partitionsToRewriteSequences.clear();
@@ -613,27 +508,27 @@ synchronized void bumpIdempotentEpochAndResetIdIfNeeded() {
      * Returns the next sequence number to be written to the given TopicPartition.
      */
     synchronized Integer sequenceNumber(TopicPartition topicPartition) {
-        return topicPartitionBookkeeper.getOrCreatePartition(topicPartition).nextSequence;
+        return txnPartitionMap.getOrCreate(topicPartition).nextSequence;
     }
 
     /**
      * Returns the current producer id/epoch of the given TopicPartition.
      */
     synchronized ProducerIdAndEpoch producerIdAndEpoch(TopicPartition topicPartition) {
-        return topicPartitionBookkeeper.getOrCreatePartition(topicPartition).producerIdAndEpoch;
+        return txnPartitionMap.getOrCreate(topicPartition).producerIdAndEpoch;
     }
 
     synchronized void incrementSequenceNumber(TopicPartition topicPartition, int increment) {
         Integer currentSequence = sequenceNumber(topicPartition);
 
         currentSequence = DefaultRecordBatch.incrementSequence(currentSequence, increment);
-        topicPartitionBookkeeper.getPartition(topicPartition).nextSequence = currentSequence;
+        txnPartitionMap.get(topicPartition).nextSequence = currentSequence;
     }
 
     synchronized void addInFlightBatch(ProducerBatch batch) {
         if (!batch.hasSequence())
             throw new IllegalStateException("Can't track batch for partition " + batch.topicPartition + " when sequence is not set.");
-        topicPartitionBookkeeper.getPartition(batch.topicPartition).inflightBatchesBySequence.add(batch);
+        txnPartitionMap.get(batch.topicPartition).inflightBatchesBySequence.add(batch);
     }
 
     /**
@@ -647,7 +542,7 @@ synchronized int firstInFlightSequence(TopicPartition topicPartition) {
         if (!hasInflightBatches(topicPartition))
             return RecordBatch.NO_SEQUENCE;
 
-        SortedSet<ProducerBatch> inflightBatches = topicPartitionBookkeeper.getPartition(topicPartition).inflightBatchesBySequence;
+        SortedSet<ProducerBatch> inflightBatches = txnPartitionMap.get(topicPartition).inflightBatchesBySequence;
         if (inflightBatches.isEmpty())
             return RecordBatch.NO_SEQUENCE;
         else
@@ -655,20 +550,20 @@ synchronized int firstInFlightSequence(TopicPartition topicPartition) {
     }
 
     synchronized ProducerBatch nextBatchBySequence(TopicPartition topicPartition) {
-        SortedSet<ProducerBatch> queue = topicPartitionBookkeeper.getPartition(topicPartition).inflightBatchesBySequence;
+        SortedSet<ProducerBatch> queue = txnPartitionMap.get(topicPartition).inflightBatchesBySequence;
         return queue.isEmpty() ? null : queue.first();
     }
 
     synchronized void removeInFlightBatch(ProducerBatch batch) {
         if (hasInflightBatches(batch.topicPartition)) {
-            topicPartitionBookkeeper.getPartition(batch.topicPartition).inflightBatchesBySequence.remove(batch);
+            txnPartitionMap.get(batch.topicPartition).inflightBatchesBySequence.remove(batch);
         }
     }
 
     private int maybeUpdateLastAckedSequence(TopicPartition topicPartition, int sequence) {
         int lastAckedSequence = lastAckedSequence(topicPartition).orElse(NO_LAST_ACKED_SEQUENCE_NUMBER);
         if (sequence > lastAckedSequence) {
-            topicPartitionBookkeeper.getPartition(topicPartition).lastAckedSequence = sequence;
+            txnPartitionMap.get(topicPartition).lastAckedSequence = sequence;
             return sequence;
         }
 
@@ -676,11 +571,11 @@ private int maybeUpdateLastAckedSequence(TopicPartition topicPartition, int sequ
     }
 
     synchronized OptionalInt lastAckedSequence(TopicPartition topicPartition) {
-        return topicPartitionBookkeeper.lastAckedSequence(topicPartition);
+        return txnPartitionMap.lastAckedSequence(topicPartition);
     }
 
     synchronized OptionalLong lastAckedOffset(TopicPartition topicPartition) {
-        return topicPartitionBookkeeper.lastAckedOffset(topicPartition);
+        return txnPartitionMap.lastAckedOffset(topicPartition);
     }
 
     private void updateLastAckedOffset(ProduceResponse.PartitionResponse response, ProducerBatch batch) {
@@ -692,10 +587,10 @@ private void updateLastAckedOffset(ProduceResponse.PartitionResponse response, P
         // response for this. This can happen only if the producer is only idempotent (not transactional) and in
         // this case there will be no tracked bookkeeper entry about it, so we have to insert one.
         if (!lastAckedOffset.isPresent() && !isTransactional()) {
-            topicPartitionBookkeeper.addPartition(batch.topicPartition);
+            txnPartitionMap.getOrCreate(batch.topicPartition);
         }
         if (lastOffset > lastAckedOffset.orElse(ProduceResponse.INVALID_OFFSET)) {
-            topicPartitionBookkeeper.getPartition(batch.topicPartition).lastAckedOffset = lastOffset;
+            txnPartitionMap.get(batch.topicPartition).lastAckedOffset = lastOffset;
         } else {
             log.trace("Partition {} keeps lastOffset at {}", batch.topicPartition, lastOffset);
         }
@@ -768,7 +663,7 @@ synchronized void handleFailedBatch(ProducerBatch batch, RuntimeException except
     // This method must only be called when we know that the batch is question has been unequivocally failed by the broker,
     // ie. it has received a confirmed fatal status code like 'Message Too Large' or something similar.
     private void adjustSequencesDueToFailedBatch(ProducerBatch batch) {
-        if (!topicPartitionBookkeeper.contains(batch.topicPartition))
+        if (!txnPartitionMap.contains(batch.topicPartition))
             // Sequence numbers are not being tracked for this partition. This could happen if the producer id was just
             // reset due to a previous OutOfOrderSequenceException.
             return;
@@ -781,7 +676,7 @@ private void adjustSequencesDueToFailedBatch(ProducerBatch batch) {
 
         setNextSequence(batch.topicPartition, currentSequence);
 
-        topicPartitionBookkeeper.getPartition(batch.topicPartition).resetSequenceNumbers(inFlightBatch -> {
+        txnPartitionMap.get(batch.topicPartition).resetSequenceNumbers(inFlightBatch -> {
             if (inFlightBatch.baseSequence() < batch.baseSequence())
                 return;
 
@@ -790,17 +685,16 @@ private void adjustSequencesDueToFailedBatch(ProducerBatch batch) {
                 throw new IllegalStateException("Sequence number for batch with sequence " + inFlightBatch.baseSequence()
                         + " for partition " + batch.topicPartition + " is going to become negative: " + newSequence);
 
-            log.info("Resetting sequence number of batch with current sequence {} for partition {} to {}", inFlightBatch.baseSequence(), batch.topicPartition, newSequence);
             inFlightBatch.resetProducerState(new ProducerIdAndEpoch(inFlightBatch.producerId(), inFlightBatch.producerEpoch()), newSequence, inFlightBatch.isTransactional());
         });
     }
 
     synchronized boolean hasInflightBatches(TopicPartition topicPartition) {
-        return !topicPartitionBookkeeper.getOrCreatePartition(topicPartition).inflightBatchesBySequence.isEmpty();
+        return !txnPartitionMap.getOrCreate(topicPartition).inflightBatchesBySequence.isEmpty();
     }
 
     synchronized boolean hasStaleProducerIdAndEpoch(TopicPartition topicPartition) {
-        return !producerIdAndEpoch.equals(topicPartitionBookkeeper.getOrCreatePartition(topicPartition).producerIdAndEpoch);
+        return !producerIdAndEpoch.equals(txnPartitionMap.getOrCreate(topicPartition).producerIdAndEpoch);
     }
 
     synchronized boolean hasUnresolvedSequences() {
@@ -865,7 +759,7 @@ private boolean isNextSequence(TopicPartition topicPartition, int sequence) {
     }
 
     private void setNextSequence(TopicPartition topicPartition, int sequence) {
-        topicPartitionBookkeeper.getPartition(topicPartition).nextSequence = sequence;
+        txnPartitionMap.get(topicPartition).nextSequence = sequence;
     }
 
     private boolean isNextSequenceForUnresolvedPartition(TopicPartition topicPartition, int sequence) {
@@ -1017,7 +911,7 @@ synchronized boolean canRetry(ProduceResponse.PartitionResponse response, Produc
                 // inflight batches to be from the beginning and retry them, so that the transaction does not need to
                 // be aborted. For the idempotent producer, bump the epoch to avoid reusing (sequence, epoch) pairs
                 if (isTransactional()) {
-                    topicPartitionBookkeeper.startSequencesAtBeginning(batch.topicPartition, this.producerIdAndEpoch);
+                    txnPartitionMap.startSequencesAtBeginning(batch.topicPartition, this.producerIdAndEpoch);
                 } else {
                     requestEpochBumpForPartition(batch.topicPartition);
                 }
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionEntry.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionEntry.java
new file mode 100644
index 0000000000000..be79d8ee0f1ad
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionEntry.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.clients.producer.internals;
+
+import java.util.Comparator;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.function.Consumer;
+import org.apache.kafka.common.requests.ProduceResponse;
+import org.apache.kafka.common.utils.ProducerIdAndEpoch;
+
+class TxnPartitionEntry {
+
+    // The producer id/epoch being used for a given partition.
+    ProducerIdAndEpoch producerIdAndEpoch;
+
+    // The base sequence of the next batch bound for a given partition.
+    int nextSequence;
+
+    // The sequence number of the last record of the last ack'd batch from the given partition. When there are no
+    // in flight requests for a partition, the lastAckedSequence(topicPartition) == nextSequence(topicPartition) - 1.
+    int lastAckedSequence;
+
+    // Keep track of the in flight batches bound for a partition, ordered by sequence. This helps us to ensure that
+    // we continue to order batches by the sequence numbers even when the responses come back out of order during
+    // leader failover. We add a batch to the queue when it is drained, and remove it when the batch completes
+    // (either successfully or through a fatal failure).
+    SortedSet<ProducerBatch> inflightBatchesBySequence;
+
+    // We keep track of the last acknowledged offset on a per partition basis in order to disambiguate UnknownProducer
+    // responses which are due to the retention period elapsing, and those which are due to actual lost data.
+    long lastAckedOffset;
+
+    // `inflightBatchesBySequence` should only have batches with the same producer id and producer
+    // epoch, but there is an edge case where we may remove the wrong batch if the comparator
+    // only takes `baseSequence` into account.
+    // See https://github.com/apache/kafka/pull/12096#pullrequestreview-955554191 for details.
+    private static final Comparator<ProducerBatch> PRODUCER_BATCH_COMPARATOR =
+        Comparator.comparingLong(ProducerBatch::producerId)
+            .thenComparingInt(ProducerBatch::producerEpoch)
+            .thenComparingInt(ProducerBatch::baseSequence);
+
+    TxnPartitionEntry() {
+        this.producerIdAndEpoch = ProducerIdAndEpoch.NONE;
+        this.nextSequence = 0;
+        this.lastAckedSequence = TransactionManager.NO_LAST_ACKED_SEQUENCE_NUMBER;
+        this.lastAckedOffset = ProduceResponse.INVALID_OFFSET;
+        this.inflightBatchesBySequence = new TreeSet<>(PRODUCER_BATCH_COMPARATOR);
+    }
+
+    void resetSequenceNumbers(Consumer<ProducerBatch> resetSequence) {
+        TreeSet<ProducerBatch> newInflights = new TreeSet<>(PRODUCER_BATCH_COMPARATOR);
+        for (ProducerBatch inflightBatch : inflightBatchesBySequence) {
+            resetSequence.accept(inflightBatch);
+            newInflights.add(inflightBatch);
+        }
+        inflightBatchesBySequence = newInflights;
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionMap.java b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionMap.java
new file mode 100644
index 0000000000000..95553119c5bf4
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/clients/producer/internals/TxnPartitionMap.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.clients.producer.internals;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.OptionalInt;
+import java.util.OptionalLong;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.requests.ProduceResponse;
+import org.apache.kafka.common.utils.PrimitiveRef;
+import org.apache.kafka.common.utils.ProducerIdAndEpoch;
+
+class TxnPartitionMap {
+
+    final Map<TopicPartition, TxnPartitionEntry> topicPartitions = new HashMap<>();
+
+    TxnPartitionEntry get(TopicPartition topicPartition) {
+        TxnPartitionEntry ent = topicPartitions.get(topicPartition);
+        if (ent == null) {
+            throw new IllegalStateException("Trying to get the sequence number for " + topicPartition +
+                ", but the sequence number was never set for this partition.");
+        }
+        return ent;
+    }
+
+    TxnPartitionEntry getOrCreate(TopicPartition topicPartition) {
+        return topicPartitions.computeIfAbsent(topicPartition, tp -> new TxnPartitionEntry());
+    }
+
+    boolean contains(TopicPartition topicPartition) {
+        return topicPartitions.containsKey(topicPartition);
+    }
+
+    void reset() {
+        topicPartitions.clear();
+    }
+
+    OptionalLong lastAckedOffset(TopicPartition topicPartition) {
+        TxnPartitionEntry entry = topicPartitions.get(topicPartition);
+        if (entry != null && entry.lastAckedOffset != ProduceResponse.INVALID_OFFSET) {
+            return OptionalLong.of(entry.lastAckedOffset);
+        } else {
+            return OptionalLong.empty();
+        }
+    }
+
+    OptionalInt lastAckedSequence(TopicPartition topicPartition) {
+        TxnPartitionEntry entry = topicPartitions.get(topicPartition);
+        if (entry != null && entry.lastAckedSequence != TransactionManager.NO_LAST_ACKED_SEQUENCE_NUMBER) {
+            return OptionalInt.of(entry.lastAckedSequence);
+        } else {
+            return OptionalInt.empty();
+        }
+    }
+
+    void startSequencesAtBeginning(TopicPartition topicPartition, ProducerIdAndEpoch newProducerIdAndEpoch) {
+        final PrimitiveRef.IntRef sequence = PrimitiveRef.ofInt(0);
+        TxnPartitionEntry topicPartitionEntry = get(topicPartition);
+        topicPartitionEntry.resetSequenceNumbers(inFlightBatch -> {
+            inFlightBatch.resetProducerState(newProducerIdAndEpoch, sequence.value, inFlightBatch.isTransactional());
+            sequence.value += inFlightBatch.recordCount;
+        });
+        topicPartitionEntry.producerIdAndEpoch = newProducerIdAndEpoch;
+        topicPartitionEntry.nextSequence = sequence.value;
+        topicPartitionEntry.lastAckedSequence = TransactionManager.NO_LAST_ACKED_SEQUENCE_NUMBER;
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/common/Uuid.java b/clients/src/main/java/org/apache/kafka/common/Uuid.java
index a639f3edd7fc6..83b8f0f0b1644 100644
--- a/clients/src/main/java/org/apache/kafka/common/Uuid.java
+++ b/clients/src/main/java/org/apache/kafka/common/Uuid.java
@@ -31,13 +31,11 @@ public class Uuid implements Comparable<Uuid> {
      * A UUID for the metadata topic in KRaft mode. Will never be returned by the randomUuid method.
      */
     public static final Uuid METADATA_TOPIC_ID = new Uuid(0L, 1L);
-    private static final java.util.UUID METADATA_TOPIC_ID_INTERNAL = new java.util.UUID(0L, 1L);
 
     /**
      * A UUID that represents a null or empty UUID. Will never be returned by the randomUuid method.
      */
     public static final Uuid ZERO_UUID = new Uuid(0L, 0L);
-    private static final java.util.UUID ZERO_ID_INTERNAL = new java.util.UUID(0L, 0L);
 
     private final long mostSignificantBits;
     private final long leastSignificantBits;
@@ -51,15 +49,22 @@ public Uuid(long mostSigBits, long leastSigBits) {
         this.leastSignificantBits = leastSigBits;
     }
 
+    private static Uuid unsafeRandomUuid() {
+        java.util.UUID jUuid = java.util.UUID.randomUUID();
+        return new Uuid(jUuid.getMostSignificantBits(), jUuid.getLeastSignificantBits());
+    }
+
     /**
      * Static factory to retrieve a type 4 (pseudo randomly generated) UUID.
+     *
+     * This will not generate a UUID equal to 0, 1, or one whose string representation starts with a dash ("-")
      */
     public static Uuid randomUuid() {
-        java.util.UUID uuid = java.util.UUID.randomUUID();
-        while (uuid.equals(METADATA_TOPIC_ID_INTERNAL) || uuid.equals(ZERO_ID_INTERNAL)) {
-            uuid = java.util.UUID.randomUUID();
+        Uuid uuid = unsafeRandomUuid();
+        while (uuid.equals(METADATA_TOPIC_ID) || uuid.equals(ZERO_UUID) || uuid.toString().startsWith("-")) {
+            uuid = unsafeRandomUuid();
         }
-        return new Uuid(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
+        return uuid;
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/acl/AclOperation.java b/clients/src/main/java/org/apache/kafka/common/acl/AclOperation.java
index 671069775ca0e..19f4edaaa7187 100644
--- a/clients/src/main/java/org/apache/kafka/common/acl/AclOperation.java
+++ b/clients/src/main/java/org/apache/kafka/common/acl/AclOperation.java
@@ -106,7 +106,17 @@ public enum AclOperation {
     /**
      * IDEMPOTENT_WRITE operation.
      */
-    IDEMPOTENT_WRITE((byte) 12);
+    IDEMPOTENT_WRITE((byte) 12),
+
+    /**
+     * CREATE_TOKENS operation.
+     */
+    CREATE_TOKENS((byte) 13),
+
+    /**
+     * DESCRIBE_TOKENS operation.
+     */
+    DESCRIBE_TOKENS((byte) 14);
 
     // Note: we cannot have more than 30 ACL operations without modifying the format used
     // to describe ACL operations in MetadataResponse.
diff --git a/clients/src/main/java/org/apache/kafka/common/compress/KafkaLZ4BlockInputStream.java b/clients/src/main/java/org/apache/kafka/common/compress/KafkaLZ4BlockInputStream.java
index e2fbd5ac04d98..037af8c8dc7f1 100644
--- a/clients/src/main/java/org/apache/kafka/common/compress/KafkaLZ4BlockInputStream.java
+++ b/clients/src/main/java/org/apache/kafka/common/compress/KafkaLZ4BlockInputStream.java
@@ -173,7 +173,7 @@ private void readBlock() throws IOException {
                 in.getInt(); // TODO: verify this content checksum
             return;
         } else if (blockSize > maxBlockSize) {
-            throw new IOException(String.format("Block size %s exceeded max: %s", blockSize, maxBlockSize));
+            throw new IOException(String.format("Block size %d exceeded max: %d", blockSize, maxBlockSize));
         }
 
         if (in.remaining() < blockSize) {
diff --git a/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java b/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java
index 7ef46092749e7..e3fda4d9f5406 100644
--- a/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java
+++ b/clients/src/main/java/org/apache/kafka/common/config/AbstractConfig.java
@@ -380,8 +380,10 @@ private void logAll() {
      * Log warnings for any unused configurations
      */
     public void logUnused() {
-        for (String key : unused())
-            log.warn("The configuration '{}' was supplied but isn't a known config.", key);
+        Set<String> unusedkeys = unused();
+        if (!unusedkeys.isEmpty()) {
+            log.warn("These configurations '{}' were supplied but are not used yet.", unusedkeys);
+        }
     }
 
     private <T> T getConfiguredInstance(Object klass, Class<T> t, Map<String, Object> configPairs) {
diff --git a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java
index 85b0103e59ae8..1dfbebbe205a4 100644
--- a/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java
+++ b/clients/src/main/java/org/apache/kafka/common/config/ConfigDef.java
@@ -16,8 +16,6 @@
  */
 package org.apache.kafka.common.config;
 
-import java.util.function.Function;
-import java.util.stream.Collectors;
 import org.apache.kafka.common.config.types.Password;
 import org.apache.kafka.common.utils.Utils;
 
@@ -33,8 +31,10 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.function.BiConsumer;
+import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 /**
  * This class is used for specifying the set of expected configurations. For each configuration, you can specify
@@ -1121,6 +1121,32 @@ public String toString() {
         }
     }
 
+    public static class ListSize implements Validator {
+        final int maxSize;
+
+        private ListSize(final int maxSize) {
+            this.maxSize = maxSize;
+        }
+
+        public static ListSize atMostOfSize(final int maxSize) {
+            return new ListSize(maxSize);
+        }
+
+        @Override
+        public void ensureValid(final String name, final Object value) {
+            @SuppressWarnings("unchecked")
+            List<String> values = (List<String>) value;
+            if (values.size() > maxSize) {
+                throw new ConfigException(name, value, "exceeds maximum list size of [" + maxSize + "].");
+            }
+        }
+
+        @Override
+        public String toString() {
+            return "List containing maximum of " + maxSize + " elements";
+        }
+    }
+
     public static class ConfigKey {
         public final String name;
         public final Type type;
@@ -1217,15 +1243,16 @@ static String niceMemoryUnits(long bytes) {
                 break;
             }
         }
+        String resultFormat = " (" + value + " %s" + (value == 1 ? ")" : "s)");
         switch (i) {
             case 1:
-                return " (" + value + " kibibyte" + (value == 1 ? ")" : "s)");
+                return String.format(resultFormat, "kibibyte");
             case 2:
-                return " (" + value + " mebibyte" + (value == 1 ? ")" : "s)");
+                return String.format(resultFormat, "mebibyte");
             case 3:
-                return " (" + value + " gibibyte" + (value == 1 ? ")" : "s)");
+                return String.format(resultFormat, "gibibyte");
             case 4:
-                return " (" + value + " tebibyte" + (value == 1 ? ")" : "s)");
+                return String.format(resultFormat, "tebibyte");
             default:
                 return "";
         }
diff --git a/clients/src/main/java/org/apache/kafka/common/config/SslClientAuth.java b/clients/src/main/java/org/apache/kafka/common/config/SslClientAuth.java
index 9d85b184ab9ab..75f8e3640e9fa 100644
--- a/clients/src/main/java/org/apache/kafka/common/config/SslClientAuth.java
+++ b/clients/src/main/java/org/apache/kafka/common/config/SslClientAuth.java
@@ -31,7 +31,7 @@ public enum SslClientAuth {
     NONE;
 
     public static final List<SslClientAuth> VALUES =
-        Collections.unmodifiableList(Arrays.asList(SslClientAuth.values()));
+            Collections.unmodifiableList(Arrays.asList(SslClientAuth.values()));
 
     public static SslClientAuth forConfig(String key) {
         if (key == null) {
@@ -45,4 +45,9 @@ public static SslClientAuth forConfig(String key) {
         }
         return null;
     }
+
+    @Override
+    public String toString() {
+        return super.toString().toLowerCase(Locale.ROOT);
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java b/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java
index 5061ed5cfcaab..4dfb0bd5a8dab 100644
--- a/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java
+++ b/clients/src/main/java/org/apache/kafka/common/config/SslConfigs.java
@@ -69,7 +69,7 @@ public class SslConfigs {
 
     public static final String SSL_KEYSTORE_TYPE_CONFIG = "ssl.keystore.type";
     public static final String SSL_KEYSTORE_TYPE_DOC = "The file format of the key store file. "
-            + "This is optional for client.";
+            + "This is optional for client. The values currently supported by the default `ssl.engine.factory.class` are [JKS, PKCS12, PEM].";
     public static final String DEFAULT_SSL_KEYSTORE_TYPE = "JKS";
 
     public static final String SSL_KEYSTORE_KEY_CONFIG = "ssl.keystore.key";
@@ -96,10 +96,10 @@ public class SslConfigs {
 
     public static final String SSL_KEY_PASSWORD_CONFIG = "ssl.key.password";
     public static final String SSL_KEY_PASSWORD_DOC = "The password of the private key in the key store file or "
-        + "the PEM key specified in `ssl.keystore.key'. This is required for clients only if two-way authentication is configured.";
+        + "the PEM key specified in `ssl.keystore.key'.";
 
     public static final String SSL_TRUSTSTORE_TYPE_CONFIG = "ssl.truststore.type";
-    public static final String SSL_TRUSTSTORE_TYPE_DOC = "The file format of the trust store file.";
+    public static final String SSL_TRUSTSTORE_TYPE_DOC = "The file format of the trust store file. The values currently supported by the default `ssl.engine.factory.class` are [JKS, PKCS12, PEM].";
     public static final String DEFAULT_SSL_TRUSTSTORE_TYPE = "JKS";
 
     public static final String SSL_TRUSTSTORE_LOCATION_CONFIG = "ssl.truststore.location";
diff --git a/clients/src/main/java/org/apache/kafka/common/config/TopicConfig.java b/clients/src/main/java/org/apache/kafka/common/config/TopicConfig.java
index 73439c5f29af9..5be3057b62db8 100755
--- a/clients/src/main/java/org/apache/kafka/common/config/TopicConfig.java
+++ b/clients/src/main/java/org/apache/kafka/common/config/TopicConfig.java
@@ -99,7 +99,7 @@ public class TopicConfig {
         "limit only applies to a single record in that case.";
 
     public static final String INDEX_INTERVAL_BYTES_CONFIG = "index.interval.bytes";
-    public static final String INDEX_INTERVAL_BYTES_DOCS = "This setting controls how frequently " +
+    public static final String INDEX_INTERVAL_BYTES_DOC = "This setting controls how frequently " +
         "Kafka adds an index entry to its offset index. The default setting ensures that we index a " +
         "message roughly every 4096 bytes. More indexing allows reads to jump closer to the exact " +
         "position in the log but makes the index larger. You probably don't need to change this.";
@@ -139,11 +139,13 @@ public class TopicConfig {
     public static final String CLEANUP_POLICY_CONFIG = "cleanup.policy";
     public static final String CLEANUP_POLICY_COMPACT = "compact";
     public static final String CLEANUP_POLICY_DELETE = "delete";
-    public static final String CLEANUP_POLICY_DOC = "A string that is either \"" + CLEANUP_POLICY_DELETE +
-        "\" or \"" + CLEANUP_POLICY_COMPACT + "\" or both. This string designates the retention policy to use on " +
-        "old log segments. The default policy (\"delete\") will discard old segments when their retention " +
-        "time or size limit has been reached. The \"compact\" setting will enable <a href=\"#compaction\">log " +
-        "compaction</a> on the topic.";
+    public static final String CLEANUP_POLICY_DOC = "This config designates the retention policy to " +
+        "use on log segments. The \"delete\" policy (which is the default) will discard old segments " +
+        "when their retention time or size limit has been reached. The \"compact\" policy will enable " +
+        "<a href=\"#compaction\">log compaction</a>, which retains the latest value for each key. " +
+        "It is also possible to specify both policies in a comma-separated list (e.g. \"delete,compact\"). " +
+        "In this case, old segments will be discarded per the retention time and size configuration, " +
+        "while retained segments will be compacted.";
 
     public static final String UNCLEAN_LEADER_ELECTION_ENABLE_CONFIG = "unclean.leader.election.enable";
     public static final String UNCLEAN_LEADER_ELECTION_ENABLE_DOC = "Indicates whether to enable replicas " +
diff --git a/clients/src/main/java/org/apache/kafka/common/errors/AuthorizerNotReadyException.java b/clients/src/main/java/org/apache/kafka/common/errors/AuthorizerNotReadyException.java
new file mode 100644
index 0000000000000..1c110ef2143cc
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/common/errors/AuthorizerNotReadyException.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.common.errors;
+
+/**
+ * An exception that indicates that the authorizer is not ready to receive the request yet.
+ */
+public class AuthorizerNotReadyException extends RetriableException {
+    private static final long serialVersionUID = 1L;
+
+    public AuthorizerNotReadyException() {
+        super();
+    }
+}
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationTest.java b/clients/src/main/java/org/apache/kafka/common/errors/IneligibleReplicaException.java
similarity index 80%
rename from connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationTest.java
rename to clients/src/main/java/org/apache/kafka/common/errors/IneligibleReplicaException.java
index ed82aa97ed48f..6c79add033a4e 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationTest.java
+++ b/clients/src/main/java/org/apache/kafka/common/errors/IneligibleReplicaException.java
@@ -14,10 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.kafka.connect.mirror.integration;
+package org.apache.kafka.common.errors;
 
-import org.junit.jupiter.api.Tag;
-
-@Tag("integration")
-public class MirrorConnectorsIntegrationTest extends MirrorConnectorsIntegrationBaseTest {
+public class IneligibleReplicaException extends ApiException {
+    public IneligibleReplicaException(String message) {
+        super(message);
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/errors/NewLeaderElectedException.java b/clients/src/main/java/org/apache/kafka/common/errors/NewLeaderElectedException.java
new file mode 100644
index 0000000000000..20fd869df9f0e
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/common/errors/NewLeaderElectedException.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.common.errors;
+
+public class NewLeaderElectedException extends ApiException {
+    public NewLeaderElectedException(String message) {
+        super(message);
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/common/feature/Features.java b/clients/src/main/java/org/apache/kafka/common/feature/Features.java
index 4006d71947fb7..a39ee37e53dca 100644
--- a/clients/src/main/java/org/apache/kafka/common/feature/Features.java
+++ b/clients/src/main/java/org/apache/kafka/common/feature/Features.java
@@ -32,7 +32,6 @@
  *
  * @param <VersionRangeType> is the type of version range.
  * @see SupportedVersionRange
- * @see FinalizedVersionRange
  */
 public class Features<VersionRangeType extends BaseVersionRange> {
     private final Map<String, VersionRangeType> features;
@@ -57,20 +56,6 @@ public static Features<SupportedVersionRange> supportedFeatures(Map<String, Supp
         return new Features<>(features);
     }
 
-    /**
-     * @param features   Map of feature name to FinalizedVersionRange.
-     *
-     * @return           Returns a new Features object representing finalized features.
-     */
-    public static Features<FinalizedVersionRange> finalizedFeatures(Map<String, FinalizedVersionRange> features) {
-        return new Features<>(features);
-    }
-
-    // Visible for testing.
-    public static Features<FinalizedVersionRange> emptyFinalizedFeatures() {
-        return new Features<>(new HashMap<>());
-    }
-
     public static Features<SupportedVersionRange> emptySupportedFeatures() {
         return new Features<>(new HashMap<>());
     }
@@ -138,19 +123,6 @@ private static <V extends BaseVersionRange> Features<V> fromFeaturesMap(
                 entry -> converter.fromMap(entry.getValue()))));
     }
 
-    /**
-     * Converts from a map to Features<FinalizedVersionRange>.
-     *
-     * @param featuresMap  the map representation of a Features<FinalizedVersionRange> object,
-     *                     generated using the toMap() API.
-     *
-     * @return             the Features<FinalizedVersionRange> object
-     */
-    public static Features<FinalizedVersionRange> fromFinalizedFeaturesMap(
-        Map<String, Map<String, Short>> featuresMap) {
-        return fromFeaturesMap(featuresMap, FinalizedVersionRange::fromMap);
-    }
-
     /**
      * Converts from a map to Features<SupportedVersionRange>.
      *
diff --git a/clients/src/main/java/org/apache/kafka/common/feature/FinalizedVersionRange.java b/clients/src/main/java/org/apache/kafka/common/feature/FinalizedVersionRange.java
deleted file mode 100644
index 27e6440478644..0000000000000
--- a/clients/src/main/java/org/apache/kafka/common/feature/FinalizedVersionRange.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.kafka.common.feature;
-
-import java.util.Map;
-
-/**
- * An extended {@link BaseVersionRange} representing the min/max versions for a finalized feature.
- */
-public class FinalizedVersionRange extends BaseVersionRange {
-    // Label for the min version key, that's used only to convert to/from a map.
-    private static final String MIN_VERSION_LEVEL_KEY_LABEL = "min_version_level";
-
-    // Label for the max version key, that's used only to convert to/from a map.
-    private static final String MAX_VERSION_LEVEL_KEY_LABEL = "max_version_level";
-
-    public FinalizedVersionRange(short minVersionLevel, short maxVersionLevel) {
-        super(MIN_VERSION_LEVEL_KEY_LABEL, minVersionLevel, MAX_VERSION_LEVEL_KEY_LABEL, maxVersionLevel);
-    }
-
-    public static FinalizedVersionRange fromMap(Map<String, Short> versionRangeMap) {
-        return new FinalizedVersionRange(
-            BaseVersionRange.valueOrThrow(MIN_VERSION_LEVEL_KEY_LABEL, versionRangeMap),
-            BaseVersionRange.valueOrThrow(MAX_VERSION_LEVEL_KEY_LABEL, versionRangeMap));
-    }
-
-    /**
-     * Checks if the [min, max] version level range of this object does *NOT* fall within the
-     * [min, max] range of the provided SupportedVersionRange parameter.
-     *
-     * @param supportedVersionRange   the SupportedVersionRange to be checked
-     *
-     * @return                        - true, if the version levels are compatible
-     *                                - false otherwise
-     */
-    public boolean isIncompatibleWith(SupportedVersionRange supportedVersionRange) {
-        return min() < supportedVersionRange.min() || max() > supportedVersionRange.max();
-    }
-}
diff --git a/clients/src/main/java/org/apache/kafka/common/feature/SupportedVersionRange.java b/clients/src/main/java/org/apache/kafka/common/feature/SupportedVersionRange.java
index 8993014a74b2e..a864a91762635 100644
--- a/clients/src/main/java/org/apache/kafka/common/feature/SupportedVersionRange.java
+++ b/clients/src/main/java/org/apache/kafka/common/feature/SupportedVersionRange.java
@@ -41,4 +41,16 @@ public static SupportedVersionRange fromMap(Map<String, Short> versionRangeMap)
             BaseVersionRange.valueOrThrow(MIN_VERSION_KEY_LABEL, versionRangeMap),
             BaseVersionRange.valueOrThrow(MAX_VERSION_KEY_LABEL, versionRangeMap));
     }
+
+    /**
+     * Checks if the version level does *NOT* fall within the [min, max] range of this SupportedVersionRange.
+     *
+     * @param version   the version to be checked
+     *
+     * @return  - true, if the version levels are incompatible
+     *          - false otherwise
+     */
+    public boolean isIncompatibleWith(short version) {
+        return min() > version || max() < version;
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/internals/Topic.java b/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
index 7a5fefb3d9f23..fbf491bab9800 100644
--- a/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
+++ b/clients/src/main/java/org/apache/kafka/common/internals/Topic.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.common.internals;
 
+import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.InvalidTopicException;
 import org.apache.kafka.common.utils.Utils;
 
@@ -27,6 +28,8 @@ public class Topic {
 
     public static final String GROUP_METADATA_TOPIC_NAME = "__consumer_offsets";
     public static final String TRANSACTION_STATE_TOPIC_NAME = "__transaction_state";
+    public static final String METADATA_TOPIC_NAME = "__cluster_metadata";
+    public static final TopicPartition METADATA_TOPIC_PARTITION = new TopicPartition(METADATA_TOPIC_NAME, 0);
     public static final String LEGAL_CHARS = "[a-zA-Z0-9._-]";
 
     private static final Set<String> INTERNAL_TOPICS = Collections.unmodifiableSet(
@@ -67,6 +70,17 @@ public static boolean hasCollisionChars(String topic) {
         return topic.contains("_") || topic.contains(".");
     }
 
+    /**
+     * Unify topic name with a period ('.') or underscore ('_'), this is only used to check collision and will not
+     * be used to really change topic name.
+     *
+     * @param topic A topic to unify
+     * @return A unified topic name
+     */
+    public static String unifyCollisionChars(String topic) {
+        return topic.replace('.', '_');
+    }
+
     /**
      * Returns true if the topicNames collide due to a period ('.') or underscore ('_') in the same position.
      *
@@ -75,7 +89,7 @@ public static boolean hasCollisionChars(String topic) {
      * @return true if the topics collide
      */
     public static boolean hasCollision(String topicA, String topicB) {
-        return topicA.replace('.', '_').equals(topicB.replace('.', '_'));
+        return unifyCollisionChars(topicA).equals(unifyCollisionChars(topicB));
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Gauge.java b/clients/src/main/java/org/apache/kafka/common/metrics/Gauge.java
index 647942b3d03b2..d71bbd853db16 100644
--- a/clients/src/main/java/org/apache/kafka/common/metrics/Gauge.java
+++ b/clients/src/main/java/org/apache/kafka/common/metrics/Gauge.java
@@ -19,6 +19,7 @@
 /**
  * A gauge metric is an instantaneous reading of a particular value.
  */
+@FunctionalInterface
 public interface Gauge<T> extends MetricValueProvider<T> {
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java b/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java
index 52b7794a4c10b..a7581442f5d12 100644
--- a/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java
+++ b/clients/src/main/java/org/apache/kafka/common/metrics/Metrics.java
@@ -502,6 +502,7 @@ public void addMetric(MetricName metricName, MetricConfig config, Measurable mea
      *
      * @param metricName The name of the metric
      * @param metricValueProvider The metric value provider associated with this metric
+     * @throws IllegalArgumentException if a metric with same name already exists.
      */
     public void addMetric(MetricName metricName, MetricConfig config, MetricValueProvider<?> metricValueProvider) {
         KafkaMetric m = new KafkaMetric(new Object(),
@@ -509,7 +510,10 @@ public void addMetric(MetricName metricName, MetricConfig config, MetricValuePro
                                         Objects.requireNonNull(metricValueProvider),
                                         config == null ? this.config : config,
                                         time);
-        registerMetric(m);
+        KafkaMetric existingMetric = registerMetric(m);
+        if (existingMetric != null) {
+            throw new IllegalArgumentException("A metric named '" + metricName + "' already exists, can't register another one.");
+        }
     }
 
     /**
@@ -524,6 +528,26 @@ public void addMetric(MetricName metricName, MetricValueProvider<?> metricValueP
         addMetric(metricName, null, metricValueProvider);
     }
 
+    /**
+     * Create or get an existing metric to monitor an object that implements MetricValueProvider.
+     * This metric won't be associated with any sensor. This is a way to expose existing values as metrics.
+     * This method takes care of synchronisation while updating/accessing metrics by concurrent threads.
+     *
+     * @param metricName The name of the metric
+     * @param metricValueProvider The metric value provider associated with this metric
+     * @return Existing KafkaMetric if already registered or else a newly created one
+     */
+    public KafkaMetric addMetricIfAbsent(MetricName metricName, MetricConfig config, MetricValueProvider<?> metricValueProvider) {
+        KafkaMetric metric = new KafkaMetric(new Object(),
+                Objects.requireNonNull(metricName),
+                Objects.requireNonNull(metricValueProvider),
+                config == null ? this.config : config,
+                time);
+
+        KafkaMetric existingMetric = registerMetric(metric);
+        return existingMetric == null ? metric : existingMetric;
+    }
+
     /**
      * Remove a metric if it exists and return it. Return null otherwise. If a metric is removed, `metricRemoval`
      * will be invoked for each reporter.
@@ -563,11 +587,20 @@ public synchronized void removeReporter(MetricsReporter reporter) {
         }
     }
 
-    synchronized void registerMetric(KafkaMetric metric) {
+    /**
+     * Register a metric if not present or return the already existing metric with the same name.
+     * When a metric is newly registered, this method returns null
+     *
+     * @param metric The KafkaMetric to register
+     * @return the existing metric with the same name or null
+     */
+    synchronized KafkaMetric registerMetric(KafkaMetric metric) {
         MetricName metricName = metric.metricName();
-        if (this.metrics.containsKey(metricName))
-            throw new IllegalArgumentException("A metric named '" + metricName + "' already exists, can't register another one.");
-        this.metrics.put(metricName, metric);
+        KafkaMetric existingMetric = this.metrics.putIfAbsent(metricName, metric);
+        if (existingMetric != null) {
+            return existingMetric;
+        }
+        // newly added metric
         for (MetricsReporter reporter : reporters) {
             try {
                 reporter.metricChange(metric);
@@ -576,6 +609,7 @@ synchronized void registerMetric(KafkaMetric metric) {
             }
         }
         log.trace("Registered metric named {}", metricName);
+        return null;
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java
index 5ae3b8d997a64..25f3c21a31365 100644
--- a/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java
+++ b/clients/src/main/java/org/apache/kafka/common/metrics/Sensor.java
@@ -297,7 +297,10 @@ public synchronized boolean add(CompoundStat stat, MetricConfig config) {
         for (NamedMeasurable m : stat.stats()) {
             final KafkaMetric metric = new KafkaMetric(lock, m.name(), m.stat(), statConfig, time);
             if (!metrics.containsKey(metric.metricName())) {
-                registry.registerMetric(metric);
+                KafkaMetric existingMetric = registry.registerMetric(metric);
+                if (existingMetric != null) {
+                    throw new IllegalArgumentException("A metric named '" + metric.metricName() + "' already exists, can't register another one.");
+                }
                 metrics.put(metric.metricName(), metric);
             }
         }
@@ -336,7 +339,10 @@ public synchronized boolean add(final MetricName metricName, final MeasurableSta
                 statConfig,
                 time
             );
-            registry.registerMetric(metric);
+            KafkaMetric existingMetric = registry.registerMetric(metric);
+            if (existingMetric != null) {
+                throw new IllegalArgumentException("A metric named '" + metricName + "' already exists, can't register another one.");
+            }
             metrics.put(metric.metricName(), metric);
             stats.add(new StatAndConfig(Objects.requireNonNull(stat), metric::config));
             return true;
diff --git a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java
index c6b8574186a88..09b7c05c8f283 100644
--- a/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java
+++ b/clients/src/main/java/org/apache/kafka/common/metrics/stats/Rate.java
@@ -91,7 +91,10 @@ public long windowSize(MetricConfig config, long now) {
         if (numFullWindows < minFullWindows)
             totalElapsedTimeMs += (minFullWindows - numFullWindows) * config.timeWindowMs();
 
-        return totalElapsedTimeMs;
+        // If window size is being calculated at the exact beginning of the window with no prior samples, the window size
+        // will result in a value of 0. Calculation of rate over a window is size 0 is undefined, hence, we assume the
+        // minimum window size to be at least 1ms.
+        return Math.max(totalElapsedTimeMs, 1);
     }
 
     @Override
diff --git a/clients/src/main/java/org/apache/kafka/common/network/Selector.java b/clients/src/main/java/org/apache/kafka/common/network/Selector.java
index c3b4888339083..bd1175a8ee0a3 100644
--- a/clients/src/main/java/org/apache/kafka/common/network/Selector.java
+++ b/clients/src/main/java/org/apache/kafka/common/network/Selector.java
@@ -845,7 +845,7 @@ private void clear() {
             boolean hasPending = false;
             if (!sendFailed)
                 hasPending = maybeReadFromClosingChannel(channel);
-            if (!hasPending || sendFailed) {
+            if (!hasPending) {
                 doClose(channel, true);
                 it.remove();
             }
diff --git a/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java b/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java
index 893fd6a4ecedd..844c2bd2c17d7 100644
--- a/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java
+++ b/clients/src/main/java/org/apache/kafka/common/network/SslTransportLayer.java
@@ -71,6 +71,8 @@ private enum State {
         CLOSING
     }
 
+    private static final String TLS13 = "TLSv1.3";
+
     private final String channelId;
     private final SSLEngine sslEngine;
     private final SelectionKey key;
@@ -449,7 +451,7 @@ private void handshakeFinished() throws IOException {
             if (netWriteBuffer.hasRemaining())
                 key.interestOps(key.interestOps() | SelectionKey.OP_WRITE);
             else {
-                state = sslEngine.getSession().getProtocol().equals("TLSv1.3") ? State.POST_HANDSHAKE : State.READY;
+                state = sslEngine.getSession().getProtocol().equals(TLS13) ? State.POST_HANDSHAKE : State.READY;
                 key.interestOps(key.interestOps() & ~SelectionKey.OP_WRITE);
                 SSLSession session = sslEngine.getSession();
                 log.debug("SSL handshake completed successfully with peerHost '{}' peerPort {} peerPrincipal '{}' cipherSuite '{}'",
@@ -585,10 +587,11 @@ public int read(ByteBuffer dst) throws IOException {
                         throw e;
                 }
                 netReadBuffer.compact();
-                // handle ssl renegotiation.
+                // reject renegotiation if TLS < 1.3, key updates for TLS 1.3 are allowed
                 if (unwrapResult.getHandshakeStatus() != HandshakeStatus.NOT_HANDSHAKING &&
                         unwrapResult.getHandshakeStatus() != HandshakeStatus.FINISHED &&
-                        unwrapResult.getStatus() == Status.OK) {
+                        unwrapResult.getStatus() == Status.OK &&
+                        !sslEngine.getSession().getProtocol().equals(TLS13)) {
                     log.error("Renegotiation requested, but it is not supported, channelId {}, " +
                         "appReadBuffer pos {}, netReadBuffer pos {}, netWriteBuffer pos {} handshakeStatus {}", channelId,
                         appReadBuffer.position(), netReadBuffer.position(), netWriteBuffer.position(), unwrapResult.getHandshakeStatus());
@@ -706,9 +709,12 @@ public int write(ByteBuffer src) throws IOException {
             SSLEngineResult wrapResult = sslEngine.wrap(src, netWriteBuffer);
             netWriteBuffer.flip();
 
-            //handle ssl renegotiation
-            if (wrapResult.getHandshakeStatus() != HandshakeStatus.NOT_HANDSHAKING && wrapResult.getStatus() == Status.OK)
+            // reject renegotiation if TLS < 1.3, key updates for TLS 1.3 are allowed
+            if (wrapResult.getHandshakeStatus() != HandshakeStatus.NOT_HANDSHAKING &&
+                    wrapResult.getStatus() == Status.OK &&
+                    !sslEngine.getSession().getProtocol().equals(TLS13)) {
                 throw renegotiationException();
+            }
 
             if (wrapResult.getStatus() == Status.OK) {
                 written += wrapResult.bytesConsumed();
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java b/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java
index 5aa6186f43423..628c9407cc2dd 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/ApiKeys.java
@@ -97,8 +97,8 @@ public enum ApiKeys {
     BEGIN_QUORUM_EPOCH(ApiMessageType.BEGIN_QUORUM_EPOCH, true, RecordBatch.MAGIC_VALUE_V0, false),
     END_QUORUM_EPOCH(ApiMessageType.END_QUORUM_EPOCH, true, RecordBatch.MAGIC_VALUE_V0, false),
     DESCRIBE_QUORUM(ApiMessageType.DESCRIBE_QUORUM, true, RecordBatch.MAGIC_VALUE_V0, true),
-    ALTER_ISR(ApiMessageType.ALTER_ISR, true),
-    UPDATE_FEATURES(ApiMessageType.UPDATE_FEATURES),
+    ALTER_PARTITION(ApiMessageType.ALTER_PARTITION, true),
+    UPDATE_FEATURES(ApiMessageType.UPDATE_FEATURES, true, true),
     ENVELOPE(ApiMessageType.ENVELOPE, true, RecordBatch.MAGIC_VALUE_V0, false),
     FETCH_SNAPSHOT(ApiMessageType.FETCH_SNAPSHOT, false, RecordBatch.MAGIC_VALUE_V0, false),
     DESCRIBE_CLUSTER(ApiMessageType.DESCRIBE_CLUSTER),
@@ -241,7 +241,7 @@ private static String toHtml() {
             b.append("</td>");
             b.append("</tr>\n");
         }
-        b.append("</table>\n");
+        b.append("</tbody></table>\n");
         return b.toString();
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java
index f48ae6c2332b4..2ca42bafcfb86 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/Errors.java
@@ -52,6 +52,7 @@
 import org.apache.kafka.common.errors.InconsistentTopicIdException;
 import org.apache.kafka.common.errors.InconsistentVoterSetException;
 import org.apache.kafka.common.errors.InconsistentClusterIdException;
+import org.apache.kafka.common.errors.IneligibleReplicaException;
 import org.apache.kafka.common.errors.InvalidCommitOffsetSizeException;
 import org.apache.kafka.common.errors.InvalidConfigurationException;
 import org.apache.kafka.common.errors.InvalidFetchSessionEpochException;
@@ -77,6 +78,7 @@
 import org.apache.kafka.common.errors.LogDirNotFoundException;
 import org.apache.kafka.common.errors.MemberIdRequiredException;
 import org.apache.kafka.common.errors.NetworkException;
+import org.apache.kafka.common.errors.NewLeaderElectedException;
 import org.apache.kafka.common.errors.NoReassignmentInProgressException;
 import org.apache.kafka.common.errors.NotControllerException;
 import org.apache.kafka.common.errors.NotCoordinatorException;
@@ -364,7 +366,9 @@ public enum Errors {
     INCONSISTENT_TOPIC_ID(103, "The log's topic ID did not match the topic ID in the request", InconsistentTopicIdException::new),
     INCONSISTENT_CLUSTER_ID(104, "The clusterId in the request does not match that found on the server", InconsistentClusterIdException::new),
     TRANSACTIONAL_ID_NOT_FOUND(105, "The transactionalId could not be found", TransactionalIdNotFoundException::new),
-    FETCH_SESSION_TOPIC_ID_ERROR(106, "The fetch session encountered inconsistent topic ID usage", FetchSessionTopicIdException::new);
+    FETCH_SESSION_TOPIC_ID_ERROR(106, "The fetch session encountered inconsistent topic ID usage", FetchSessionTopicIdException::new),
+    INELIGIBLE_REPLICA(107, "The new ISR contains at least one ineligible replica.", IneligibleReplicaException::new),
+    NEW_LEADER_ELECTED(108, "The AlterPartition request successfully updated the partition state but the leader has changed.", NewLeaderElectedException::new);
 
     private static final Logger log = LoggerFactory.getLogger(Errors.class);
 
@@ -500,7 +504,7 @@ private static String toHtml() {
             b.append("</td>");
             b.append("</tr>\n");
         }
-        b.append("</table>\n");
+        b.append("</tbody></table>\n");
         return b.toString();
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/MessageUtil.java b/clients/src/main/java/org/apache/kafka/common/protocol/MessageUtil.java
index 288ffd08f7187..b366ebd8ead01 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/MessageUtil.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/MessageUtil.java
@@ -29,6 +29,11 @@
 
 
 public final class MessageUtil {
+
+    public static final long UNSIGNED_INT_MAX = 4294967295L;
+
+    public static final int UNSIGNED_SHORT_MAX = 65535;
+
     /**
      * Copy a byte buffer into an array.  This will not affect the buffer's
      * position or mark.
@@ -87,13 +92,22 @@ public static short jsonNodeToShort(JsonNode node, String about) {
 
     public static int jsonNodeToUnsignedShort(JsonNode node, String about) {
         int value = jsonNodeToInt(node, about);
-        if (value < 0 || value > 65535) {
+        if (value < 0 || value > UNSIGNED_SHORT_MAX) {
             throw new RuntimeException(about + ": value " + value +
                 " does not fit in a 16-bit unsigned integer.");
         }
         return value;
     }
 
+    public static long jsonNodeToUnsignedInt(JsonNode node, String about) {
+        long value = jsonNodeToLong(node, about);
+        if (value < 0 || value > UNSIGNED_INT_MAX) {
+            throw new RuntimeException(about + ": value " + value +
+                    " does not fit in a 32-bit unsigned integer.");
+        }
+        return value;
+    }
+
     public static int jsonNodeToInt(JsonNode node, String about) {
         if (node.isInt()) {
             return node.asInt();
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java
index d455b26eb2d87..a75eb0661d102 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java
@@ -112,7 +112,7 @@ private static void schemaToFieldTableHtml(Schema schema, StringBuilder b) {
             b.append("</td>");
             b.append("</tr>\n");
         }
-        b.append("</table>\n");
+        b.append("</tbody></table>\n");
     }
 
     public static String toHtml() {
@@ -148,7 +148,7 @@ public static String toHtml() {
                 Schema schema = requests[i];
                 // Schema
                 if (schema != null) {
-                    b.append("<p>");
+                    b.append("<div>");
                     // Version header
                     b.append("<pre>");
                     b.append(key.name);
@@ -159,7 +159,7 @@ public static String toHtml() {
                     b.append("</pre>");
                     schemaToFieldTableHtml(requests[i], b);
                 }
-                b.append("</p>\n");
+                b.append("</div>\n");
             }
 
             // Responses
@@ -169,7 +169,7 @@ public static String toHtml() {
                 Schema schema = responses[i];
                 // Schema
                 if (schema != null) {
-                    b.append("<p>");
+                    b.append("<div>");
                     // Version header
                     b.append("<pre>");
                     b.append(key.name);
@@ -180,7 +180,7 @@ public static String toHtml() {
                     b.append("</pre>");
                     schemaToFieldTableHtml(responses[i], b);
                 }
-                b.append("</p>\n");
+                b.append("</div>\n");
             }
         }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Readable.java b/clients/src/main/java/org/apache/kafka/common/protocol/Readable.java
index 9c9e461ca806a..561696827df63 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/Readable.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/Readable.java
@@ -75,4 +75,8 @@ default Uuid readUuid() {
     default int readUnsignedShort() {
         return Short.toUnsignedInt(readShort());
     }
+
+    default long readUnsignedInt() {
+        return Integer.toUnsignedLong(readInt());
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Writable.java b/clients/src/main/java/org/apache/kafka/common/protocol/Writable.java
index 8dbec87134257..0677340af4d68 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/Writable.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/Writable.java
@@ -54,4 +54,8 @@ default void writeUnsignedShort(int i) {
         // ints outside the valid range of a short.
         writeShort((short) i);
     }
+
+    default void writeUnsignedInt(long i) {
+        writeInt((int) i);
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Field.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Field.java
index 44726f8240c0f..f030387b6fcdf 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Field.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Field.java
@@ -97,6 +97,12 @@ public Uint16(String name, String docString) {
         }
     }
 
+    public static class Uint32 extends Field {
+        public Uint32(String name, String docString) {
+            super(name, Type.UNSIGNED_INT32, docString, false, null);
+        }
+    }
+
     public static class Float64 extends Field {
         public Float64(String name, String docString) {
             super(name, Type.FLOAT64, docString, false, null);
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java
index 9b9b5e66b6654..e39a84137bc0f 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Struct.java
@@ -23,6 +23,9 @@
 import java.util.Arrays;
 import java.util.Objects;
 
+import static org.apache.kafka.common.protocol.MessageUtil.UNSIGNED_INT_MAX;
+import static org.apache.kafka.common.protocol.MessageUtil.UNSIGNED_SHORT_MAX;
+
 /**
  * A record that can be serialized and deserialized according to a pre-defined schema
  */
@@ -97,6 +100,10 @@ public Integer get(Field.Uint16 field) {
         return getInt(field.name);
     }
 
+    public Long get(Field.Uint32 field) {
+        return getLong(field.name);
+    }
+
     public Short get(Field.Int16 field) {
         return getShort(field.name);
     }
@@ -270,6 +277,10 @@ public Long getUnsignedInt(String name) {
         return (Long) get(name);
     }
 
+    public Long getUnsignedInt(BoundField field) {
+        return (Long) get(field);
+    }
+
     public Long getLong(BoundField field) {
         return (Long) get(field);
     }
@@ -400,13 +411,21 @@ public Struct set(Field.Int16 def, short value) {
     }
 
     public Struct set(Field.Uint16 def, int value) {
-        if (value < 0 || value > 65535) {
+        if (value < 0 || value > UNSIGNED_SHORT_MAX) {
             throw new RuntimeException("Invalid value for unsigned short for " +
                     def.name + ": " + value);
         }
         return set(def.name, value);
     }
 
+    public Struct set(Field.Uint32 def, long value) {
+        if (value < 0 || value > UNSIGNED_INT_MAX) {
+            throw new RuntimeException("Invalid value for unsigned int for " +
+                    def.name + ": " + value);
+        }
+        return set(def.name, value);
+    }
+
     public Struct set(Field.Float64 def, double value) {
         return set(def.name, value);
     }
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/TaggedFields.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/TaggedFields.java
index 4e1ab0d4d5add..129f80c90ba1d 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/types/TaggedFields.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/TaggedFields.java
@@ -178,4 +178,11 @@ public String typeName() {
     public String documentation() {
         return "Represents a series of tagged fields.";
     }
+
+    /**
+     * The number of tagged fields
+     */
+    public int numFields() {
+        return this.fields.size();
+    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java
index 46a59bd08210e..4af74dbf4cc03 100644
--- a/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java
+++ b/clients/src/main/java/org/apache/kafka/common/protocol/types/Type.java
@@ -1120,7 +1120,7 @@ private static String toHtml() {
             b.append("</td>");
             b.append("</tr>\n");
         }
-        b.append("</table>\n");
+        b.append("</tbody></table>\n");
         return b.toString();
     }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java b/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java
index 1b9754ffabbbf..c526929b72e96 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/CompressionType.java
@@ -190,4 +190,10 @@ else if (ZSTD.name.equals(name))
         else
             throw new IllegalArgumentException("Unknown compression name: " + name);
     }
+
+    @Override
+    public String toString() {
+        return name;
+    }
+
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/record/ControlRecordUtils.java b/clients/src/main/java/org/apache/kafka/common/record/ControlRecordUtils.java
index e74f6417febc1..66a4a14d22bed 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/ControlRecordUtils.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/ControlRecordUtils.java
@@ -28,9 +28,9 @@
  */
 public class ControlRecordUtils {
 
-    public static final short LEADER_CHANGE_SCHEMA_HIGHEST_VERSION = new LeaderChangeMessage().highestSupportedVersion();
-    public static final short SNAPSHOT_HEADER_HIGHEST_VERSION = new SnapshotHeaderRecord().highestSupportedVersion();
-    public static final short SNAPSHOT_FOOTER_HIGHEST_VERSION = new SnapshotFooterRecord().highestSupportedVersion();
+    public static final short LEADER_CHANGE_SCHEMA_HIGHEST_VERSION = LeaderChangeMessage.HIGHEST_SUPPORTED_VERSION;
+    public static final short SNAPSHOT_HEADER_HIGHEST_VERSION = SnapshotHeaderRecord.HIGHEST_SUPPORTED_VERSION;
+    public static final short SNAPSHOT_FOOTER_HIGHEST_VERSION = SnapshotFooterRecord.HIGHEST_SUPPORTED_VERSION;
 
     public static LeaderChangeMessage deserializeLeaderChangeMessage(Record record) {
         ControlRecordType recordType = ControlRecordType.parse(record.key());
diff --git a/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java b/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java
index bd80981d84bda..bc8f32491c0a8 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java
@@ -107,7 +107,7 @@ public class DefaultRecordBatch extends AbstractRecordBatch implements MutableRe
     static final int PARTITION_LEADER_EPOCH_LENGTH = 4;
     static final int MAGIC_OFFSET = PARTITION_LEADER_EPOCH_OFFSET + PARTITION_LEADER_EPOCH_LENGTH;
     static final int MAGIC_LENGTH = 1;
-    static final int CRC_OFFSET = MAGIC_OFFSET + MAGIC_LENGTH;
+    public static final int CRC_OFFSET = MAGIC_OFFSET + MAGIC_LENGTH;
     static final int CRC_LENGTH = 4;
     static final int ATTRIBUTES_OFFSET = CRC_OFFSET + CRC_LENGTH;
     static final int ATTRIBUTE_LENGTH = 2;
diff --git a/clients/src/main/java/org/apache/kafka/common/record/LazyDownConversionRecordsSend.java b/clients/src/main/java/org/apache/kafka/common/record/LazyDownConversionRecordsSend.java
index 17addef74de4e..01176518457de 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/LazyDownConversionRecordsSend.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/LazyDownConversionRecordsSend.java
@@ -87,7 +87,7 @@ public long writeTo(TransferableChannel channel, long previouslyWritten, int rem
                 // Since we have already sent at least one batch and we have committed to the fetch size, we
                 // send an overflow batch. The consumer will read the first few records and then fetch from the
                 // offset of the batch which has the unsupported compression type. At that time, we will
-                // send back the UNSUPPORTED_COMPRESSION_TYPE erro which will allow the consumer to fail gracefully.
+                // send back the UNSUPPORTED_COMPRESSION_TYPE error which will allow the consumer to fail gracefully.
                 convertedRecords = buildOverflowBatch(remaining);
             }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecordsBuilder.java b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecordsBuilder.java
index b825a937e084b..02fb7d7f12d55 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecordsBuilder.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecordsBuilder.java
@@ -548,7 +548,6 @@ public void append(long timestamp, ByteBuffer key, ByteBuffer value) {
      * @param key The record key
      * @param value The record value
      * @param headers The record headers if there are any
-     * @return CRC of the record or null if record-level CRC is not supported for the message format
      */
     public void append(long timestamp, ByteBuffer key, ByteBuffer value, Header[] headers) {
         appendWithOffset(nextSequentialOffset(), timestamp, key, value, headers);
@@ -559,7 +558,6 @@ public void append(long timestamp, ByteBuffer key, ByteBuffer value, Header[] he
      * @param timestamp The record timestamp
      * @param key The record key
      * @param value The record value
-     * @return CRC of the record or null if record-level CRC is not supported for the message format
      */
     public void append(long timestamp, byte[] key, byte[] value) {
         append(timestamp, wrapNullable(key), wrapNullable(value), Record.EMPTY_HEADERS);
diff --git a/clients/src/main/java/org/apache/kafka/common/record/RecordBatchIterator.java b/clients/src/main/java/org/apache/kafka/common/record/RecordBatchIterator.java
index 88af039847d5e..967cff80cc7a5 100644
--- a/clients/src/main/java/org/apache/kafka/common/record/RecordBatchIterator.java
+++ b/clients/src/main/java/org/apache/kafka/common/record/RecordBatchIterator.java
@@ -17,8 +17,10 @@
 package org.apache.kafka.common.record;
 
 import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.errors.CorruptRecordException;
 import org.apache.kafka.common.utils.AbstractIterator;
 
+import java.io.EOFException;
 import java.io.IOException;
 
 class RecordBatchIterator<T extends RecordBatch> extends AbstractIterator<T> {
@@ -36,6 +38,8 @@ protected T makeNext() {
             if (batch == null)
                 return allDone();
             return batch;
+        } catch (EOFException e) {
+            throw new CorruptRecordException("Unexpected EOF while attempting to read the next batch", e);
         } catch (IOException e) {
             throw new KafkaException(e);
         }
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java
index 0c38e998fe247..0d96d842d6df5 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java
@@ -279,8 +279,8 @@ private static AbstractRequest doParseRequest(ApiKeys apiKey, short apiVersion,
                 return EndQuorumEpochRequest.parse(buffer, apiVersion);
             case DESCRIBE_QUORUM:
                 return DescribeQuorumRequest.parse(buffer, apiVersion);
-            case ALTER_ISR:
-                return AlterIsrRequest.parse(buffer, apiVersion);
+            case ALTER_PARTITION:
+                return AlterPartitionRequest.parse(buffer, apiVersion);
             case UPDATE_FEATURES:
                 return UpdateFeaturesRequest.parse(buffer, apiVersion);
             case ENVELOPE:
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AbstractResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/AbstractResponse.java
index 47f2b3c7f3099..cd99f472ebb0a 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/AbstractResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/AbstractResponse.java
@@ -223,8 +223,8 @@ public static AbstractResponse parseResponse(ApiKeys apiKey, ByteBuffer response
                 return EndQuorumEpochResponse.parse(responseBuffer, version);
             case DESCRIBE_QUORUM:
                 return DescribeQuorumResponse.parse(responseBuffer, version);
-            case ALTER_ISR:
-                return AlterIsrResponse.parse(responseBuffer, version);
+            case ALTER_PARTITION:
+                return AlterPartitionResponse.parse(responseBuffer, version);
             case UPDATE_FEATURES:
                 return UpdateFeaturesResponse.parse(responseBuffer, version);
             case ENVELOPE:
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AlterIsrRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/AlterIsrRequest.java
deleted file mode 100644
index 516c2ce76aa85..0000000000000
--- a/clients/src/main/java/org/apache/kafka/common/requests/AlterIsrRequest.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.kafka.common.requests;
-
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
-import org.apache.kafka.common.protocol.ApiKeys;
-import org.apache.kafka.common.protocol.ByteBufferAccessor;
-import org.apache.kafka.common.protocol.Errors;
-
-import java.nio.ByteBuffer;
-
-public class AlterIsrRequest extends AbstractRequest {
-
-    private final AlterIsrRequestData data;
-
-    public AlterIsrRequest(AlterIsrRequestData data, short apiVersion) {
-        super(ApiKeys.ALTER_ISR, apiVersion);
-        this.data = data;
-    }
-
-    @Override
-    public AlterIsrRequestData data() {
-        return data;
-    }
-
-    /**
-     * Get an error response for a request with specified throttle time in the response if applicable
-     */
-    @Override
-    public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
-        return new AlterIsrResponse(new AlterIsrResponseData()
-                .setThrottleTimeMs(throttleTimeMs)
-                .setErrorCode(Errors.forException(e).code()));
-    }
-
-    public static AlterIsrRequest parse(ByteBuffer buffer, short version) {
-        return new AlterIsrRequest(new AlterIsrRequestData(new ByteBufferAccessor(buffer), version), version);
-    }
-
-    public static class Builder extends AbstractRequest.Builder<AlterIsrRequest> {
-
-        private final AlterIsrRequestData data;
-
-        public Builder(AlterIsrRequestData data) {
-            super(ApiKeys.ALTER_ISR);
-            this.data = data;
-        }
-
-        @Override
-        public AlterIsrRequest build(short version) {
-            return new AlterIsrRequest(data, version);
-        }
-
-        @Override
-        public String toString() {
-            return data.toString();
-        }
-    }
-}
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionRequest.java
new file mode 100644
index 0000000000000..2d246f21041fa
--- /dev/null
+++ b/clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionRequest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.common.requests;
+
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
+import org.apache.kafka.common.protocol.ApiKeys;
+import org.apache.kafka.common.protocol.ByteBufferAccessor;
+import org.apache.kafka.common.protocol.Errors;
+
+import java.nio.ByteBuffer;
+
+public class AlterPartitionRequest extends AbstractRequest {
+
+    private final AlterPartitionRequestData data;
+
+    public AlterPartitionRequest(AlterPartitionRequestData data, short apiVersion) {
+        super(ApiKeys.ALTER_PARTITION, apiVersion);
+        this.data = data;
+    }
+
+    @Override
+    public AlterPartitionRequestData data() {
+        return data;
+    }
+
+    /**
+     * Get an error response for a request with specified throttle time in the response if applicable
+     */
+    @Override
+    public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
+        return new AlterPartitionResponse(new AlterPartitionResponseData()
+            .setThrottleTimeMs(throttleTimeMs)
+            .setErrorCode(Errors.forException(e).code()));
+    }
+
+    public static AlterPartitionRequest parse(ByteBuffer buffer, short version) {
+        return new AlterPartitionRequest(new AlterPartitionRequestData(new ByteBufferAccessor(buffer), version), version);
+    }
+
+    public static class Builder extends AbstractRequest.Builder<AlterPartitionRequest> {
+
+        private final AlterPartitionRequestData data;
+
+        /**
+         * Constructs a builder for AlterPartitionRequest.
+         *
+         * @param data The data to be sent. Note that because the version of the
+         *             request is not known at this time, it is expected that all
+         *             topics have a topic id and a topic name set.
+         * @param canUseTopicIds True if version 2 and above can be used.
+         */
+        public Builder(AlterPartitionRequestData data, boolean canUseTopicIds) {
+            super(
+                ApiKeys.ALTER_PARTITION,
+                ApiKeys.ALTER_PARTITION.oldestVersion(),
+                // Version 1 is the maximum version that can be used without topic ids.
+                canUseTopicIds ? ApiKeys.ALTER_PARTITION.latestVersion() : 1
+            );
+            this.data = data;
+        }
+
+        @Override
+        public AlterPartitionRequest build(short version) {
+            return new AlterPartitionRequest(data, version);
+        }
+
+        @Override
+        public String toString() {
+            return data.toString();
+        }
+    }
+}
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AlterIsrResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionResponse.java
similarity index 75%
rename from clients/src/main/java/org/apache/kafka/common/requests/AlterIsrResponse.java
rename to clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionResponse.java
index c3106ed94cbde..d2ace4112f4c1 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/AlterIsrResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/AlterPartitionResponse.java
@@ -17,7 +17,7 @@
 
 package org.apache.kafka.common.requests;
 
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.protocol.ByteBufferAccessor;
 import org.apache.kafka.common.protocol.Errors;
@@ -26,17 +26,17 @@
 import java.util.HashMap;
 import java.util.Map;
 
-public class AlterIsrResponse extends AbstractResponse {
+public class AlterPartitionResponse extends AbstractResponse {
 
-    private final AlterIsrResponseData data;
+    private final AlterPartitionResponseData data;
 
-    public AlterIsrResponse(AlterIsrResponseData data) {
-        super(ApiKeys.ALTER_ISR);
+    public AlterPartitionResponse(AlterPartitionResponseData data) {
+        super(ApiKeys.ALTER_PARTITION);
         this.data = data;
     }
 
     @Override
-    public AlterIsrResponseData data() {
+    public AlterPartitionResponseData data() {
         return data;
     }
 
@@ -55,7 +55,7 @@ public int throttleTimeMs() {
         return data.throttleTimeMs();
     }
 
-    public static AlterIsrResponse parse(ByteBuffer buffer, short version) {
-        return new AlterIsrResponse(new AlterIsrResponseData(new ByteBufferAccessor(buffer), version));
+    public static AlterPartitionResponse parse(ByteBuffer buffer, short version) {
+        return new AlterPartitionResponse(new AlterPartitionResponseData(new ByteBufferAccessor(buffer), version));
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/ApiVersionsResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/ApiVersionsResponse.java
index 1190989576380..7c98eb2679b5a 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/ApiVersionsResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/ApiVersionsResponse.java
@@ -16,10 +16,11 @@
  */
 package org.apache.kafka.common.requests;
 
+import org.apache.kafka.clients.NodeApiVersions;
 import org.apache.kafka.common.feature.Features;
-import org.apache.kafka.common.feature.FinalizedVersionRange;
 import org.apache.kafka.common.feature.SupportedVersionRange;
 import org.apache.kafka.common.message.ApiMessageType;
+import org.apache.kafka.common.message.ApiMessageType.ListenerType;
 import org.apache.kafka.common.message.ApiVersionsResponseData;
 import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersion;
 import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersionCollection;
@@ -33,6 +34,7 @@
 import org.apache.kafka.common.record.RecordVersion;
 
 import java.nio.ByteBuffer;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
@@ -116,8 +118,33 @@ public static ApiVersionsResponse createApiVersionsResponse(
             throttleTimeMs,
             apiVersions,
             Features.emptySupportedFeatures(),
-            Features.emptyFinalizedFeatures(),
-            UNKNOWN_FINALIZED_FEATURES_EPOCH
+            Collections.emptyMap(),
+            UNKNOWN_FINALIZED_FEATURES_EPOCH);
+    }
+
+    public static ApiVersionsResponse createApiVersionsResponse(
+        int throttleTimeMs,
+        RecordVersion minRecordVersion,
+        Features<SupportedVersionRange> latestSupportedFeatures,
+        Map<String, Short> finalizedFeatures,
+        long finalizedFeaturesEpoch,
+        NodeApiVersions controllerApiVersions,
+        ListenerType listenerType
+    ) {
+        ApiVersionCollection apiKeys;
+        if (controllerApiVersions != null) {
+            apiKeys = intersectForwardableApis(
+                listenerType, minRecordVersion, controllerApiVersions.allSupportedApiVersions());
+        } else {
+            apiKeys = filterApis(minRecordVersion, listenerType);
+        }
+
+        return createApiVersionsResponse(
+            throttleTimeMs,
+            apiKeys,
+            latestSupportedFeatures,
+            finalizedFeatures,
+            finalizedFeaturesEpoch
         );
     }
 
@@ -125,7 +152,7 @@ public static ApiVersionsResponse createApiVersionsResponse(
         int throttleTimeMs,
         ApiVersionCollection apiVersions,
         Features<SupportedVersionRange> latestSupportedFeatures,
-        Features<FinalizedVersionRange> finalizedFeatures,
+        Map<String, Short> finalizedFeatures,
         long finalizedFeaturesEpoch
     ) {
         return new ApiVersionsResponse(
@@ -205,7 +232,7 @@ private static ApiVersionsResponseData createApiVersionsResponseData(
         final Errors error,
         final ApiVersionCollection apiKeys,
         final Features<SupportedVersionRange> latestSupportedFeatures,
-        final Features<FinalizedVersionRange> finalizedFeatures,
+        final Map<String, Short> finalizedFeatures,
         final long finalizedFeaturesEpoch
     ) {
         final ApiVersionsResponseData data = new ApiVersionsResponseData();
@@ -235,14 +262,14 @@ private static SupportedFeatureKeyCollection createSupportedFeatureKeys(
     }
 
     private static FinalizedFeatureKeyCollection createFinalizedFeatureKeys(
-        Features<FinalizedVersionRange> finalizedFeatures) {
+        Map<String, Short> finalizedFeatures) {
         FinalizedFeatureKeyCollection converted = new FinalizedFeatureKeyCollection();
-        for (Map.Entry<String, FinalizedVersionRange> feature : finalizedFeatures.features().entrySet()) {
+        for (Map.Entry<String, Short> feature : finalizedFeatures.entrySet()) {
             final FinalizedFeatureKey key = new FinalizedFeatureKey();
-            final FinalizedVersionRange versionLevelRange = feature.getValue();
+            final short versionLevel = feature.getValue();
             key.setName(feature.getKey());
-            key.setMinVersionLevel(versionLevelRange.min());
-            key.setMaxVersionLevel(versionLevelRange.max());
+            key.setMinVersionLevel(versionLevel);
+            key.setMaxVersionLevel(versionLevel);
             converted.add(key);
         }
 
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenRequest.java
index 1fee1b71eb3a4..b48f84f1fa6dd 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenRequest.java
@@ -45,7 +45,8 @@ public CreateDelegationTokenRequestData data() {
 
     @Override
     public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
-        return CreateDelegationTokenResponse.prepareResponse(throttleTimeMs, Errors.forException(e), KafkaPrincipal.ANONYMOUS);
+        return CreateDelegationTokenResponse.prepareResponse(version(), throttleTimeMs, Errors.forException(e),
+            KafkaPrincipal.ANONYMOUS, KafkaPrincipal.ANONYMOUS);
     }
 
     public static class Builder extends AbstractRequest.Builder<CreateDelegationTokenRequest> {
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenResponse.java
index b679a30c8dd5c..22c2e1259019b 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/CreateDelegationTokenResponse.java
@@ -39,9 +39,11 @@ public static CreateDelegationTokenResponse parse(ByteBuffer buffer, short versi
             new CreateDelegationTokenResponseData(new ByteBufferAccessor(buffer), version));
     }
 
-    public static CreateDelegationTokenResponse prepareResponse(int throttleTimeMs,
+    public static CreateDelegationTokenResponse prepareResponse(int version,
+            int throttleTimeMs,
             Errors error,
             KafkaPrincipal owner,
+            KafkaPrincipal tokenRequester,
             long issueTimestamp,
             long expiryTimestamp,
             long maxTimestamp,
@@ -57,11 +59,16 @@ public static CreateDelegationTokenResponse prepareResponse(int throttleTimeMs,
                 .setMaxTimestampMs(maxTimestamp)
                 .setTokenId(tokenId)
                 .setHmac(hmac.array());
+        if (version > 2) {
+            data.setTokenRequesterPrincipalType(tokenRequester.getPrincipalType())
+                .setTokenRequesterPrincipalName(tokenRequester.getName());
+        }
         return new CreateDelegationTokenResponse(data);
     }
 
-    public static CreateDelegationTokenResponse prepareResponse(int throttleTimeMs, Errors error, KafkaPrincipal owner) {
-        return prepareResponse(throttleTimeMs, error, owner, -1, -1, -1, "", ByteBuffer.wrap(new byte[] {}));
+    public static CreateDelegationTokenResponse prepareResponse(int version, int throttleTimeMs, Errors error,
+                                                                KafkaPrincipal owner, KafkaPrincipal requester) {
+        return prepareResponse(version, throttleTimeMs, error, owner, requester, -1, -1, -1, "", ByteBuffer.wrap(new byte[] {}));
     }
 
     @Override
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenRequest.java
index 9bf59e844a6c3..bd3b5fd57c002 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenRequest.java
@@ -71,7 +71,7 @@ public boolean ownersListEmpty() {
 
     @Override
     public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
-        return new DescribeDelegationTokenResponse(throttleTimeMs, Errors.forException(e));
+        return new DescribeDelegationTokenResponse(version(), throttleTimeMs, Errors.forException(e));
     }
 
     public static DescribeDelegationTokenRequest parse(ByteBuffer buffer, short version) {
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenResponse.java
index 4a2162f53aaef..4fd1d99652661 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/DescribeDelegationTokenResponse.java
@@ -36,22 +36,29 @@ public class DescribeDelegationTokenResponse extends AbstractResponse {
 
     private final DescribeDelegationTokenResponseData data;
 
-    public DescribeDelegationTokenResponse(int throttleTimeMs, Errors error, List<DelegationToken> tokens) {
+    public DescribeDelegationTokenResponse(int version, int throttleTimeMs, Errors error, List<DelegationToken> tokens) {
         super(ApiKeys.DESCRIBE_DELEGATION_TOKEN);
         List<DescribedDelegationToken> describedDelegationTokenList = tokens
             .stream()
-            .map(dt -> new DescribedDelegationToken()
-                .setTokenId(dt.tokenInfo().tokenId())
-                .setPrincipalType(dt.tokenInfo().owner().getPrincipalType())
-                .setPrincipalName(dt.tokenInfo().owner().getName())
-                .setIssueTimestamp(dt.tokenInfo().issueTimestamp())
-                .setMaxTimestamp(dt.tokenInfo().maxTimestamp())
-                .setExpiryTimestamp(dt.tokenInfo().expiryTimestamp())
-                .setHmac(dt.hmac())
-                .setRenewers(dt.tokenInfo().renewers()
-                    .stream()
-                    .map(r -> new DescribedDelegationTokenRenewer().setPrincipalName(r.getName()).setPrincipalType(r.getPrincipalType()))
-                    .collect(Collectors.toList())))
+            .map(dt -> {
+                DescribedDelegationToken ddt = new DescribedDelegationToken()
+                    .setTokenId(dt.tokenInfo().tokenId())
+                    .setPrincipalType(dt.tokenInfo().owner().getPrincipalType())
+                    .setPrincipalName(dt.tokenInfo().owner().getName())
+                    .setIssueTimestamp(dt.tokenInfo().issueTimestamp())
+                    .setMaxTimestamp(dt.tokenInfo().maxTimestamp())
+                    .setExpiryTimestamp(dt.tokenInfo().expiryTimestamp())
+                    .setHmac(dt.hmac())
+                    .setRenewers(dt.tokenInfo().renewers()
+                        .stream()
+                        .map(r -> new DescribedDelegationTokenRenewer().setPrincipalName(r.getName()).setPrincipalType(r.getPrincipalType()))
+                        .collect(Collectors.toList()));
+                if (version > 2) {
+                    ddt.setTokenRequesterPrincipalType(dt.tokenInfo().tokenRequester().getPrincipalType())
+                        .setTokenRequesterPrincipalName(dt.tokenInfo().tokenRequester().getName());
+                }
+                return ddt;
+            })
             .collect(Collectors.toList());
 
         this.data = new DescribeDelegationTokenResponseData()
@@ -60,8 +67,8 @@ public DescribeDelegationTokenResponse(int throttleTimeMs, Errors error, List<De
             .setTokens(describedDelegationTokenList);
     }
 
-    public DescribeDelegationTokenResponse(int throttleTimeMs, Errors error) {
-        this(throttleTimeMs, error, new ArrayList<>());
+    public DescribeDelegationTokenResponse(int version, int throttleTimeMs, Errors error) {
+        this(version, throttleTimeMs, error, new ArrayList<>());
     }
 
     public DescribeDelegationTokenResponse(DescribeDelegationTokenResponseData data) {
@@ -99,6 +106,7 @@ public List<DelegationToken> tokens() {
             .map(ddt -> new DelegationToken(new TokenInformation(
                 ddt.tokenId(),
                 new KafkaPrincipal(ddt.principalType(), ddt.principalName()),
+                new KafkaPrincipal(ddt.tokenRequesterPrincipalType(), ddt.tokenRequesterPrincipalName()),
                 ddt.renewers()
                     .stream()
                     .map(ddtr -> new KafkaPrincipal(ddtr.principalType(), ddtr.principalName()))
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/DescribeLogDirsResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/DescribeLogDirsResponse.java
index 537d188ec120e..fe8aebbc4f6b8 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/DescribeLogDirsResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/DescribeLogDirsResponse.java
@@ -31,6 +31,7 @@
 public class DescribeLogDirsResponse extends AbstractResponse {
 
     public static final long INVALID_OFFSET_LAG = -1L;
+    public static final long UNKNOWN_VOLUME_BYTES = -1L;
 
     private final DescribeLogDirsResponseData data;
 
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java
index 48ba022610e43..09242bfc4bf0c 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/FetchRequest.java
@@ -351,8 +351,10 @@ public Map<TopicIdPartition, PartitionData> fetchData(Map<Uuid, String> topicNam
         if (fetchData == null) {
             synchronized (this) {
                 if (fetchData == null) {
-                    fetchData = new LinkedHashMap<>();
-                    short version = version();
+                    // Assigning the lazy-initialized `fetchData` in the last step
+                    // to avoid other threads accessing a half-initialized object.
+                    final LinkedHashMap<TopicIdPartition, PartitionData> fetchDataTmp = new LinkedHashMap<>();
+                    final short version = version();
                     data.topics().forEach(fetchTopic -> {
                         String name;
                         if (version < 13) {
@@ -362,7 +364,7 @@ public Map<TopicIdPartition, PartitionData> fetchData(Map<Uuid, String> topicNam
                         }
                         fetchTopic.partitions().forEach(fetchPartition ->
                                 // Topic name may be null here if the topic name was unable to be resolved using the topicNames map.
-                                fetchData.put(new TopicIdPartition(fetchTopic.topicId(), new TopicPartition(name, fetchPartition.partition())),
+                                fetchDataTmp.put(new TopicIdPartition(fetchTopic.topicId(), new TopicPartition(name, fetchPartition.partition())),
                                         new PartitionData(
                                                 fetchTopic.topicId(),
                                                 fetchPartition.fetchOffset(),
@@ -374,6 +376,7 @@ public Map<TopicIdPartition, PartitionData> fetchData(Map<Uuid, String> topicNam
                                 )
                         );
                     });
+                    fetchData = fetchDataTmp;
                 }
             }
         }
@@ -386,7 +389,9 @@ public List<TopicIdPartition> forgottenTopics(Map<Uuid, String> topicNames) {
         if (toForget == null) {
             synchronized (this) {
                 if (toForget == null) {
-                    toForget = new ArrayList<>();
+                    // Assigning the lazy-initialized `toForget` in the last step
+                    // to avoid other threads accessing a half-initialized object.
+                    final List<TopicIdPartition> toForgetTmp = new ArrayList<>();
                     data.forgottenTopicsData().forEach(forgottenTopic -> {
                         String name;
                         if (version() < 13) {
@@ -395,8 +400,9 @@ public List<TopicIdPartition> forgottenTopics(Map<Uuid, String> topicNames) {
                             name = topicNames.get(forgottenTopic.topicId());
                         }
                         // Topic name may be null here if the topic name was unable to be resolved using the topicNames map.
-                        forgottenTopic.partitions().forEach(partitionId -> toForget.add(new TopicIdPartition(forgottenTopic.topicId(), new TopicPartition(name, partitionId))));
+                        forgottenTopic.partitions().forEach(partitionId -> toForgetTmp.add(new TopicIdPartition(forgottenTopic.topicId(), new TopicPartition(name, partitionId))));
                     });
+                    toForget = toForgetTmp;
                 }
             }
         }
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java
index 2e0a02ec16855..a4af4ca2a2370 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/FetchResponse.java
@@ -100,7 +100,10 @@ public LinkedHashMap<TopicPartition, FetchResponseData.PartitionData> responseDa
         if (responseData == null) {
             synchronized (this) {
                 if (responseData == null) {
-                    responseData = new LinkedHashMap<>();
+                    // Assigning the lazy-initialized `responseData` in the last step
+                    // to avoid other threads accessing a half-initialized object.
+                    final LinkedHashMap<TopicPartition, FetchResponseData.PartitionData> responseDataTmp =
+                            new LinkedHashMap<>();
                     data.responses().forEach(topicResponse -> {
                         String name;
                         if (version < 13) {
@@ -110,9 +113,10 @@ public LinkedHashMap<TopicPartition, FetchResponseData.PartitionData> responseDa
                         }
                         if (name != null) {
                             topicResponse.partitions().forEach(partition ->
-                                responseData.put(new TopicPartition(name, partition.partitionIndex()), partition));
+                                responseDataTmp.put(new TopicPartition(name, partition.partitionIndex()), partition));
                         }
                     });
+                    responseData = responseDataTmp;
                 }
             }
         }
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java
index 220a59d183428..774506357bbb9 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/JoinGroupRequest.java
@@ -70,6 +70,20 @@ public static void validateGroupInstanceId(String id) {
         });
     }
 
+    /**
+     * Ensures that the provided {@code reason} remains within a range of 255 chars.
+     * @param reason This is the reason that is sent to the broker over the wire
+     *               as a part of {@code JoinGroupRequest} or {@code LeaveGroupRequest} messages.
+     * @return a provided reason as is or truncated reason if it exceeds the 255 chars threshold.
+     */
+    public static String maybeTruncateReason(final String reason) {
+        if (reason.length() > 255) {
+            return reason.substring(0, 255);
+        } else {
+            return reason;
+        }
+    }
+
     public JoinGroupRequest(JoinGroupRequestData data, short version) {
         super(ApiKeys.JOIN_GROUP, version);
         this.data = data;
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java
index aab5fc6840262..48609b1666c63 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/MetadataRequest.java
@@ -112,7 +112,7 @@ public MetadataRequest build(short version) {
                     if (topic.name() == null && version < 12)
                         throw new UnsupportedVersionException("MetadataRequest version " + version +
                                 " does not support null topic names.");
-                    if (topic.topicId() != Uuid.ZERO_UUID && version < 12)
+                    if (!Uuid.ZERO_UUID.equals(topic.topicId()) && version < 12)
                         throw new UnsupportedVersionException("MetadataRequest version " + version +
                             " does not support non-zero topic IDs.");
                 });
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java
index d539fa871982c..3696b047abad1 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/MetadataResponse.java
@@ -151,7 +151,7 @@ public Cluster buildCluster() {
             if (metadata.error == Errors.NONE) {
                 if (metadata.isInternal)
                     internalTopics.add(metadata.topic);
-                if (metadata.topicId() != null && metadata.topicId() != Uuid.ZERO_UUID) {
+                if (metadata.topicId() != null && !Uuid.ZERO_UUID.equals(metadata.topicId())) {
                     topicIds.put(metadata.topic, metadata.topicId());
                 }
                 for (PartitionMetadata partitionMetadata : metadata.partitionMetadata) {
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java
index 213182ec8c4a5..4e25984668da5 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/OffsetFetchResponse.java
@@ -173,8 +173,8 @@ public OffsetFetchResponse(int throttleTimeMs, Errors error, Map<TopicPartition,
      * @param responseData Fetched offset information grouped by topic-partition and by group
      */
     public OffsetFetchResponse(int throttleTimeMs,
-                               Map<String, Errors> errors, Map<String,
-                               Map<TopicPartition, PartitionData>> responseData) {
+                               Map<String, Errors> errors,
+                               Map<String, Map<TopicPartition, PartitionData>> responseData) {
         super(ApiKeys.OFFSET_FETCH);
         List<OffsetFetchResponseGroup> groupList = new ArrayList<>();
         for (Entry<String, Map<TopicPartition, PartitionData>> entry : responseData.entrySet()) {
@@ -250,7 +250,11 @@ public boolean hasError() {
     }
 
     public boolean groupHasError(String groupId) {
-        return groupLevelErrors.get(groupId) != Errors.NONE;
+        Errors error = groupLevelErrors.get(groupId);
+        if (error == null) {
+            return this.error != null && this.error != Errors.NONE;
+        }
+        return error != Errors.NONE;
     }
 
     public Errors error() {
diff --git a/clients/src/main/java/org/apache/kafka/common/requests/UpdateFeaturesRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/UpdateFeaturesRequest.java
index 7a6bf66cd9ded..27cddfadcaa7c 100644
--- a/clients/src/main/java/org/apache/kafka/common/requests/UpdateFeaturesRequest.java
+++ b/clients/src/main/java/org/apache/kafka/common/requests/UpdateFeaturesRequest.java
@@ -16,15 +16,46 @@
  */
 package org.apache.kafka.common.requests;
 
+import org.apache.kafka.clients.admin.FeatureUpdate;
 import org.apache.kafka.common.message.UpdateFeaturesRequestData;
 import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.protocol.ByteBufferAccessor;
 
 import java.nio.ByteBuffer;
+import java.util.Collection;
 import java.util.Collections;
+import java.util.stream.Collectors;
 
 public class UpdateFeaturesRequest extends AbstractRequest {
 
+    public static class FeatureUpdateItem {
+        private final String featureName;
+        private final short featureLevel;
+        private final FeatureUpdate.UpgradeType upgradeType;
+
+        public FeatureUpdateItem(String featureName, short featureLevel, FeatureUpdate.UpgradeType upgradeType) {
+            this.featureName = featureName;
+            this.featureLevel = featureLevel;
+            this.upgradeType = upgradeType;
+        }
+
+        public String feature() {
+            return featureName;
+        }
+
+        public short versionLevel() {
+            return featureLevel;
+        }
+
+        public FeatureUpdate.UpgradeType upgradeType() {
+            return upgradeType;
+        }
+
+        public boolean isDeleteRequest() {
+            return featureLevel < 1 && !upgradeType.equals(FeatureUpdate.UpgradeType.UPGRADE);
+        }
+    }
+
     public static class Builder extends AbstractRequest.Builder<UpdateFeaturesRequest> {
 
         private final UpdateFeaturesRequestData data;
@@ -52,6 +83,25 @@ public UpdateFeaturesRequest(UpdateFeaturesRequestData data, short version) {
         this.data = data;
     }
 
+    public FeatureUpdateItem getFeature(String name) {
+        UpdateFeaturesRequestData.FeatureUpdateKey update = data.featureUpdates().find(name);
+        if (super.version() == 0) {
+            if (update.allowDowngrade()) {
+                return new FeatureUpdateItem(update.feature(), update.maxVersionLevel(), FeatureUpdate.UpgradeType.SAFE_DOWNGRADE);
+            } else {
+                return new FeatureUpdateItem(update.feature(), update.maxVersionLevel(), FeatureUpdate.UpgradeType.UPGRADE);
+            }
+        } else {
+            return new FeatureUpdateItem(update.feature(), update.maxVersionLevel(), FeatureUpdate.UpgradeType.fromCode(update.upgradeType()));
+        }
+    }
+
+    public Collection<FeatureUpdateItem> featureUpdates() {
+        return data.featureUpdates().stream()
+            .map(update -> getFeature(update.feature()))
+            .collect(Collectors.toList());
+    }
+
     @Override
     public UpdateFeaturesResponse getErrorResponse(int throttleTimeMs, Throwable e) {
         return UpdateFeaturesResponse.createWithErrors(
@@ -69,8 +119,4 @@ public UpdateFeaturesRequestData data() {
     public static UpdateFeaturesRequest parse(ByteBuffer buffer, short version) {
         return new UpdateFeaturesRequest(new UpdateFeaturesRequestData(new ByteBufferAccessor(buffer), version), version);
     }
-
-    public static boolean isDeleteRequest(UpdateFeaturesRequestData.FeatureUpdateKey update) {
-        return update.maxVersionLevel() < 1 && update.allowDowngrade();
-    }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/resource/ResourcePattern.java b/clients/src/main/java/org/apache/kafka/common/resource/ResourcePattern.java
index 2b7504f70a598..b3dfc4937f03e 100644
--- a/clients/src/main/java/org/apache/kafka/common/resource/ResourcePattern.java
+++ b/clients/src/main/java/org/apache/kafka/common/resource/ResourcePattern.java
@@ -89,7 +89,7 @@ public ResourcePatternFilter toFilter() {
 
     @Override
     public String toString() {
-        return "ResourcePattern(resourceType=" + resourceType + ", name=" + ((name == null) ? "<any>" : name) + ", patternType=" + patternType + ")";
+        return "ResourcePattern(resourceType=" + resourceType + ", name=" + name + ", patternType=" + patternType + ")";
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/resource/ResourceType.java b/clients/src/main/java/org/apache/kafka/common/resource/ResourceType.java
index 2ce653fbeb2ec..225859812d449 100644
--- a/clients/src/main/java/org/apache/kafka/common/resource/ResourceType.java
+++ b/clients/src/main/java/org/apache/kafka/common/resource/ResourceType.java
@@ -63,7 +63,12 @@ public enum ResourceType {
     /**
      * A token ID.
      */
-    DELEGATION_TOKEN((byte) 6);
+    DELEGATION_TOKEN((byte) 6),
+
+    /**
+     * A user principal
+     */
+    USER((byte) 7);
 
     private final static HashMap<Byte, ResourceType> CODE_TO_VALUE = new HashMap<>();
 
diff --git a/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensions.java b/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensions.java
index c129f1ec400f7..ca4c4df6079ba 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensions.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensions.java
@@ -19,15 +19,34 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Set;
+import java.util.StringJoiner;
+import javax.security.auth.Subject;
 
 /**
- * A simple immutable value object class holding customizable SASL extensions
+ * A simple immutable value object class holding customizable SASL extensions.
+ *
+ * <p/>
+ *
+ * <b>Note on object identity and equality</b>: <code>SaslExtensions</code> <em>intentionally</em>
+ * overrides the standard {@link #equals(Object)} and {@link #hashCode()} methods calling their
+ * respective {@link Object#equals(Object)} and {@link Object#hashCode()} implementations. In so
+ * doing, it provides equality <em>only</em> via reference identity and will not base equality on
+ * the underlying values of its {@link #extensionsMap extentions map}.
+ *
+ * <p/>
+ *
+ * The reason for this approach to equality is based off of the manner in which
+ * credentials are stored in a {@link Subject}. <code>SaslExtensions</code> are added to and
+ * removed from a {@link Subject} via its {@link Subject#getPublicCredentials() public credentials}.
+ * The public credentials are stored in a {@link Set} in the {@link Subject}, so object equality
+ * therefore becomes a concern. With shallow, reference-based equality, distinct
+ * <code>SaslExtensions</code> instances with the same map values can be considered unique. This is
+ * critical to operations like token refresh.
+ *
+ * See <a href="https://issues.apache.org/jira/browse/KAFKA-14062">KAFKA-14062</a> for more detail.
  */
 public class SaslExtensions {
-    /**
-     * An "empty" instance indicating no SASL extensions
-     */
-    public static final SaslExtensions NO_SASL_EXTENSIONS = new SaslExtensions(Collections.emptyMap());
     private final Map<String, String> extensionsMap;
 
     public SaslExtensions(Map<String, String> extensionsMap) {
@@ -41,21 +60,59 @@ public Map<String, String> map() {
         return extensionsMap;
     }
 
+    /**
+     * Creates an "empty" instance indicating no SASL extensions. <em>Do not cache the result of
+     * this method call</em> for use by multiple {@link Subject}s as the references need to be
+     * unique.
+     *
+     * <p/>
+     *
+     * See the class-level documentation for details.
+     * @return Unique, but empty, <code>SaslExtensions</code> instance
+     */
+    @SuppressWarnings("unchecked")
+    public static SaslExtensions empty() {
+        // It's ok to re-use the EMPTY_MAP instance as the object equality is on the outer
+        // SaslExtensions reference.
+        return new SaslExtensions(Collections.EMPTY_MAP);
+    }
+
+    /**
+     * Implements equals using the reference comparison implementation from
+     * {@link Object#equals(Object)}.
+     *
+     * <p/>
+     *
+     * See the class-level documentation for details.
+     *
+     * @param o Other object to compare
+     * @return True if <code>o == this</code>
+     */
     @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-        return extensionsMap.equals(((SaslExtensions) o).extensionsMap);
+    public final boolean equals(Object o) {
+        return super.equals(o);
     }
 
+    /**
+     * Implements <code>hashCode</code> using the native implementation from
+     * {@link Object#hashCode()}.
+     *
+     * <p/>
+     *
+     * See the class-level documentation for details.
+     *
+     * @return Hash code of instance
+     */
     @Override
-    public String toString() {
-        return extensionsMap.toString();
+    public final int hashCode() {
+        return super.hashCode();
     }
 
     @Override
-    public int hashCode() {
-        return extensionsMap.hashCode();
+    public String toString() {
+        return new StringJoiner(", ", SaslExtensions.class.getSimpleName() + "[", "]")
+            .add("extensionsMap=" + extensionsMap)
+            .toString();
     }
 
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensionsCallback.java b/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensionsCallback.java
index c5bd449e0cc08..f2010afda67e4 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensionsCallback.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/auth/SaslExtensionsCallback.java
@@ -26,13 +26,13 @@
  * in the SASL exchange.
  */
 public class SaslExtensionsCallback implements Callback {
-    private SaslExtensions extensions = SaslExtensions.NO_SASL_EXTENSIONS;
+    private SaslExtensions extensions = SaslExtensions.empty();
 
     /**
      * Returns always non-null {@link SaslExtensions} consisting of the extension
      * names and values that are sent by the client to the server in the initial
      * client SASL authentication message. The default value is
-     * {@link SaslExtensions#NO_SASL_EXTENSIONS} so that if this callback is
+     * {@link SaslExtensions#empty()} so that if this callback is
      * unhandled the client will see a non-null value.
      */
     public SaslExtensions extensions() {
diff --git a/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java b/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java
index 6e35ee7a90ddb..019723b6b408d 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticator.java
@@ -673,30 +673,26 @@ private long calcCompletionTimesAndReturnSessionLifetimeMs() {
             Long credentialExpirationMs = (Long) saslServer
                     .getNegotiatedProperty(SaslInternalConfigs.CREDENTIAL_LIFETIME_MS_SASL_NEGOTIATED_PROPERTY_KEY);
             Long connectionsMaxReauthMs = connectionsMaxReauthMsByMechanism.get(saslMechanism);
-            if (credentialExpirationMs != null || connectionsMaxReauthMs != null) {
+            boolean maxReauthSet = connectionsMaxReauthMs != null && connectionsMaxReauthMs > 0;
+
+            if (credentialExpirationMs != null || maxReauthSet) {
                 if (credentialExpirationMs == null)
                     retvalSessionLifetimeMs = zeroIfNegative(connectionsMaxReauthMs);
-                else if (connectionsMaxReauthMs == null)
+                else if (!maxReauthSet)
                     retvalSessionLifetimeMs = zeroIfNegative(credentialExpirationMs - authenticationEndMs);
                 else
-                    retvalSessionLifetimeMs = zeroIfNegative(
-                            Math.min(credentialExpirationMs - authenticationEndMs, connectionsMaxReauthMs));
-                if (retvalSessionLifetimeMs > 0L)
-                    sessionExpirationTimeNanos = authenticationEndNanos + 1000 * 1000 * retvalSessionLifetimeMs;
+                    retvalSessionLifetimeMs = zeroIfNegative(Math.min(credentialExpirationMs - authenticationEndMs, connectionsMaxReauthMs));
+
+                sessionExpirationTimeNanos = authenticationEndNanos + 1000 * 1000 * retvalSessionLifetimeMs;
             }
+
             if (credentialExpirationMs != null) {
-                if (sessionExpirationTimeNanos != null)
-                    LOG.debug(
-                            "Authentication complete; session max lifetime from broker config={} ms, credential expiration={} ({} ms); session expiration = {} ({} ms), sending {} ms to client",
-                            connectionsMaxReauthMs, new Date(credentialExpirationMs),
-                            credentialExpirationMs - authenticationEndMs,
-                            new Date(authenticationEndMs + retvalSessionLifetimeMs), retvalSessionLifetimeMs,
-                            retvalSessionLifetimeMs);
-                else
-                    LOG.debug(
-                            "Authentication complete; session max lifetime from broker config={} ms, credential expiration={} ({} ms); no session expiration, sending 0 ms to client",
-                            connectionsMaxReauthMs, new Date(credentialExpirationMs),
-                            credentialExpirationMs - authenticationEndMs);
+                LOG.debug(
+                        "Authentication complete; session max lifetime from broker config={} ms, credential expiration={} ({} ms); session expiration = {} ({} ms), sending {} ms to client",
+                        connectionsMaxReauthMs, new Date(credentialExpirationMs),
+                        credentialExpirationMs - authenticationEndMs,
+                        new Date(authenticationEndMs + retvalSessionLifetimeMs), retvalSessionLifetimeMs,
+                        retvalSessionLifetimeMs);
             } else {
                 if (sessionExpirationTimeNanos != null)
                     LOG.debug(
diff --git a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/internals/OAuthBearerClientInitialResponse.java b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/internals/OAuthBearerClientInitialResponse.java
index a356f0da3ddb9..52623ff9fd4f2 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/internals/OAuthBearerClientInitialResponse.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/internals/OAuthBearerClientInitialResponse.java
@@ -108,7 +108,7 @@ public OAuthBearerClientInitialResponse(String tokenValue, String authorizationI
         this.tokenValue = Objects.requireNonNull(tokenValue, "token value must not be null");
         this.authorizationId = authorizationId == null ? "" : authorizationId;
         validateExtensions(extensions);
-        this.saslExtensions = extensions != null ? extensions : SaslExtensions.NO_SASL_EXTENSIONS;
+        this.saslExtensions = extensions != null ? extensions : SaslExtensions.empty();
     }
 
     /**
diff --git a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/Retry.java b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/Retry.java
index ffa56722f6a51..d0379ee48594f 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/Retry.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/Retry.java
@@ -49,13 +49,13 @@ public Retry(Time time, long retryBackoffMs, long retryBackoffMaxMs) {
         this.retryBackoffMaxMs = retryBackoffMaxMs;
 
         if (this.retryBackoffMs < 0)
-            throw new IllegalArgumentException(String.format("retryBackoffMs value (%s) must be non-negative", retryBackoffMs));
+            throw new IllegalArgumentException(String.format("retryBackoffMs value (%d) must be non-negative", retryBackoffMs));
 
         if (this.retryBackoffMaxMs < 0)
-            throw new IllegalArgumentException(String.format("retryBackoffMaxMs value (%s) must be non-negative", retryBackoffMaxMs));
+            throw new IllegalArgumentException(String.format("retryBackoffMaxMs value (%d) must be non-negative", retryBackoffMaxMs));
 
         if (this.retryBackoffMaxMs < this.retryBackoffMs)
-            throw new IllegalArgumentException(String.format("retryBackoffMaxMs value (%s) is less than retryBackoffMs value (%s)", retryBackoffMaxMs, retryBackoffMs));
+            throw new IllegalArgumentException(String.format("retryBackoffMaxMs value (%d) is less than retryBackoffMs value (%d)", retryBackoffMaxMs, retryBackoffMs));
     }
 
     public R execute(Retryable<R> retryable) throws ExecutionException {
@@ -88,7 +88,7 @@ public R execute(Retryable<R> retryable) throws ExecutionException {
                 if (waitMs <= 0)
                     break;
 
-                String message = String.format("Attempt %s to make call resulted in an error; sleeping %s ms before retrying",
+                String message = String.format("Attempt %d to make call resulted in an error; sleeping %d ms before retrying",
                     currAttempt, waitMs);
                 log.warn(message, e);
 
diff --git a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidator.java b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidator.java
index 7668438614e3b..71d549153bbf9 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidator.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidator.java
@@ -131,7 +131,6 @@ public ValidatorAccessTokenValidator(Integer clockSkew,
             .setJwsAlgorithmConstraints(DISALLOW_NONE)
             .setRequireExpirationTime()
             .setRequireIssuedAt()
-            .setRequireSubject()
             .setVerificationKeyResolver(verificationKeyResolver)
             .build();
         this.scopeClaimName = scopeClaimName;
diff --git a/clients/src/main/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactory.java b/clients/src/main/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactory.java
index a46626e7d7942..ac16c21bfc836 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactory.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactory.java
@@ -287,8 +287,6 @@ else if (password != null)
         } else if (PEM_TYPE.equals(type) && path != null) {
             if (password != null)
                 throw new InvalidConfigurationException("SSL key store password cannot be specified with PEM format, only key password may be specified");
-            else if (keyPassword == null)
-                throw new InvalidConfigurationException("SSL PEM key store is specified, but key password is not specified.");
             else
                 return new FileBasedPemStore(path, keyPassword, true);
         } else if (path == null && password != null) {
diff --git a/clients/src/main/java/org/apache/kafka/common/security/token/delegation/TokenInformation.java b/clients/src/main/java/org/apache/kafka/common/security/token/delegation/TokenInformation.java
index 9903eb51b235f..1236824d86e83 100644
--- a/clients/src/main/java/org/apache/kafka/common/security/token/delegation/TokenInformation.java
+++ b/clients/src/main/java/org/apache/kafka/common/security/token/delegation/TokenInformation.java
@@ -30,17 +30,24 @@
 @InterfaceStability.Evolving
 public class TokenInformation {
 
-    private KafkaPrincipal owner;
-    private Collection<KafkaPrincipal> renewers;
-    private long issueTimestamp;
-    private long maxTimestamp;
+    private final KafkaPrincipal owner;
+    private final KafkaPrincipal tokenRequester;
+    private final Collection<KafkaPrincipal> renewers;
+    private final long issueTimestamp;
+    private final long maxTimestamp;
     private long expiryTimestamp;
-    private String tokenId;
+    private final String tokenId;
 
-    public TokenInformation(String tokenId, KafkaPrincipal owner, Collection<KafkaPrincipal> renewers,
-                            long issueTimestamp, long maxTimestamp, long expiryTimestamp) {
+    public TokenInformation(String tokenId, KafkaPrincipal owner,
+                            Collection<KafkaPrincipal> renewers, long issueTimestamp, long maxTimestamp, long expiryTimestamp) {
+        this(tokenId, owner, owner, renewers, issueTimestamp, maxTimestamp, expiryTimestamp);
+    }
+
+    public TokenInformation(String tokenId, KafkaPrincipal owner, KafkaPrincipal tokenRequester,
+                            Collection<KafkaPrincipal> renewers, long issueTimestamp, long maxTimestamp, long expiryTimestamp) {
         this.tokenId = tokenId;
         this.owner = owner;
+        this.tokenRequester = tokenRequester;
         this.renewers = renewers;
         this.issueTimestamp =  issueTimestamp;
         this.maxTimestamp =  maxTimestamp;
@@ -51,6 +58,10 @@ public KafkaPrincipal owner() {
         return owner;
     }
 
+    public KafkaPrincipal tokenRequester() {
+        return tokenRequester;
+    }
+
     public String ownerAsString() {
         return owner.toString();
     }
@@ -88,13 +99,14 @@ public long maxTimestamp() {
     }
 
     public boolean ownerOrRenewer(KafkaPrincipal principal) {
-        return owner.equals(principal) || renewers.contains(principal);
+        return owner.equals(principal) || tokenRequester.equals(principal) || renewers.contains(principal);
     }
 
     @Override
     public String toString() {
         return "TokenInformation{" +
             "owner=" + owner +
+            ", tokenRequester=" + tokenRequester +
             ", renewers=" + renewers +
             ", issueTimestamp=" + issueTimestamp +
             ", maxTimestamp=" + maxTimestamp +
@@ -117,17 +129,13 @@ public boolean equals(Object o) {
         return issueTimestamp == that.issueTimestamp &&
             maxTimestamp == that.maxTimestamp &&
             Objects.equals(owner, that.owner) &&
+            Objects.equals(tokenRequester, that.tokenRequester) &&
             Objects.equals(renewers, that.renewers) &&
             Objects.equals(tokenId, that.tokenId);
     }
 
     @Override
     public int hashCode() {
-        int result = owner != null ? owner.hashCode() : 0;
-        result = 31 * result + (renewers != null ? renewers.hashCode() : 0);
-        result = 31 * result + Long.hashCode(issueTimestamp);
-        result = 31 * result + Long.hashCode(maxTimestamp);
-        result = 31 * result + (tokenId != null ? tokenId.hashCode() : 0);
-        return result;
+        return Objects.hash(owner, tokenRequester, renewers, issueTimestamp, maxTimestamp, expiryTimestamp, tokenId);
     }
 }
diff --git a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
index 3e3faeaadf794..7d84167cf24fc 100755
--- a/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
+++ b/clients/src/main/java/org/apache/kafka/common/utils/Utils.java
@@ -998,6 +998,14 @@ public interface UncheckedCloseable extends AutoCloseable {
 
     /**
      * Closes {@code closeable} and if an exception is thrown, it is logged at the WARN level.
+     * <b>Be cautious when passing method references as an argument.</b> For example:
+     * <p>
+     * {@code closeQuietly(task::stop, "source task");}
+     * <p>
+     * Although this method gracefully handles null {@link AutoCloseable} objects, attempts to take a method
+     * reference from a null object will result in a {@link NullPointerException}. In the example code above,
+     * it would be the caller's responsibility to ensure that {@code task} was non-null before attempting to
+     * use a method reference from it.
      */
     public static void closeQuietly(AutoCloseable closeable, String name) {
         if (closeable != null) {
@@ -1009,6 +1017,17 @@ public static void closeQuietly(AutoCloseable closeable, String name) {
         }
     }
 
+    /**
+    * Closes {@code closeable} and if an exception is thrown, it is registered to the firstException parameter.
+    * <b>Be cautious when passing method references as an argument.</b> For example:
+    * <p>
+    * {@code closeQuietly(task::stop, "source task");}
+    * <p>
+    * Although this method gracefully handles null {@link AutoCloseable} objects, attempts to take a method
+    * reference from a null object will result in a {@link NullPointerException}. In the example code above,
+    * it would be the caller's responsibility to ensure that {@code task} was non-null before attempting to
+    * use a method reference from it.
+    */
     public static void closeQuietly(AutoCloseable closeable, String name, AtomicReference<Throwable> firstException) {
         if (closeable != null) {
             try {
@@ -1038,7 +1057,7 @@ public static void closeAllQuietly(AtomicReference<Throwable> firstException, St
      *
      * Note: changing this method in the future will possibly cause partition selection not to be
      * compatible with the existing messages already placed on a partition since it is used
-     * in producer's {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner}
+     * in producer's partition selection logic {@link org.apache.kafka.clients.producer.KafkaProducer}
      *
      * @param number a given number
      * @return a positive number.
diff --git a/clients/src/main/java/org/apache/kafka/server/authorizer/AuthorizerServerInfo.java b/clients/src/main/java/org/apache/kafka/server/authorizer/AuthorizerServerInfo.java
index 51e23fba57fad..eb03c117b6c95 100644
--- a/clients/src/main/java/org/apache/kafka/server/authorizer/AuthorizerServerInfo.java
+++ b/clients/src/main/java/org/apache/kafka/server/authorizer/AuthorizerServerInfo.java
@@ -48,4 +48,9 @@ public interface AuthorizerServerInfo {
      * Returns the inter-broker endpoint. This is one of the endpoints returned by {@link #endpoints()}.
      */
     Endpoint interBrokerEndpoint();
+
+    /**
+     * Returns the configured early start listeners.
+     */
+    Collection<String> earlyStartListeners();
 }
diff --git a/clients/src/main/java/org/apache/kafka/server/policy/AlterConfigPolicy.java b/clients/src/main/java/org/apache/kafka/server/policy/AlterConfigPolicy.java
index 5710a6011aae7..7f2c4905c9a73 100644
--- a/clients/src/main/java/org/apache/kafka/server/policy/AlterConfigPolicy.java
+++ b/clients/src/main/java/org/apache/kafka/server/policy/AlterConfigPolicy.java
@@ -71,7 +71,7 @@ public int hashCode() {
 
         @Override
         public boolean equals(Object o) {
-            if (o == null || o.getClass() != o.getClass()) return false;
+            if ((o == null) || (!o.getClass().equals(getClass()))) return false;
             RequestMetadata other = (RequestMetadata) o;
             return resource.equals(other.resource) &&
                 configs.equals(other.configs);
diff --git a/clients/src/main/resources/common/message/AlterIsrRequest.json b/clients/src/main/resources/common/message/AlterPartitionRequest.json
similarity index 65%
rename from clients/src/main/resources/common/message/AlterIsrRequest.json
rename to clients/src/main/resources/common/message/AlterPartitionRequest.json
index 70736dbe8a5a0..d91f317f97d56 100644
--- a/clients/src/main/resources/common/message/AlterIsrRequest.json
+++ b/clients/src/main/resources/common/message/AlterPartitionRequest.json
@@ -17,8 +17,11 @@
   "apiKey": 56,
   "type": "request",
   "listeners": ["zkBroker", "controller"],
-  "name": "AlterIsrRequest",
-  "validVersions": "0",
+  "name": "AlterPartitionRequest",
+  // Version 1 adds LeaderRecoveryState field (KIP-704).
+  //
+  // Version 2 adds TopicId field to replace TopicName field (KIP-841).
+  "validVersions": "0-2",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "BrokerId", "type": "int32", "versions": "0+", "entityType": "brokerId",
@@ -26,17 +29,21 @@
     { "name": "BrokerEpoch", "type": "int64", "versions": "0+", "default": "-1",
       "about": "The epoch of the requesting broker" },
     { "name": "Topics", "type": "[]TopicData", "versions": "0+", "fields": [
-      { "name":  "Name", "type": "string", "versions": "0+", "entityType": "topicName",
+      { "name":  "TopicName", "type": "string", "versions": "0-1", "ignorable": true, "entityType": "topicName",
         "about": "The name of the topic to alter ISRs for" },
+      { "name":  "TopicId", "type": "uuid", "versions": "2+", "ignorable": true,
+        "about": "The ID of the topic to alter ISRs for" },
       { "name": "Partitions", "type": "[]PartitionData", "versions": "0+", "fields": [
         { "name": "PartitionIndex", "type": "int32", "versions": "0+",
           "about": "The partition index" },
         { "name": "LeaderEpoch", "type": "int32", "versions": "0+",
           "about": "The leader epoch of this partition" },
         { "name": "NewIsr", "type": "[]int32", "versions": "0+", "entityType": "brokerId",
-          "about": "The ISR for this partition"},
-        { "name": "CurrentIsrVersion", "type": "int32", "versions": "0+",
-          "about": "The expected version of ISR which is being updated"}
+          "about": "The ISR for this partition" },
+        { "name": "LeaderRecoveryState", "type": "int8", "versions": "1+", "default": "0",
+          "about": "1 if the partition is recovering from an unclean leader election; 0 otherwise." },
+        { "name": "PartitionEpoch", "type": "int32", "versions": "0+",
+          "about": "The expected epoch of the partition which is being updated. For legacy cluster this is the ZkVersion in the LeaderAndIsr request." }
       ]}
     ]}
   ]
diff --git a/clients/src/main/resources/common/message/AlterIsrResponse.json b/clients/src/main/resources/common/message/AlterPartitionResponse.json
similarity index 68%
rename from clients/src/main/resources/common/message/AlterIsrResponse.json
rename to clients/src/main/resources/common/message/AlterPartitionResponse.json
index 33837996d3fbd..e8be99fd5e32a 100644
--- a/clients/src/main/resources/common/message/AlterIsrResponse.json
+++ b/clients/src/main/resources/common/message/AlterPartitionResponse.json
@@ -16,8 +16,12 @@
 {
   "apiKey": 56,
   "type": "response",
-  "name": "AlterIsrResponse",
-  "validVersions": "0",
+  "name": "AlterPartitionResponse",
+  // Version 1 adds LeaderRecoveryState field (KIP-704).
+  //
+  // Version 2 adds TopicId field to replace TopicName field, can return the following new errors:
+  // INELIGIBLE_REPLICA, NEW_LEADER_ELECTED and UNKNOWN_TOPIC_ID (KIP-841).
+  "validVersions": "0-2",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
@@ -25,8 +29,10 @@
     { "name": "ErrorCode", "type": "int16", "versions": "0+",
       "about": "The top level response error code" },
     { "name": "Topics", "type": "[]TopicData", "versions": "0+", "fields": [
-      { "name":  "Name", "type": "string", "versions": "0+", "entityType": "topicName",
+      { "name": "TopicName", "type": "string", "versions": "0-1", "ignorable": true, "entityType": "topicName",
         "about": "The name of the topic" },
+      { "name":  "TopicId", "type": "uuid", "versions": "2+", "ignorable": true,
+        "about": "The ID of the topic" },
       { "name": "Partitions", "type": "[]PartitionData", "versions": "0+", "fields": [
         { "name": "PartitionIndex", "type": "int32", "versions": "0+",
           "about": "The partition index" },
@@ -38,9 +44,11 @@
           "about": "The leader epoch." },
         { "name": "Isr", "type": "[]int32", "versions": "0+", "entityType": "brokerId",
           "about": "The in-sync replica IDs." },
-        { "name": "CurrentIsrVersion", "type": "int32", "versions": "0+",
-          "about": "The current ISR version." }
+        { "name": "LeaderRecoveryState", "type": "int8", "versions": "1+", "default": "0", "ignorable": true,
+          "about": "1 if the partition is recovering from an unclean leader election; 0 otherwise." },
+        { "name": "PartitionEpoch", "type": "int32", "versions": "0+",
+          "about": "The current epoch for the partition for KRaft controllers. The current ZK version for the legacy controllers." }
       ]}
     ]}
   ]
-}
\ No newline at end of file
+}
diff --git a/clients/src/main/resources/common/message/CreateAclsRequest.json b/clients/src/main/resources/common/message/CreateAclsRequest.json
index 5b3bfed78162c..89f5cf7f435c2 100644
--- a/clients/src/main/resources/common/message/CreateAclsRequest.json
+++ b/clients/src/main/resources/common/message/CreateAclsRequest.json
@@ -20,7 +20,8 @@
   "name": "CreateAclsRequest",
   // Version 1 adds resource pattern type.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "Creations", "type": "[]AclCreation", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/CreateAclsResponse.json b/clients/src/main/resources/common/message/CreateAclsResponse.json
index 7b0de7e56b731..da1632c03b3b9 100644
--- a/clients/src/main/resources/common/message/CreateAclsResponse.json
+++ b/clients/src/main/resources/common/message/CreateAclsResponse.json
@@ -19,7 +19,8 @@
   "name": "CreateAclsResponse",
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/CreateDelegationTokenRequest.json b/clients/src/main/resources/common/message/CreateDelegationTokenRequest.json
index d65d490a6e066..8444f16c00c76 100644
--- a/clients/src/main/resources/common/message/CreateDelegationTokenRequest.json
+++ b/clients/src/main/resources/common/message/CreateDelegationTokenRequest.json
@@ -21,9 +21,15 @@
   // Version 1 is the same as version 0.
   //
   // Version 2 is the first flexible version.
-  "validVersions": "0-2",
+  //
+  // Version 3 adds owner principal
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
+    { "name": "OwnerPrincipalType", "type": "string", "versions": "3+", "nullableVersions": "3+",
+      "about": "The principal type of the owner of the token. If it's null it defaults to the token request principal." },
+    { "name": "OwnerPrincipalName", "type": "string", "versions": "3+", "nullableVersions": "3+",
+      "about": "The principal name of the owner of the token. If it's null it defaults to the token request principal." },
     { "name": "Renewers", "type": "[]CreatableRenewers", "versions": "0+",
       "about": "A list of those who are allowed to renew this token before it expires.", "fields": [
       { "name": "PrincipalType", "type": "string", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/CreateDelegationTokenResponse.json b/clients/src/main/resources/common/message/CreateDelegationTokenResponse.json
index 74ad905b94b26..bf8be5573bc0b 100644
--- a/clients/src/main/resources/common/message/CreateDelegationTokenResponse.json
+++ b/clients/src/main/resources/common/message/CreateDelegationTokenResponse.json
@@ -20,7 +20,9 @@
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
   //
   // Version 2 is the first flexible version.
-  "validVersions": "0-2",
+  //
+  // Version 3 adds token requester details
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ErrorCode", "type": "int16", "versions": "0+",
@@ -29,6 +31,10 @@
       "about": "The principal type of the token owner." },
     { "name": "PrincipalName", "type": "string", "versions": "0+",
       "about": "The name of the token owner." },
+    { "name": "TokenRequesterPrincipalType", "type": "string", "versions": "3+",
+      "about": "The principal type of the requester of the token." },
+    { "name": "TokenRequesterPrincipalName", "type": "string", "versions": "3+",
+      "about": "The principal type of the requester of the token." },
     { "name": "IssueTimestampMs", "type": "int64", "versions": "0+",
       "about": "When this token was generated." },
     { "name": "ExpiryTimestampMs", "type": "int64", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DeleteAclsRequest.json b/clients/src/main/resources/common/message/DeleteAclsRequest.json
index fd7c1522b43bd..ea7106d4c87c2 100644
--- a/clients/src/main/resources/common/message/DeleteAclsRequest.json
+++ b/clients/src/main/resources/common/message/DeleteAclsRequest.json
@@ -20,7 +20,8 @@
   "name": "DeleteAclsRequest",
   // Version 1 adds the pattern type.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds the user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "Filters", "type": "[]DeleteAclsFilter", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DeleteAclsResponse.json b/clients/src/main/resources/common/message/DeleteAclsResponse.json
index 08f570283e153..e00969df7a70b 100644
--- a/clients/src/main/resources/common/message/DeleteAclsResponse.json
+++ b/clients/src/main/resources/common/message/DeleteAclsResponse.json
@@ -20,7 +20,8 @@
   // Version 1 adds the resource pattern type.
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds the user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeAclsRequest.json b/clients/src/main/resources/common/message/DescribeAclsRequest.json
index 58886da654707..4f0e851c725d8 100644
--- a/clients/src/main/resources/common/message/DescribeAclsRequest.json
+++ b/clients/src/main/resources/common/message/DescribeAclsRequest.json
@@ -20,7 +20,8 @@
   "name": "DescribeAclsRequest",
   // Version 1 adds resource pattern type.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ResourceTypeFilter", "type": "int8", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeAclsResponse.json b/clients/src/main/resources/common/message/DescribeAclsResponse.json
index 0ae72d67c4641..19de109445846 100644
--- a/clients/src/main/resources/common/message/DescribeAclsResponse.json
+++ b/clients/src/main/resources/common/message/DescribeAclsResponse.json
@@ -20,7 +20,8 @@
   // Version 1 adds PatternType.
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
   // Version 2 enables flexible versions.
-  "validVersions": "0-2",
+  // Version 3 adds user resource type.
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeDelegationTokenRequest.json b/clients/src/main/resources/common/message/DescribeDelegationTokenRequest.json
index 79c342e14e004..e92c57e5a263a 100644
--- a/clients/src/main/resources/common/message/DescribeDelegationTokenRequest.json
+++ b/clients/src/main/resources/common/message/DescribeDelegationTokenRequest.json
@@ -20,7 +20,8 @@
   "name": "DescribeDelegationTokenRequest",
   // Version 1 is the same as version 0.
   // Version 2 adds flexible version support
-  "validVersions": "0-2",
+  // Version 3 adds token requester into the response
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "Owners", "type": "[]DescribeDelegationTokenOwner", "versions": "0+", "nullableVersions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeDelegationTokenResponse.json b/clients/src/main/resources/common/message/DescribeDelegationTokenResponse.json
index 09f69ce61c30b..3258164caf552 100644
--- a/clients/src/main/resources/common/message/DescribeDelegationTokenResponse.json
+++ b/clients/src/main/resources/common/message/DescribeDelegationTokenResponse.json
@@ -19,7 +19,8 @@
   "name": "DescribeDelegationTokenResponse",
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
   // Version 2 adds flexible version support
-  "validVersions": "0-2",
+  // Version 3 adds token requester details
+  "validVersions": "0-3",
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ErrorCode", "type": "int16", "versions": "0+",
@@ -30,6 +31,10 @@
         "about": "The token principal type." },
       { "name": "PrincipalName", "type": "string", "versions": "0+",
         "about": "The token principal name." },
+      { "name": "TokenRequesterPrincipalType", "type": "string", "versions": "3+",
+        "about": "The principal type of the requester of the token." },
+      { "name": "TokenRequesterPrincipalName", "type": "string", "versions": "3+",
+        "about": "The principal type of the requester of the token." },
       { "name": "IssueTimestamp", "type": "int64", "versions": "0+",
         "about": "The token issue timestamp in milliseconds." },
       { "name": "ExpiryTimestamp", "type": "int64", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeLogDirsRequest.json b/clients/src/main/resources/common/message/DescribeLogDirsRequest.json
index a133b6c68e392..41cc9e2289571 100644
--- a/clients/src/main/resources/common/message/DescribeLogDirsRequest.json
+++ b/clients/src/main/resources/common/message/DescribeLogDirsRequest.json
@@ -19,9 +19,10 @@
   "listeners": ["zkBroker", "broker"],
   "name": "DescribeLogDirsRequest",
   // Version 1 is the same as version 0.
-  "validVersions": "0-3",
+  "validVersions": "0-4",
   // Version 2 is the first flexible version.
   // Version 3 is the same as version 2 (new field in response).
+  // Version 4 is the same as version 2 (new fields in response).
   "flexibleVersions": "2+",
   "fields": [
     { "name": "Topics", "type": "[]DescribableLogDirTopic", "versions": "0+", "nullableVersions": "0+",
diff --git a/clients/src/main/resources/common/message/DescribeLogDirsResponse.json b/clients/src/main/resources/common/message/DescribeLogDirsResponse.json
index 0171a16481ff3..fec69d17a030c 100644
--- a/clients/src/main/resources/common/message/DescribeLogDirsResponse.json
+++ b/clients/src/main/resources/common/message/DescribeLogDirsResponse.json
@@ -18,14 +18,16 @@
   "type": "response",
   "name": "DescribeLogDirsResponse",
   // Starting in version 1, on quota violation, brokers send out responses before throttling.
-  "validVersions": "0-3",
+  "validVersions": "0-4",
   // Version 2 is the first flexible version.
   // Version 3 adds the top-level ErrorCode field
+  // Version 4 adds the TotalBytes and UsableBytes fields
   "flexibleVersions": "2+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
       "about": "The duration in milliseconds for which the request was throttled due to a quota violation, or zero if the request did not violate any quota." },
-    { "name": "ErrorCode", "type": "int16", "versions": "3+", "about": "The error code, or 0 if there was no error." },
+    { "name": "ErrorCode", "type": "int16", "versions": "3+",
+      "ignorable": true, "about": "The error code, or 0 if there was no error." },
     { "name": "Results", "type": "[]DescribeLogDirsResult", "versions": "0+",
       "about": "The log directories.", "fields": [
       { "name": "ErrorCode", "type": "int16", "versions": "0+",
@@ -46,7 +48,13 @@
           { "name": "IsFutureKey", "type": "bool", "versions": "0+",
             "about": "True if this log is created by AlterReplicaLogDirsRequest and will replace the current log of the replica in the future." }
         ]}
-      ]}
+      ]},
+      { "name": "TotalBytes", "type": "int64", "versions": "4+", "ignorable": true, "default": "-1",
+        "about": "The total size in bytes of the volume the log directory is in."
+      },
+      { "name": "UsableBytes", "type": "int64", "versions": "4+", "ignorable": true, "default": "-1",
+        "about": "The usable size in bytes of the volume the log directory is in."
+      }
     ]}
   ]
 }
diff --git a/clients/src/main/resources/common/message/DescribeQuorumRequest.json b/clients/src/main/resources/common/message/DescribeQuorumRequest.json
index cd4a7f1db5470..cee8fe69822a9 100644
--- a/clients/src/main/resources/common/message/DescribeQuorumRequest.json
+++ b/clients/src/main/resources/common/message/DescribeQuorumRequest.json
@@ -18,7 +18,8 @@
   "type": "request",
   "listeners": ["broker", "controller"],
   "name": "DescribeQuorumRequest",
-  "validVersions": "0",
+  // Version 1 adds additional fields in the response. The request is unchanged (KIP-836).
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "Topics", "type": "[]TopicData",
diff --git a/clients/src/main/resources/common/message/DescribeQuorumResponse.json b/clients/src/main/resources/common/message/DescribeQuorumResponse.json
index 444fee355a8ba..0ea6271238b2d 100644
--- a/clients/src/main/resources/common/message/DescribeQuorumResponse.json
+++ b/clients/src/main/resources/common/message/DescribeQuorumResponse.json
@@ -17,7 +17,8 @@
   "apiKey": 55,
   "type": "response",
   "name": "DescribeQuorumResponse",
-  "validVersions": "0",
+  // Version 1 adds LastFetchTimeStamp and LastCaughtUpTimestamp in ReplicaState (KIP-836).
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "ErrorCode", "type": "int16", "versions": "0+",
@@ -44,7 +45,11 @@
     { "name": "ReplicaState", "versions": "0+", "fields": [
       { "name": "ReplicaId", "type": "int32", "versions": "0+", "entityType": "brokerId" },
       { "name": "LogEndOffset", "type": "int64", "versions": "0+",
-        "about": "The last known log end offset of the follower or -1 if it is unknown"}
+        "about": "The last known log end offset of the follower or -1 if it is unknown"},
+      { "name": "LastFetchTimestamp", "type": "int64", "versions": "1+", "ignorable": true, "default": -1,
+        "about": "The last known leader wall clock time time when a follower fetched from the leader. This is reported as -1 both for the current leader or if it is unknown for a voter"},
+      { "name": "LastCaughtUpTimestamp", "type": "int64", "versions": "1+", "ignorable": true, "default": -1,
+        "about": "The leader wall clock append time of the offset for which the follower made the most recent fetch request. This is reported as the current time for the leader and -1 if unknown for a voter"}
     ]}
   ]
 }
diff --git a/clients/src/main/resources/common/message/FetchRequest.json b/clients/src/main/resources/common/message/FetchRequest.json
index df639579bd772..13ab712be3d50 100644
--- a/clients/src/main/resources/common/message/FetchRequest.json
+++ b/clients/src/main/resources/common/message/FetchRequest.json
@@ -92,7 +92,7 @@
     { "name": "ForgottenTopicsData", "type": "[]ForgottenTopic", "versions": "7+", "ignorable": false,
       "about": "In an incremental fetch request, the partitions to remove.", "fields": [
       { "name": "Topic", "type": "string", "versions": "7-12", "entityType": "topicName", "ignorable": true,
-        "about": "The partition name." },
+        "about": "The topic name." },
       { "name": "TopicId", "type": "uuid", "versions": "13+", "ignorable": true, "about": "The unique topic ID"},
       { "name": "Partitions", "type": "[]int32", "versions": "7+",
         "about": "The partitions indexes to forget." }
diff --git a/clients/src/main/resources/common/message/LeaderAndIsrRequest.json b/clients/src/main/resources/common/message/LeaderAndIsrRequest.json
index c38f21e59fa89..97881be27ff32 100644
--- a/clients/src/main/resources/common/message/LeaderAndIsrRequest.json
+++ b/clients/src/main/resources/common/message/LeaderAndIsrRequest.json
@@ -27,7 +27,9 @@
   // Version 4 is the first flexible version.
   //
   // Version 5 adds Topic ID and Type to the TopicStates, as described in KIP-516.
-  "validVersions": "0-5",
+  //
+  // Version 6 adds LeaderRecoveryState as described in KIP-704.
+  "validVersions": "0-6",
   "flexibleVersions": "4+",
   "fields": [
     { "name": "ControllerId", "type": "int32", "versions": "0+", "entityType": "brokerId",
@@ -76,8 +78,8 @@
         "about": "The leader epoch." },
       { "name": "Isr", "type": "[]int32", "versions": "0+", "entityType": "brokerId",
         "about": "The in-sync replica IDs." },
-      { "name": "ZkVersion", "type": "int32", "versions": "0+",
-        "about": "The ZooKeeper version." },
+      { "name": "PartitionEpoch", "type": "int32", "versions": "0+",
+        "about": "The current epoch for the partition. The epoch is a monotonically increasing value which is incremented after every partition change. (Since the LeaderAndIsr request is only used by the legacy controller, this corresponds to the zkVersion)" },
       { "name": "Replicas", "type": "[]int32", "versions": "0+", "entityType": "brokerId",
         "about": "The replica IDs." },
       { "name": "AddingReplicas", "type": "[]int32", "versions": "3+", "ignorable": true, "entityType": "brokerId",
@@ -85,7 +87,9 @@
       { "name": "RemovingReplicas", "type": "[]int32", "versions": "3+", "ignorable": true, "entityType": "brokerId",
         "about": "The replica IDs that we are removing this partition from, or null if no replicas are being removed." },
       { "name": "IsNew", "type": "bool", "versions": "1+", "default": "false", "ignorable": true,
-        "about": "Whether the replica should have existed on the broker or not." }
+        "about": "Whether the replica should have existed on the broker or not." },
+      { "name": "LeaderRecoveryState", "type": "int8", "versions": "6+", "default": "0",
+        "about": "1 if the partition is recovering from an unclean leader election; 0 otherwise." }
     ]}
   ]
 }
diff --git a/clients/src/main/resources/common/message/LeaderAndIsrResponse.json b/clients/src/main/resources/common/message/LeaderAndIsrResponse.json
index 958448be2744b..79ad819f5e6dc 100644
--- a/clients/src/main/resources/common/message/LeaderAndIsrResponse.json
+++ b/clients/src/main/resources/common/message/LeaderAndIsrResponse.json
@@ -25,9 +25,9 @@
   //
   // Version 4 is the first flexible version.
   //
-  // Version 5 removes TopicName and replaces it with TopicId and reorganizes 
+  // Version 5 removes TopicName and replaces it with TopicId and reorganizes
   // the partitions by topic, as described by KIP-516.
-  "validVersions": "0-5",
+  "validVersions": "0-6",
   "flexibleVersions": "4+",
   "fields": [
     { "name": "ErrorCode", "type": "int16", "versions": "0+",
diff --git a/clients/src/main/resources/common/message/UnregisterBrokerRequest.json b/clients/src/main/resources/common/message/UnregisterBrokerRequest.json
index 4fb8d8df4e004..05fd315bba842 100644
--- a/clients/src/main/resources/common/message/UnregisterBrokerRequest.json
+++ b/clients/src/main/resources/common/message/UnregisterBrokerRequest.json
@@ -16,7 +16,7 @@
 {
   "apiKey": 64,
   "type": "request",
-  "listeners": ["controller"],
+  "listeners": ["broker", "controller"],
   "name": "UnregisterBrokerRequest",
   "validVersions": "0",
   "flexibleVersions": "0+",
diff --git a/clients/src/main/resources/common/message/UpdateFeaturesRequest.json b/clients/src/main/resources/common/message/UpdateFeaturesRequest.json
index 2b3181362d7c5..27ed8420fbb08 100644
--- a/clients/src/main/resources/common/message/UpdateFeaturesRequest.json
+++ b/clients/src/main/resources/common/message/UpdateFeaturesRequest.json
@@ -16,9 +16,9 @@
 {
   "apiKey": 57,
   "type": "request",
-  "listeners": ["zkBroker", "broker"],
+  "listeners": ["zkBroker", "broker", "controller"],
   "name": "UpdateFeaturesRequest",
-  "validVersions": "0",
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "timeoutMs", "type": "int32", "versions": "0+", "default": "60000",
@@ -29,8 +29,12 @@
         "about": "The name of the finalized feature to be updated."},
       {"name": "MaxVersionLevel", "type": "int16", "versions": "0+",
         "about": "The new maximum version level for the finalized feature. A value >= 1 is valid. A value < 1, is special, and can be used to request the deletion of the finalized feature."},
-      {"name": "AllowDowngrade", "type": "bool", "versions": "0+",
-        "about": "When set to true, the finalized feature version level is allowed to be downgraded/deleted. The downgrade request will fail if the new maximum version level is a value that's not lower than the existing maximum finalized version level."}
-    ]}
+      {"name": "AllowDowngrade", "type": "bool", "versions": "0",
+        "about": "DEPRECATED in version 1 (see DowngradeType). When set to true, the finalized feature version level is allowed to be downgraded/deleted. The downgrade request will fail if the new maximum version level is a value that's not lower than the existing maximum finalized version level."},
+      {"name": "UpgradeType", "type": "int8", "versions": "1+", "default": 1,
+        "about": "Determine which type of upgrade will be performed: 1 will perform an upgrade only (default), 2 is safe downgrades only (lossless), 3 is unsafe downgrades (lossy)."}
+    ]},
+    {"name": "ValidateOnly", "type": "bool", "versions": "1+", "default": false,
+      "about": "True if we should validate the request, but not perform the upgrade or downgrade."}
   ]
 }
diff --git a/clients/src/main/resources/common/message/UpdateFeaturesResponse.json b/clients/src/main/resources/common/message/UpdateFeaturesResponse.json
index 63e84ff9683d3..033926b801e30 100644
--- a/clients/src/main/resources/common/message/UpdateFeaturesResponse.json
+++ b/clients/src/main/resources/common/message/UpdateFeaturesResponse.json
@@ -17,7 +17,7 @@
   "apiKey": 57,
   "type": "response",
   "name": "UpdateFeaturesResponse",
-  "validVersions": "0",
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
diff --git a/clients/src/test/java/org/apache/kafka/clients/ApiVersionsTest.java b/clients/src/test/java/org/apache/kafka/clients/ApiVersionsTest.java
index 206e95e4d3074..89065536435c2 100644
--- a/clients/src/test/java/org/apache/kafka/clients/ApiVersionsTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/ApiVersionsTest.java
@@ -48,7 +48,7 @@ public void testMaxUsableProduceMagicWithRaftController() {
         assertEquals(RecordBatch.CURRENT_MAGIC_VALUE, apiVersions.maxUsableProduceMagic());
 
         // something that doesn't support PRODUCE, which is the case with Raft-based controllers
-        apiVersions.update("2", new NodeApiVersions(Collections.singleton(
+        apiVersions.update("2", NodeApiVersions.create(Collections.singleton(
             new ApiVersionsResponseData.ApiVersion()
                 .setApiKey(ApiKeys.FETCH.id)
                 .setMinVersion((short) 0)
diff --git a/clients/src/test/java/org/apache/kafka/clients/ClusterConnectionStatesTest.java b/clients/src/test/java/org/apache/kafka/clients/ClusterConnectionStatesTest.java
index 72cc123921b38..96fe89ca11ea8 100644
--- a/clients/src/test/java/org/apache/kafka/clients/ClusterConnectionStatesTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/ClusterConnectionStatesTest.java
@@ -231,20 +231,8 @@ public void testMaxReconnectBackoff() {
 
     @Test
     public void testExponentialReconnectBackoff() {
-        double reconnectBackoffMaxExp = Math.log(reconnectBackoffMax / (double) Math.max(reconnectBackoffMs, 1))
-            / Math.log(reconnectBackoffExpBase);
-
-        // Run through 10 disconnects and check that reconnect backoff value is within expected range for every attempt
-        for (int i = 0; i < 10; i++) {
-            connectionStates.connecting(nodeId1, time.milliseconds(), "localhost");
-            connectionStates.disconnected(nodeId1, time.milliseconds());
-            // Calculate expected backoff value without jitter
-            long expectedBackoff = Math.round(Math.pow(reconnectBackoffExpBase, Math.min(i, reconnectBackoffMaxExp))
-                * reconnectBackoffMs);
-            long currentBackoff = connectionStates.connectionDelay(nodeId1, time.milliseconds());
-            assertEquals(expectedBackoff, currentBackoff, reconnectBackoffJitter * expectedBackoff);
-            time.sleep(connectionStates.connectionDelay(nodeId1, time.milliseconds()) + 1);
-        }
+        verifyReconnectExponentialBackoff(false);
+        verifyReconnectExponentialBackoff(true);
     }
 
     @Test
@@ -426,4 +414,26 @@ private void setupMultipleIPs() {
         this.connectionStates = new ClusterConnectionStates(reconnectBackoffMs, reconnectBackoffMax,
                 connectionSetupTimeoutMs, connectionSetupTimeoutMaxMs, new LogContext(), this.multipleIPHostResolver);
     }
+
+    private void verifyReconnectExponentialBackoff(boolean enterCheckingApiVersionState) {
+        double reconnectBackoffMaxExp = Math.log(reconnectBackoffMax / (double) Math.max(reconnectBackoffMs, 1))
+            / Math.log(reconnectBackoffExpBase);
+
+        connectionStates.remove(nodeId1);
+        // Run through 10 disconnects and check that reconnect backoff value is within expected range for every attempt
+        for (int i = 0; i < 10; i++) {
+            connectionStates.connecting(nodeId1, time.milliseconds(), "localhost");
+            if (enterCheckingApiVersionState) {
+                connectionStates.checkingApiVersions(nodeId1);
+            }
+
+            connectionStates.disconnected(nodeId1, time.milliseconds());
+            // Calculate expected backoff value without jitter
+            long expectedBackoff = Math.round(Math.pow(reconnectBackoffExpBase, Math.min(i, reconnectBackoffMaxExp))
+                * reconnectBackoffMs);
+            long currentBackoff = connectionStates.connectionDelay(nodeId1, time.milliseconds());
+            assertEquals(expectedBackoff, currentBackoff, reconnectBackoffJitter * expectedBackoff);
+            time.sleep(connectionStates.connectionDelay(nodeId1, time.milliseconds()) + 1);
+        }
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/CommonClientConfigsTest.java b/clients/src/test/java/org/apache/kafka/clients/CommonClientConfigsTest.java
index 007e1490b4184..8b33868aaf06b 100644
--- a/clients/src/test/java/org/apache/kafka/clients/CommonClientConfigsTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/CommonClientConfigsTest.java
@@ -19,6 +19,10 @@
 
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.ConfigDef;
+import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.common.config.SaslConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.common.utils.Utils;
 import org.junit.jupiter.api.Test;
 
 import java.util.Collections;
@@ -26,7 +30,10 @@
 import java.util.Map;
 
 import static org.apache.kafka.common.config.ConfigDef.Range.atLeast;
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class CommonClientConfigsTest {
     private static class TestConfig extends AbstractConfig {
@@ -44,11 +51,23 @@ private static class TestConfig extends AbstractConfig {
                     1000L,
                     atLeast(0L),
                     ConfigDef.Importance.LOW,
-                    "");
+                    "")
+                .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
+                    ConfigDef.Type.STRING,
+                    CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                    in(Utils.enumOptions(SecurityProtocol.class)),
+                    ConfigDef.Importance.MEDIUM,
+                    CommonClientConfigs.SECURITY_PROTOCOL_DOC)
+                .define(SaslConfigs.SASL_MECHANISM,
+                    ConfigDef.Type.STRING,
+                    SaslConfigs.DEFAULT_SASL_MECHANISM,
+                    ConfigDef.Importance.MEDIUM,
+                    SaslConfigs.SASL_MECHANISM_DOC);
         }
 
         @Override
         protected Map<String, Object> postProcessParsedConfig(final Map<String, Object> parsedValues) {
+            CommonClientConfigs.postValidateSaslMechanismConfig(this);
             return CommonClientConfigs.postProcessReconnectBackoffConfigs(this, parsedValues);
         }
 
@@ -82,4 +101,17 @@ public void testExponentialBackoffDefaults() {
         assertEquals(Long.valueOf(123L),
                 reconnectBackoffSetConf.getLong(CommonClientConfigs.RECONNECT_BACKOFF_MAX_MS_CONFIG));
     }
+
+    @Test
+    public void testInvalidSaslMechanism() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, SecurityProtocol.SASL_PLAINTEXT.name);
+        configs.put(SaslConfigs.SASL_MECHANISM, null);
+        ConfigException ce = assertThrows(ConfigException.class, () -> new TestConfig(configs));
+        assertTrue(ce.getMessage().contains(SaslConfigs.SASL_MECHANISM));
+
+        configs.put(SaslConfigs.SASL_MECHANISM, "");
+        ce = assertThrows(ConfigException.class, () -> new TestConfig(configs));
+        assertTrue(ce.getMessage().contains(SaslConfigs.SASL_MECHANISM));
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java b/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java
index fe1e9d19202db..63b44835f63f1 100644
--- a/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/NetworkClientTest.java
@@ -82,6 +82,8 @@ public class NetworkClientTest {
     protected final long reconnectBackoffMaxMsTest = 10 * 10000;
     protected final long connectionSetupTimeoutMsTest = 5 * 1000;
     protected final long connectionSetupTimeoutMaxMsTest = 127 * 1000;
+    private final int reconnectBackoffExpBase = ClusterConnectionStates.RECONNECT_BACKOFF_EXP_BASE;
+    private final double reconnectBackoffJitter = ClusterConnectionStates.RECONNECT_BACKOFF_JITTER;
     private final TestMetadataUpdater metadataUpdater = new TestMetadataUpdater(Collections.singletonList(node));
     private final NetworkClient client = createNetworkClient(reconnectBackoffMaxMsTest);
     private final NetworkClient clientWithNoExponentialBackoff = createNetworkClient(reconnectBackoffMsTest);
@@ -831,13 +833,28 @@ public void testDisconnectDuringUserMetadataRequest() {
 
     @Test
     public void testServerDisconnectAfterInternalApiVersionRequest() throws Exception {
-        awaitInFlightApiVersionRequest();
-        selector.serverDisconnect(node.idString());
-
-        // The failed ApiVersion request should not be forwarded to upper layers
-        List<ClientResponse> responses = client.poll(0, time.milliseconds());
-        assertFalse(client.hasInFlightRequests(node.idString()));
-        assertTrue(responses.isEmpty());
+        final long numIterations = 5;
+        double reconnectBackoffMaxExp = Math.log(reconnectBackoffMaxMsTest / (double) Math.max(reconnectBackoffMsTest, 1))
+            / Math.log(reconnectBackoffExpBase);
+        for (int i = 0; i < numIterations; i++) {
+            selector.clear();
+            awaitInFlightApiVersionRequest();
+            selector.serverDisconnect(node.idString());
+
+            // The failed ApiVersion request should not be forwarded to upper layers
+            List<ClientResponse> responses = client.poll(0, time.milliseconds());
+            assertFalse(client.hasInFlightRequests(node.idString()));
+            assertTrue(responses.isEmpty());
+
+            long expectedBackoff = Math.round(Math.pow(reconnectBackoffExpBase, Math.min(i, reconnectBackoffMaxExp))
+                * reconnectBackoffMsTest);
+            long delay = client.connectionDelay(node, time.milliseconds());
+            assertEquals(expectedBackoff, delay, reconnectBackoffJitter * expectedBackoff);
+            if (i == numIterations - 1) {
+                break;
+            }
+            time.sleep(delay + 1);
+        }
     }
 
     @Test
diff --git a/clients/src/test/java/org/apache/kafka/clients/NodeApiVersionsTest.java b/clients/src/test/java/org/apache/kafka/clients/NodeApiVersionsTest.java
index b04d83b47df2f..f379366ac1609 100644
--- a/clients/src/test/java/org/apache/kafka/clients/NodeApiVersionsTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/NodeApiVersionsTest.java
@@ -27,6 +27,7 @@
 import org.junit.jupiter.params.provider.EnumSource;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -38,7 +39,7 @@ public class NodeApiVersionsTest {
 
     @Test
     public void testUnsupportedVersionsToString() {
-        NodeApiVersions versions = new NodeApiVersions(new ApiVersionCollection());
+        NodeApiVersions versions = new NodeApiVersions(new ApiVersionCollection(), Collections.emptyList());
         StringBuilder bld = new StringBuilder();
         String prefix = "(";
         for (ApiKeys apiKey : ApiKeys.zkBrokerApis()) {
@@ -67,7 +68,7 @@ public void testVersionsToString() {
                         .setMaxVersion((short) 10001));
             } else versionList.add(ApiVersionsResponse.toApiVersion(apiKey));
         }
-        NodeApiVersions versions = new NodeApiVersions(versionList);
+        NodeApiVersions versions = new NodeApiVersions(versionList, Collections.emptyList());
         StringBuilder bld = new StringBuilder();
         String prefix = "(";
         for (ApiKeys apiKey : ApiKeys.values()) {
@@ -124,7 +125,7 @@ public void testLatestUsableVersionOutOfRangeHigh() {
 
     @Test
     public void testUsableVersionCalculationNoKnownVersions() {
-        NodeApiVersions versions = new NodeApiVersions(new ApiVersionCollection());
+        NodeApiVersions versions = new NodeApiVersions(new ApiVersionCollection(), Collections.emptyList());
         assertThrows(UnsupportedVersionException.class,
             () -> versions.latestUsableVersion(ApiKeys.FETCH));
     }
@@ -146,7 +147,7 @@ public void testUsableVersionLatestVersions(ApiMessageType.ListenerType scope) {
                 .setApiKey((short) 100)
                 .setMinVersion((short) 0)
                 .setMaxVersion((short) 1));
-        NodeApiVersions versions = new NodeApiVersions(versionList);
+        NodeApiVersions versions = new NodeApiVersions(versionList, Collections.emptyList());
         for (ApiKeys apiKey: ApiKeys.apisForListener(scope)) {
             assertEquals(apiKey.latestVersion(), versions.latestUsableVersion(apiKey));
         }
@@ -156,7 +157,7 @@ public void testUsableVersionLatestVersions(ApiMessageType.ListenerType scope) {
     @EnumSource(ApiMessageType.ListenerType.class)
     public void testConstructionFromApiVersionsResponse(ApiMessageType.ListenerType scope) {
         ApiVersionsResponse apiVersionsResponse = ApiVersionsResponse.defaultApiVersionsResponse(scope);
-        NodeApiVersions versions = new NodeApiVersions(apiVersionsResponse.data().apiKeys());
+        NodeApiVersions versions = new NodeApiVersions(apiVersionsResponse.data().apiKeys(), Collections.emptyList());
 
         for (ApiVersion apiVersionKey : apiVersionsResponse.data().apiKeys()) {
             ApiVersion apiVersion = versions.apiVersion(ApiKeys.forId(apiVersionKey.apiKey()));
diff --git a/clients/src/test/java/org/apache/kafka/clients/admin/AdminClientTestUtils.java b/clients/src/test/java/org/apache/kafka/clients/admin/AdminClientTestUtils.java
index 587434acf6217..d8b9f427d6b24 100644
--- a/clients/src/test/java/org/apache/kafka/clients/admin/AdminClientTestUtils.java
+++ b/clients/src/test/java/org/apache/kafka/clients/admin/AdminClientTestUtils.java
@@ -24,7 +24,9 @@
 import org.apache.kafka.clients.HostResolver;
 import org.apache.kafka.clients.admin.CreateTopicsResult.TopicMetadataAndConfig;
 import org.apache.kafka.clients.admin.internals.MetadataOperationContext;
+import org.apache.kafka.clients.admin.internals.CoordinatorKey;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.Uuid;
@@ -103,8 +105,17 @@ public static DescribeTopicsResult describeTopicsResult(Map<String, TopicDescrip
                 .collect(Collectors.toMap(Map.Entry::getKey, e -> KafkaFuture.completedFuture(e.getValue()))));
     }
 
-    public static ListConsumerGroupOffsetsResult listConsumerGroupOffsetsResult(Map<TopicPartition, OffsetAndMetadata> offsets) {
-        return new ListConsumerGroupOffsetsResult(KafkaFuture.completedFuture(offsets));
+    public static ListConsumerGroupOffsetsResult listConsumerGroupOffsetsResult(Map<String, Map<TopicPartition, OffsetAndMetadata>> offsets) {
+        Map<CoordinatorKey, KafkaFuture<Map<TopicPartition, OffsetAndMetadata>>> resultMap = offsets.entrySet().stream()
+            .collect(Collectors.toMap(e -> CoordinatorKey.byGroupId(e.getKey()),
+                                      e -> KafkaFutureImpl.completedFuture(e.getValue())));
+        return new ListConsumerGroupOffsetsResult(resultMap);
+    }
+
+    public static ListConsumerGroupOffsetsResult listConsumerGroupOffsetsResult(String group, KafkaException exception) {
+        final KafkaFutureImpl<Map<TopicPartition, OffsetAndMetadata>> future = new KafkaFutureImpl<>();
+        future.completeExceptionally(exception);
+        return new ListConsumerGroupOffsetsResult(Collections.singletonMap(CoordinatorKey.byGroupId(group), future));
     }
 
     /**
diff --git a/clients/src/test/java/org/apache/kafka/clients/admin/KafkaAdminClientTest.java b/clients/src/test/java/org/apache/kafka/clients/admin/KafkaAdminClientTest.java
index 3d7bb94fe706a..de57813679b99 100644
--- a/clients/src/test/java/org/apache/kafka/clients/admin/KafkaAdminClientTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/admin/KafkaAdminClientTest.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.clients.admin;
 
 import org.apache.kafka.clients.ClientDnsLookup;
+import org.apache.kafka.clients.ClientRequest;
 import org.apache.kafka.clients.ClientUtils;
 import org.apache.kafka.clients.MockClient;
 import org.apache.kafka.clients.NodeApiVersions;
@@ -69,6 +70,7 @@
 import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.feature.Features;
+import org.apache.kafka.common.internals.Topic;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.AlterReplicaLogDirsResponseData;
 import org.apache.kafka.common.message.AlterReplicaLogDirsResponseData.AlterReplicaLogDirPartitionResult;
@@ -100,11 +102,13 @@
 import org.apache.kafka.common.message.DescribeLogDirsResponseData;
 import org.apache.kafka.common.message.DescribeLogDirsResponseData.DescribeLogDirsTopic;
 import org.apache.kafka.common.message.DescribeProducersResponseData;
+import org.apache.kafka.common.message.DescribeQuorumResponseData;
 import org.apache.kafka.common.message.DescribeTransactionsResponseData;
 import org.apache.kafka.common.message.DescribeUserScramCredentialsResponseData;
 import org.apache.kafka.common.message.DescribeUserScramCredentialsResponseData.CredentialInfo;
 import org.apache.kafka.common.message.ElectLeadersResponseData.PartitionResult;
 import org.apache.kafka.common.message.ElectLeadersResponseData.ReplicaElectionResult;
+import org.apache.kafka.common.message.FindCoordinatorRequestData;
 import org.apache.kafka.common.message.FindCoordinatorResponseData;
 import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData;
 import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResponse;
@@ -126,6 +130,9 @@
 import org.apache.kafka.common.message.OffsetDeleteResponseData.OffsetDeleteResponsePartitionCollection;
 import org.apache.kafka.common.message.OffsetDeleteResponseData.OffsetDeleteResponseTopic;
 import org.apache.kafka.common.message.OffsetDeleteResponseData.OffsetDeleteResponseTopicCollection;
+import org.apache.kafka.common.message.OffsetFetchRequestData;
+import org.apache.kafka.common.message.OffsetFetchRequestData.OffsetFetchRequestGroup;
+import org.apache.kafka.common.message.OffsetFetchRequestData.OffsetFetchRequestTopics;
 import org.apache.kafka.common.message.UnregisterBrokerResponseData;
 import org.apache.kafka.common.message.WriteTxnMarkersResponseData;
 import org.apache.kafka.common.protocol.ApiKeys;
@@ -161,6 +168,8 @@
 import org.apache.kafka.common.requests.DescribeLogDirsResponse;
 import org.apache.kafka.common.requests.DescribeProducersRequest;
 import org.apache.kafka.common.requests.DescribeProducersResponse;
+import org.apache.kafka.common.requests.DescribeQuorumRequest;
+import org.apache.kafka.common.requests.DescribeQuorumResponse;
 import org.apache.kafka.common.requests.DescribeTransactionsRequest;
 import org.apache.kafka.common.requests.DescribeTransactionsResponse;
 import org.apache.kafka.common.requests.DescribeUserScramCredentialsResponse;
@@ -184,7 +193,9 @@
 import org.apache.kafka.common.requests.MetadataResponse;
 import org.apache.kafka.common.requests.OffsetCommitResponse;
 import org.apache.kafka.common.requests.OffsetDeleteResponse;
+import org.apache.kafka.common.requests.OffsetFetchRequest;
 import org.apache.kafka.common.requests.OffsetFetchResponse;
+import org.apache.kafka.common.requests.OffsetFetchResponse.PartitionData;
 import org.apache.kafka.common.requests.RequestTestUtils;
 import org.apache.kafka.common.requests.UnregisterBrokerResponse;
 import org.apache.kafka.common.requests.UpdateFeaturesRequest;
@@ -217,6 +228,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Optional;
 import java.util.OptionalInt;
 import java.util.OptionalLong;
@@ -236,7 +248,7 @@
 import static java.util.Collections.emptySet;
 import static java.util.Collections.singleton;
 import static java.util.Collections.singletonList;
-import static org.apache.kafka.clients.admin.KafkaAdminClient.LEAVE_GROUP_REASON;
+import static org.apache.kafka.clients.admin.KafkaAdminClient.DEFAULT_LEAVE_GROUP_REASON;
 import static org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignablePartitionResponse;
 import static org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignableTopicResponse;
 import static org.apache.kafka.common.message.ListPartitionReassignmentsResponseData.OngoingPartitionReassignment;
@@ -259,6 +271,7 @@
 public class KafkaAdminClientTest {
     private static final Logger log = LoggerFactory.getLogger(KafkaAdminClientTest.class);
     private static final String GROUP_ID = "group-0";
+    private static final int THROTTLE = 10;
 
     @Test
     public void testDefaultApiTimeoutAndRequestTimeoutConflicts() {
@@ -494,6 +507,21 @@ private static FindCoordinatorResponse prepareOldFindCoordinatorResponse(Errors
         return FindCoordinatorResponse.prepareOldResponse(error, node);
     }
 
+    private static FindCoordinatorResponse prepareBatchedFindCoordinatorResponse(Errors error, Node node, Collection<String> groups) {
+        FindCoordinatorResponseData data = new FindCoordinatorResponseData();
+        List<FindCoordinatorResponseData.Coordinator> coordinators = groups.stream()
+                .map(group -> new FindCoordinatorResponseData.Coordinator()
+                        .setErrorCode(error.code())
+                        .setErrorMessage(error.message())
+                        .setKey(group)
+                        .setHost(node.host())
+                        .setPort(node.port())
+                        .setNodeId(node.id()))
+                .collect(Collectors.toList());
+        data.setCoordinators(coordinators);
+        return new FindCoordinatorResponse(data);
+    }
+
     private static MetadataResponse prepareMetadataResponse(Cluster cluster, Errors error) {
         List<MetadataResponseTopic> metadata = new ArrayList<>();
         for (String topic : cluster.topics()) {
@@ -545,7 +573,7 @@ private static DescribeGroupsResponseData prepareDescribeGroupsResponseData(Stri
 
     private static FeatureMetadata defaultFeatureMetadata() {
         return new FeatureMetadata(
-            Utils.mkMap(Utils.mkEntry("test_feature_1", new FinalizedVersionRange((short) 2, (short) 3))),
+            Utils.mkMap(Utils.mkEntry("test_feature_1", new FinalizedVersionRange((short) 2, (short) 2))),
             Optional.of(1L),
             Utils.mkMap(Utils.mkEntry("test_feature_1", new SupportedVersionRange((short) 1, (short) 5))));
     }
@@ -563,26 +591,13 @@ private static Features<org.apache.kafka.common.feature.SupportedVersionRange> c
         return Features.supportedFeatures(featuresMap);
     }
 
-    private static Features<org.apache.kafka.common.feature.FinalizedVersionRange> convertFinalizedFeaturesMap(Map<String, FinalizedVersionRange> features) {
-        final Map<String, org.apache.kafka.common.feature.FinalizedVersionRange> featuresMap = new HashMap<>();
-        for (final Map.Entry<String, FinalizedVersionRange> entry : features.entrySet()) {
-            final FinalizedVersionRange versionRange = entry.getValue();
-            featuresMap.put(
-                entry.getKey(),
-                new org.apache.kafka.common.feature.FinalizedVersionRange(
-                    versionRange.minVersionLevel(), versionRange.maxVersionLevel()));
-        }
-
-        return Features.finalizedFeatures(featuresMap);
-    }
-
     private static ApiVersionsResponse prepareApiVersionsResponseForDescribeFeatures(Errors error) {
         if (error == Errors.NONE) {
             return ApiVersionsResponse.createApiVersionsResponse(
                 0,
                 ApiVersionsResponse.filterApis(RecordVersion.current(), ApiMessageType.ListenerType.ZK_BROKER),
                 convertSupportedFeaturesMap(defaultFeatureMetadata().supportedFeatures()),
-                convertFinalizedFeaturesMap(defaultFeatureMetadata().finalizedFeatures()),
+                Collections.singletonMap("test_feature_1", (short) 2),
                 defaultFeatureMetadata().finalizedFeaturesEpoch().get()
             );
         }
@@ -592,6 +607,48 @@ private static ApiVersionsResponse prepareApiVersionsResponseForDescribeFeatures
                 .setErrorCode(error.code()));
     }
 
+    private static QuorumInfo defaultQuorumInfo(Boolean emptyOptionals) {
+        return new QuorumInfo(1,
+                singletonList(new QuorumInfo.ReplicaState(1, 100,
+                        emptyOptionals ? OptionalLong.empty() : OptionalLong.of(1000),
+                        emptyOptionals ? OptionalLong.empty() : OptionalLong.of(1000))),
+                singletonList(new QuorumInfo.ReplicaState(1, 100,
+                        emptyOptionals ? OptionalLong.empty() : OptionalLong.of(1000),
+                        emptyOptionals ? OptionalLong.empty() : OptionalLong.of(1000))));
+    }
+
+    private static DescribeQuorumResponse prepareDescribeQuorumResponse(
+            Errors topLevelError,
+            Errors partitionLevelError,
+            Boolean topicCountError,
+            Boolean topicNameError,
+            Boolean partitionCountError,
+            Boolean partitionIndexError,
+            Boolean emptyOptionals) {
+        String topicName = topicNameError ? "RANDOM" : Topic.METADATA_TOPIC_NAME;
+        Integer partitionIndex = partitionIndexError ? 1 : Topic.METADATA_TOPIC_PARTITION.partition();
+        List<DescribeQuorumResponseData.TopicData> topics = new ArrayList<>();
+        List<DescribeQuorumResponseData.PartitionData> partitions = new ArrayList<>();
+        for (int i = 0; i < (partitionCountError ? 2 : 1); i++) {
+            DescribeQuorumResponseData.ReplicaState replica = new DescribeQuorumResponseData.ReplicaState()
+                    .setReplicaId(1)
+                    .setLogEndOffset(100);
+            replica.setLastFetchTimestamp(emptyOptionals ? -1 : 1000);
+            replica.setLastCaughtUpTimestamp(emptyOptionals ? -1 : 1000);
+            partitions.add(new DescribeQuorumResponseData.PartitionData().setPartitionIndex(partitionIndex)
+                    .setLeaderId(1)
+                    .setLeaderEpoch(0)
+                    .setHighWatermark(0)
+                    .setCurrentVoters(singletonList(replica))
+                    .setObservers(singletonList(replica))
+                    .setErrorCode(partitionLevelError.code()));
+        }
+        for (int i = 0; i < (topicCountError ? 2 : 1); i++) {
+            topics.add(new DescribeQuorumResponseData.TopicData().setTopicName(topicName).setPartitions(partitions));
+        }
+        return new DescribeQuorumResponse(new DescribeQuorumResponseData().setTopics(topics).setErrorCode(topLevelError.code()));
+    }
+
     /**
      * Test that the client properly times out when we don't receive any metadata.
      */
@@ -1602,6 +1659,11 @@ private static DescribeLogDirsResponse prepareDescribeLogDirsResponse(Errors err
                 prepareDescribeLogDirsTopics(partitionSize, offsetLag, tp.topic(), tp.partition(), false));
     }
 
+    private static DescribeLogDirsResponse prepareDescribeLogDirsResponse(Errors error, String logDir, TopicPartition tp, long partitionSize, long offsetLag, long totalBytes, long usableBytes) {
+        return prepareDescribeLogDirsResponse(error, logDir,
+                prepareDescribeLogDirsTopics(partitionSize, offsetLag, tp.topic(), tp.partition(), false), totalBytes, usableBytes);
+    }
+
     private static List<DescribeLogDirsTopic> prepareDescribeLogDirsTopics(
             long partitionSize, long offsetLag, String topic, int partition, boolean isFuture) {
         return singletonList(new DescribeLogDirsTopic()
@@ -1623,6 +1685,19 @@ private static DescribeLogDirsResponse prepareDescribeLogDirsResponse(Errors err
                 )));
     }
 
+    private static DescribeLogDirsResponse prepareDescribeLogDirsResponse(Errors error, String logDir,
+                                                                          List<DescribeLogDirsTopic> topics,
+                                                                          long totalBytes, long usableBytes) {
+        return new DescribeLogDirsResponse(
+                new DescribeLogDirsResponseData().setResults(singletonList(new DescribeLogDirsResponseData.DescribeLogDirsResult()
+                        .setErrorCode(error.code())
+                        .setLogDir(logDir)
+                        .setTopics(topics)
+                        .setTotalBytes(totalBytes)
+                        .setUsableBytes(usableBytes)
+                )));
+    }
+
     private static DescribeLogDirsResponse prepareEmptyDescribeLogDirsResponse(Optional<Errors> error) {
         DescribeLogDirsResponseData data = new DescribeLogDirsResponseData();
         if (error.isPresent()) data.setErrorCode(error.get().code());
@@ -1674,6 +1749,11 @@ public void testDescribeLogDirs() throws ExecutionException, InterruptedExceptio
 
     private static void assertDescriptionContains(Map<String, LogDirDescription> descriptionsMap, String logDir,
                                            TopicPartition tp, long partitionSize, long offsetLag) {
+        assertDescriptionContains(descriptionsMap, logDir, tp, partitionSize, offsetLag, OptionalLong.empty(), OptionalLong.empty());
+    }
+
+    private static void assertDescriptionContains(Map<String, LogDirDescription> descriptionsMap, String logDir,
+                                                  TopicPartition tp, long partitionSize, long offsetLag, OptionalLong totalBytes, OptionalLong usableBytes) {
         assertNotNull(descriptionsMap);
         assertEquals(singleton(logDir), descriptionsMap.keySet());
         assertNull(descriptionsMap.get(logDir).error());
@@ -1682,6 +1762,53 @@ private static void assertDescriptionContains(Map<String, LogDirDescription> des
         assertEquals(partitionSize, descriptionsReplicaInfos.get(tp).size());
         assertEquals(offsetLag, descriptionsReplicaInfos.get(tp).offsetLag());
         assertFalse(descriptionsReplicaInfos.get(tp).isFuture());
+        assertEquals(totalBytes, descriptionsMap.get(logDir).totalBytes());
+        assertEquals(usableBytes, descriptionsMap.get(logDir).usableBytes());
+    }
+
+    @Test
+    public void testDescribeLogDirsWithVolumeBytes() throws ExecutionException, InterruptedException {
+        Set<Integer> brokers = singleton(0);
+        String logDir = "/var/data/kafka";
+        TopicPartition tp = new TopicPartition("topic", 12);
+        long partitionSize = 1234567890;
+        long offsetLag = 24;
+        long totalBytes = 123L;
+        long usableBytes = 456L;
+
+        try (AdminClientUnitTestEnv env = mockClientEnv()) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
+            env.kafkaClient().prepareResponseFrom(
+                    prepareDescribeLogDirsResponse(Errors.NONE, logDir, tp, partitionSize, offsetLag, totalBytes, usableBytes),
+                    env.cluster().nodeById(0));
+
+            DescribeLogDirsResult result = env.adminClient().describeLogDirs(brokers);
+
+            Map<Integer, KafkaFuture<Map<String, LogDirDescription>>> descriptions = result.descriptions();
+            assertEquals(brokers, descriptions.keySet());
+            assertNotNull(descriptions.get(0));
+            assertDescriptionContains(descriptions.get(0).get(), logDir, tp, partitionSize, offsetLag, OptionalLong.of(totalBytes), OptionalLong.of(usableBytes));
+
+            Map<Integer, Map<String, LogDirDescription>> allDescriptions = result.allDescriptions().get();
+            assertEquals(brokers, allDescriptions.keySet());
+            assertDescriptionContains(allDescriptions.get(0), logDir, tp, partitionSize, offsetLag, OptionalLong.of(totalBytes), OptionalLong.of(usableBytes));
+
+            // Empty results when not authorized with version < 3
+            env.kafkaClient().prepareResponseFrom(
+                    prepareEmptyDescribeLogDirsResponse(Optional.empty()),
+                    env.cluster().nodeById(0));
+            final DescribeLogDirsResult errorResult = env.adminClient().describeLogDirs(brokers);
+            ExecutionException exception = assertThrows(ExecutionException.class, () -> errorResult.allDescriptions().get());
+            assertTrue(exception.getCause() instanceof ClusterAuthorizationException);
+
+            // Empty results with an error with version >= 3
+            env.kafkaClient().prepareResponseFrom(
+                    prepareEmptyDescribeLogDirsResponse(Optional.of(Errors.UNKNOWN_SERVER_ERROR)),
+                    env.cluster().nodeById(0));
+            final DescribeLogDirsResult errorResult2 = env.adminClient().describeLogDirs(brokers);
+            exception = assertThrows(ExecutionException.class, () -> errorResult2.allDescriptions().get());
+            assertTrue(exception.getCause() instanceof UnknownServerException);
+        }
     }
 
     @SuppressWarnings("deprecation")
@@ -2949,6 +3076,56 @@ public void testDescribeNonConsumerGroups() throws Exception {
         }
     }
 
+    @Test
+    public void testListConsumerGroupOffsetsOptionsWithUnbatchedApi() throws Exception {
+        verifyListConsumerGroupOffsetsOptions(false);
+    }
+
+    @Test
+    public void testListConsumerGroupOffsetsOptionsWithBatchedApi() throws Exception {
+        verifyListConsumerGroupOffsetsOptions(true);
+    }
+
+    @SuppressWarnings("deprecation")
+    private void verifyListConsumerGroupOffsetsOptions(boolean batchedApi) throws Exception {
+        final Cluster cluster = mockCluster(3, 0);
+        final Time time = new MockTime();
+
+        try (AdminClientUnitTestEnv env = new AdminClientUnitTestEnv(time, cluster,
+                AdminClientConfig.RETRIES_CONFIG, "0")) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
+
+            env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
+
+            final List<TopicPartition> partitions = Collections.singletonList(new TopicPartition("A", 0));
+            final ListConsumerGroupOffsetsOptions options = new ListConsumerGroupOffsetsOptions()
+                    .requireStable(true)
+                    .timeoutMs(300);
+            if (batchedApi) {
+                final ListConsumerGroupOffsetsSpec groupSpec = new ListConsumerGroupOffsetsSpec()
+                        .topicPartitions(partitions);
+                env.adminClient().listConsumerGroupOffsets(Collections.singletonMap(GROUP_ID, groupSpec), options);
+            } else {
+                env.adminClient().listConsumerGroupOffsets(GROUP_ID, options.topicPartitions(partitions));
+            }
+
+            final MockClient mockClient = env.kafkaClient();
+            waitForRequest(mockClient, ApiKeys.OFFSET_FETCH);
+
+            ClientRequest clientRequest = mockClient.requests().peek();
+            assertNotNull(clientRequest);
+            assertEquals(300, clientRequest.requestTimeoutMs());
+            OffsetFetchRequestData data = ((OffsetFetchRequest.Builder) clientRequest.requestBuilder()).data;
+            assertTrue(data.requireStable());
+            assertEquals(Collections.singletonList(GROUP_ID),
+                    data.groups().stream().map(OffsetFetchRequestGroup::groupId).collect(Collectors.toList()));
+            assertEquals(Collections.singletonList("A"),
+                    data.groups().get(0).topics().stream().map(OffsetFetchRequestTopics::name).collect(Collectors.toList()));
+            assertEquals(Collections.singletonList(0),
+                    data.groups().get(0).topics().get(0).partitionIndexes());
+        }
+    }
+
     @Test
     public void testListConsumerGroupOffsetsNumRetries() throws Exception {
         final Cluster cluster = mockCluster(3, 0);
@@ -2959,12 +3136,11 @@ public void testListConsumerGroupOffsetsNumRetries() throws Exception {
             env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
 
             env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
-            env.kafkaClient().prepareResponse(new OffsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
             env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             final ListConsumerGroupOffsetsResult result = env.adminClient().listConsumerGroupOffsets(GROUP_ID);
 
-
             TestUtils.assertFutureError(result.partitionsToOffsetAndMetadata(), TimeoutException.class);
         }
     }
@@ -2988,16 +3164,16 @@ public void testListConsumerGroupOffsetsRetryBackoff() throws Exception {
             mockClient.prepareResponse(body -> {
                 firstAttemptTime.set(time.milliseconds());
                 return true;
-            }, new OffsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
+            }, offsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
 
             mockClient.prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             mockClient.prepareResponse(body -> {
                 secondAttemptTime.set(time.milliseconds());
                 return true;
-            }, new OffsetFetchResponse(Errors.NONE, Collections.emptyMap()));
+            }, offsetFetchResponse(Errors.NONE, Collections.emptyMap()));
 
-            final KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> future = env.adminClient().listConsumerGroupOffsets("group-0").partitionsToOffsetAndMetadata();
+            final KafkaFuture<Map<TopicPartition, OffsetAndMetadata>> future = env.adminClient().listConsumerGroupOffsets(GROUP_ID).partitionsToOffsetAndMetadata();
 
             TestUtils.waitForCondition(() -> mockClient.numAwaitingResponses() == 1, "Failed awaiting ListConsumerGroupOffsets first request failure");
             TestUtils.waitForCondition(() -> ((KafkaAdminClient) env.adminClient()).numPendingCalls() == 1, "Failed to add retry ListConsumerGroupOffsets call on first failure");
@@ -3021,7 +3197,8 @@ public void testListConsumerGroupOffsetsRetriableErrors() throws Exception {
                 prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             env.kafkaClient().prepareResponse(
-                new OffsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS, Collections.emptyMap()));
+                offsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS, Collections.emptyMap()));
+
             /*
              * We need to return two responses here, one for NOT_COORDINATOR call when calling list consumer offsets
              * api using coordinator that has moved. This will retry whole operation. So we need to again respond with a
@@ -3030,19 +3207,19 @@ public void testListConsumerGroupOffsetsRetriableErrors() throws Exception {
              * And the same reason for the following COORDINATOR_NOT_AVAILABLE error response
              */
             env.kafkaClient().prepareResponse(
-                new OffsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
+                offsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
 
             env.kafkaClient().prepareResponse(
                 prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             env.kafkaClient().prepareResponse(
-                new OffsetFetchResponse(Errors.COORDINATOR_NOT_AVAILABLE, Collections.emptyMap()));
+                offsetFetchResponse(Errors.COORDINATOR_NOT_AVAILABLE, Collections.emptyMap()));
 
             env.kafkaClient().prepareResponse(
                 prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             env.kafkaClient().prepareResponse(
-                new OffsetFetchResponse(Errors.NONE, Collections.emptyMap()));
+                offsetFetchResponse(Errors.NONE, Collections.emptyMap()));
 
             final ListConsumerGroupOffsetsResult errorResult1 = env.adminClient().listConsumerGroupOffsets(GROUP_ID);
 
@@ -3063,8 +3240,7 @@ public void testListConsumerGroupOffsetsNonRetriableErrors() throws Exception {
                 env.kafkaClient().prepareResponse(
                     prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
-                env.kafkaClient().prepareResponse(
-                    new OffsetFetchResponse(error, Collections.emptyMap()));
+                env.kafkaClient().prepareResponse(offsetFetchResponse(error, Collections.emptyMap()));
 
                 ListConsumerGroupOffsetsResult errorResult = env.adminClient().listConsumerGroupOffsets(GROUP_ID);
 
@@ -3084,7 +3260,7 @@ public void testListConsumerGroupOffsets() throws Exception {
             env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             // Retriable errors should be retried
-            env.kafkaClient().prepareResponse(new OffsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS, Collections.emptyMap()));
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS, Collections.emptyMap()));
 
             /*
              * We need to return two responses here, one for NOT_COORDINATOR error when calling list consumer group offsets
@@ -3093,10 +3269,10 @@ public void testListConsumerGroupOffsets() throws Exception {
              *
              * And the same reason for the following COORDINATOR_NOT_AVAILABLE error response
              */
-            env.kafkaClient().prepareResponse(new OffsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
             env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
-            env.kafkaClient().prepareResponse(new OffsetFetchResponse(Errors.COORDINATOR_NOT_AVAILABLE, Collections.emptyMap()));
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.COORDINATOR_NOT_AVAILABLE, Collections.emptyMap()));
             env.kafkaClient().prepareResponse(prepareFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
 
             TopicPartition myTopicPartition0 = new TopicPartition("my_topic", 0);
@@ -3113,7 +3289,7 @@ public void testListConsumerGroupOffsets() throws Exception {
                     Optional.empty(), "", Errors.NONE));
             responseData.put(myTopicPartition3, new OffsetFetchResponse.PartitionData(OffsetFetchResponse.INVALID_OFFSET,
                     Optional.empty(), "", Errors.NONE));
-            env.kafkaClient().prepareResponse(new OffsetFetchResponse(Errors.NONE, responseData));
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.NONE, responseData));
 
             final ListConsumerGroupOffsetsResult result = env.adminClient().listConsumerGroupOffsets(GROUP_ID);
             final Map<TopicPartition, OffsetAndMetadata> partitionToOffsetAndMetadata = result.partitionsToOffsetAndMetadata().get();
@@ -3127,6 +3303,144 @@ public void testListConsumerGroupOffsets() throws Exception {
         }
     }
 
+    @Test
+    public void testBatchedListConsumerGroupOffsets() throws Exception {
+        Cluster cluster = mockCluster(1, 0);
+        Time time = new MockTime();
+        Map<String, ListConsumerGroupOffsetsSpec> groupSpecs = batchedListConsumerGroupOffsetsSpec();
+
+        try (AdminClientUnitTestEnv env = new AdminClientUnitTestEnv(time, cluster, AdminClientConfig.RETRIES_CONFIG, "0")) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
+            env.kafkaClient().prepareResponse(prepareBatchedFindCoordinatorResponse(Errors.NONE, env.cluster().controller(), groupSpecs.keySet()));
+
+            ListConsumerGroupOffsetsResult result = env.adminClient().listConsumerGroupOffsets(groupSpecs, new ListConsumerGroupOffsetsOptions());
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, true, Errors.NONE);
+
+            verifyListOffsetsForMultipleGroups(groupSpecs, result);
+        }
+    }
+
+    @Test
+    public void testBatchedListConsumerGroupOffsetsWithNoFindCoordinatorBatching() throws Exception {
+        Cluster cluster = mockCluster(1, 0);
+        Time time = new MockTime();
+        Map<String, ListConsumerGroupOffsetsSpec> groupSpecs = batchedListConsumerGroupOffsetsSpec();
+
+        ApiVersion findCoordinatorV3 = new ApiVersion()
+                .setApiKey(ApiKeys.FIND_COORDINATOR.id)
+                .setMinVersion((short) 0)
+                .setMaxVersion((short) 3);
+        ApiVersion offsetFetchV7 = new ApiVersion()
+                .setApiKey(ApiKeys.OFFSET_FETCH.id)
+                .setMinVersion((short) 0)
+                .setMaxVersion((short) 7);
+
+        try (AdminClientUnitTestEnv env = new AdminClientUnitTestEnv(time, cluster, AdminClientConfig.RETRY_BACKOFF_MS_CONFIG, "0")) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create(Arrays.asList(findCoordinatorV3, offsetFetchV7)));
+            env.kafkaClient().prepareResponse(prepareOldFindCoordinatorResponse(Errors.COORDINATOR_NOT_AVAILABLE, Node.noNode()));
+            env.kafkaClient().prepareResponse(prepareOldFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
+            env.kafkaClient().prepareResponse(prepareOldFindCoordinatorResponse(Errors.NONE, env.cluster().controller()));
+
+            ListConsumerGroupOffsetsResult result = env.adminClient().listConsumerGroupOffsets(groupSpecs);
+
+            // Fail the first request in order to ensure that the group is not batched when retried.
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, false, Errors.COORDINATOR_LOAD_IN_PROGRESS);
+
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, false, Errors.NONE);
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, false, Errors.NONE);
+
+            verifyListOffsetsForMultipleGroups(groupSpecs, result);
+        }
+    }
+
+    @Test
+    public void testBatchedListConsumerGroupOffsetsWithNoOffsetFetchBatching() throws Exception {
+        Cluster cluster = mockCluster(1, 0);
+        Time time = new MockTime();
+        Map<String, ListConsumerGroupOffsetsSpec> groupSpecs = batchedListConsumerGroupOffsetsSpec();
+
+        ApiVersion offsetFetchV7 = new ApiVersion()
+                .setApiKey(ApiKeys.OFFSET_FETCH.id)
+                .setMinVersion((short) 0)
+                .setMaxVersion((short) 7);
+
+        try (AdminClientUnitTestEnv env = new AdminClientUnitTestEnv(time, cluster, AdminClientConfig.RETRY_BACKOFF_MS_CONFIG, "0")) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create(Collections.singleton(offsetFetchV7)));
+            env.kafkaClient().prepareResponse(prepareBatchedFindCoordinatorResponse(Errors.NONE, env.cluster().controller(), groupSpecs.keySet()));
+            // Prepare a response to force client to attempt batched request creation that throws
+            // NoBatchedOffsetFetchRequestException. This triggers creation of non-batched requests.
+            env.kafkaClient().prepareResponse(offsetFetchResponse(Errors.COORDINATOR_NOT_AVAILABLE, Collections.emptyMap()));
+
+            ListConsumerGroupOffsetsResult result = env.adminClient().listConsumerGroupOffsets(groupSpecs);
+
+            // The request handler attempts both FindCoordinator and OffsetFetch requests. This seems
+            // ok since since we expect this scenario only during upgrades from versions < 3.0.0 where
+            // some upgraded brokers could handle batched FindCoordinator while non-upgraded coordinators
+            // rejected batched OffsetFetch requests.
+            sendFindCoordinatorResponse(env.kafkaClient(), env.cluster().controller());
+            sendFindCoordinatorResponse(env.kafkaClient(), env.cluster().controller());
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, false, Errors.NONE);
+            sendOffsetFetchResponse(env.kafkaClient(), groupSpecs, false, Errors.NONE);
+
+            verifyListOffsetsForMultipleGroups(groupSpecs, result);
+        }
+    }
+
+    private Map<String, ListConsumerGroupOffsetsSpec> batchedListConsumerGroupOffsetsSpec() {
+        Set<TopicPartition> groupAPartitions = Collections.singleton(new TopicPartition("A", 1));
+        Set<TopicPartition> groupBPartitions =  Collections.singleton(new TopicPartition("B", 2));
+
+        ListConsumerGroupOffsetsSpec groupASpec = new ListConsumerGroupOffsetsSpec().topicPartitions(groupAPartitions);
+        ListConsumerGroupOffsetsSpec groupBSpec = new ListConsumerGroupOffsetsSpec().topicPartitions(groupBPartitions);
+        return Utils.mkMap(Utils.mkEntry("groupA", groupASpec), Utils.mkEntry("groupB", groupBSpec));
+    }
+
+    private void waitForRequest(MockClient mockClient, ApiKeys apiKeys) throws Exception {
+        TestUtils.waitForCondition(() -> {
+            ClientRequest clientRequest = mockClient.requests().peek();
+            return clientRequest != null && clientRequest.apiKey() == apiKeys;
+        }, "Failed awaiting " + apiKeys + " request");
+    }
+
+    private void sendFindCoordinatorResponse(MockClient mockClient, Node coordinator) throws Exception {
+        waitForRequest(mockClient, ApiKeys.FIND_COORDINATOR);
+
+        ClientRequest clientRequest = mockClient.requests().peek();
+        FindCoordinatorRequestData data = ((FindCoordinatorRequest.Builder) clientRequest.requestBuilder()).data();
+        mockClient.respond(prepareFindCoordinatorResponse(Errors.NONE, data.key(), coordinator));
+    }
+
+    private void sendOffsetFetchResponse(MockClient mockClient, Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, boolean batched, Errors error) throws Exception {
+        waitForRequest(mockClient, ApiKeys.OFFSET_FETCH);
+
+        ClientRequest clientRequest = mockClient.requests().peek();
+        OffsetFetchRequestData data = ((OffsetFetchRequest.Builder) clientRequest.requestBuilder()).data;
+        Map<String, Map<TopicPartition, PartitionData>> results = new HashMap<>();
+        Map<String, Errors> errors = new HashMap<>();
+        data.groups().forEach(group -> {
+            Map<TopicPartition, PartitionData> partitionResults = new HashMap<>();
+            for (TopicPartition tp : groupSpecs.get(group.groupId()).topicPartitions()) {
+                partitionResults.put(tp, new PartitionData(10, Optional.empty(), "", Errors.NONE));
+            }
+            results.put(group.groupId(), partitionResults);
+            errors.put(group.groupId(), error);
+        });
+        if (!batched) {
+            assertEquals(1, data.groups().size());
+            mockClient.respond(new OffsetFetchResponse(THROTTLE, error, results.values().iterator().next()));
+        } else
+            mockClient.respond(new OffsetFetchResponse(THROTTLE, errors, results));
+    }
+
+    private void verifyListOffsetsForMultipleGroups(Map<String, ListConsumerGroupOffsetsSpec> groupSpecs,
+                                                    ListConsumerGroupOffsetsResult result) throws Exception {
+        assertEquals(groupSpecs.size(), result.all().get(10, TimeUnit.SECONDS).size());
+        for (Map.Entry<String, ListConsumerGroupOffsetsSpec> entry : groupSpecs.entrySet()) {
+            assertEquals(entry.getValue().topicPartitions(),
+                    result.partitionsToOffsetAndMetadata(entry.getKey()).get().keySet());
+        }
+    }
+
     @Test
     public void testDeleteConsumerGroupsNumRetries() throws Exception {
         final Cluster cluster = mockCluster(3, 0);
@@ -3947,35 +4261,51 @@ private void testRemoveMembersFromGroup(String reason, String expectedReason) th
                 LeaveGroupRequestData leaveGroupRequest = ((LeaveGroupRequest) body).data();
 
                 return leaveGroupRequest.members().stream().allMatch(
-                        member -> member.reason().equals(expectedReason)
+                    member -> member.reason().equals(expectedReason)
                 );
             }, new LeaveGroupResponse(new LeaveGroupResponseData().setErrorCode(Errors.NONE.code()).setMembers(
-                    Arrays.asList(
-                            new MemberResponse().setGroupInstanceId("instance-1"),
-                            new MemberResponse().setGroupInstanceId("instance-2")
-                    ))
+                Arrays.asList(
+                    new MemberResponse().setGroupInstanceId("instance-1"),
+                    new MemberResponse().setGroupInstanceId("instance-2")
+                ))
             ));
 
-            Collection<MemberToRemove> membersToRemove = Arrays.asList(new MemberToRemove("instance-1"), new MemberToRemove("instance-2"));
+            MemberToRemove memberToRemove1 = new MemberToRemove("instance-1");
+            MemberToRemove memberToRemove2 = new MemberToRemove("instance-2");
 
-            RemoveMembersFromConsumerGroupOptions options = new RemoveMembersFromConsumerGroupOptions(membersToRemove);
+            RemoveMembersFromConsumerGroupOptions options = new RemoveMembersFromConsumerGroupOptions(Arrays.asList(
+                memberToRemove1,
+                memberToRemove2
+            ));
             options.reason(reason);
 
             final RemoveMembersFromConsumerGroupResult result = env.adminClient().removeMembersFromConsumerGroup(
-                    GROUP_ID, options);
+                GROUP_ID,
+                options
+            );
 
             assertNull(result.all().get());
+            assertNull(result.memberResult(memberToRemove1).get());
+            assertNull(result.memberResult(memberToRemove2).get());
         }
     }
 
     @Test
     public void testRemoveMembersFromGroupReason() throws Exception {
-        testRemoveMembersFromGroup("testing remove members reason", LEAVE_GROUP_REASON + ": testing remove members reason");
+        testRemoveMembersFromGroup("testing remove members reason", "testing remove members reason");
+    }
+
+    @Test
+    public void testRemoveMembersFromGroupTruncatesReason() throws Exception {
+        final String reason = "Very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong reason that is 271 characters long to make sure that length limit logic handles the scenario nicely";
+        final String truncatedReason = reason.substring(0, 255);
+        testRemoveMembersFromGroup(reason, truncatedReason);
     }
 
     @Test
     public void testRemoveMembersFromGroupDefaultReason() throws Exception {
-        testRemoveMembersFromGroup(null, LEAVE_GROUP_REASON);
+        testRemoveMembersFromGroup(null, DEFAULT_LEAVE_GROUP_REASON);
+        testRemoveMembersFromGroup("", DEFAULT_LEAVE_GROUP_REASON);
     }
 
     @Test
@@ -4676,8 +5006,8 @@ public void testListOffsetsNonMaxTimestampDowngradedImmediately() throws Excepti
 
     private Map<String, FeatureUpdate> makeTestFeatureUpdates() {
         return Utils.mkMap(
-            Utils.mkEntry("test_feature_1", new FeatureUpdate((short) 2, false)),
-            Utils.mkEntry("test_feature_2", new FeatureUpdate((short) 3, true)));
+            Utils.mkEntry("test_feature_1", new FeatureUpdate((short) 2,  FeatureUpdate.UpgradeType.UPGRADE)),
+            Utils.mkEntry("test_feature_2", new FeatureUpdate((short) 3,  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE)));
     }
 
     private Map<String, ApiError> makeTestFeatureUpdateErrors(final Map<String, FeatureUpdate> updates, final Errors error) {
@@ -4773,8 +5103,8 @@ public void testUpdateFeaturesHandleNotControllerException() throws Exception {
                 env.cluster().nodeById(controllerId));
             final KafkaFuture<Void> future = env.adminClient().updateFeatures(
                 Utils.mkMap(
-                    Utils.mkEntry("test_feature_1", new FeatureUpdate((short) 2, false)),
-                    Utils.mkEntry("test_feature_2", new FeatureUpdate((short) 3, true))),
+                    Utils.mkEntry("test_feature_1", new FeatureUpdate((short) 2,  FeatureUpdate.UpgradeType.UPGRADE)),
+                    Utils.mkEntry("test_feature_2", new FeatureUpdate((short) 3,  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE))),
                 new UpdateFeaturesOptions().timeoutMs(10000)
             ).all();
             future.get();
@@ -4797,8 +5127,8 @@ public void testUpdateFeaturesShouldFailRequestForInvalidFeatureName() {
             assertThrows(
                 IllegalArgumentException.class,
                 () -> env.adminClient().updateFeatures(
-                    Utils.mkMap(Utils.mkEntry("feature", new FeatureUpdate((short) 2, false)),
-                                Utils.mkEntry("", new FeatureUpdate((short) 2, false))),
+                    Utils.mkMap(Utils.mkEntry("feature", new FeatureUpdate((short) 2,  FeatureUpdate.UpgradeType.UPGRADE)),
+                                Utils.mkEntry("", new FeatureUpdate((short) 2,  FeatureUpdate.UpgradeType.UPGRADE))),
                     new UpdateFeaturesOptions()));
         }
     }
@@ -4807,7 +5137,7 @@ public void testUpdateFeaturesShouldFailRequestForInvalidFeatureName() {
     public void testUpdateFeaturesShouldFailRequestInClientWhenDowngradeFlagIsNotSetDuringDeletion() {
         assertThrows(
             IllegalArgumentException.class,
-            () -> new FeatureUpdate((short) 0, false));
+            () -> new FeatureUpdate((short) 0,  FeatureUpdate.UpgradeType.UPGRADE));
     }
 
     @Test
@@ -4837,6 +5167,96 @@ public void testDescribeFeaturesFailure() {
         }
     }
 
+    @Test
+    public void testDescribeMetadataQuorumSuccess() throws Exception {
+        try (final AdminClientUnitTestEnv env = mockClientEnv()) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create(ApiKeys.DESCRIBE_QUORUM.id,
+                    ApiKeys.DESCRIBE_QUORUM.oldestVersion(),
+                    ApiKeys.DESCRIBE_QUORUM.latestVersion()));
+
+            // Test with optional fields set
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, false, false, false, false, false));
+            KafkaFuture<QuorumInfo> future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            QuorumInfo quorumInfo = future.get();
+            assertEquals(defaultQuorumInfo(false), quorumInfo);
+
+            // Test with optional fields empty
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, false, false, false, false, true));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            quorumInfo = future.get();
+            assertEquals(defaultQuorumInfo(true), quorumInfo);
+        }
+    }
+
+    @Test
+    public void testDescribeMetadataQuorumFailure() {
+        try (final AdminClientUnitTestEnv env = mockClientEnv()) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create(ApiKeys.DESCRIBE_QUORUM.id,
+                        ApiKeys.DESCRIBE_QUORUM.oldestVersion(),
+                        ApiKeys.DESCRIBE_QUORUM.latestVersion()));
+
+            // Test top level error
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.INVALID_REQUEST, Errors.NONE, false, false, false, false, false));
+            KafkaFuture<QuorumInfo> future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, InvalidRequestException.class);
+
+            // Test incorrect topic count
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, true, false, false, false, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, UnknownServerException.class);
+
+            // Test incorrect topic name
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, false, true, false, false, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, UnknownServerException.class);
+
+            // Test incorrect partition count
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, false, false, true, false, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, UnknownServerException.class);
+
+            // Test incorrect partition index
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, false, false, false, true, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, UnknownServerException.class);
+
+            // Test partition level error
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.INVALID_REQUEST, false, false, false, false, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, InvalidRequestException.class);
+
+            // Test all incorrect and no errors
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.NONE, Errors.NONE, true, true, true, true, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, UnknownServerException.class);
+
+            // Test all incorrect and both errors
+            env.kafkaClient().prepareResponse(
+                    body -> body instanceof DescribeQuorumRequest,
+                    prepareDescribeQuorumResponse(Errors.INVALID_REQUEST, Errors.INVALID_REQUEST, true, true, true, true, false));
+            future = env.adminClient().describeMetadataQuorum().quorumInfo();
+            TestUtils.assertFutureThrows(future, Errors.INVALID_REQUEST.exception().getClass());
+        }
+    }
+
     @Test
     public void testListOffsetsMetadataRetriableErrors() throws Exception {
 
@@ -6302,6 +6722,12 @@ private DescribeLogDirsResponse prepareDescribeLogDirsResponse(Errors error, Str
                     .setLogDir(logDir))));
     }
 
+    private OffsetFetchResponse offsetFetchResponse(Errors error, Map<TopicPartition, PartitionData> responseData) {
+        return new OffsetFetchResponse(THROTTLE,
+                                       Collections.singletonMap(GROUP_ID, error),
+                                       Collections.singletonMap(GROUP_ID, responseData));
+    }
+
     private static MemberDescription convertToMemberDescriptions(DescribedGroupMember member,
                                                                  MemberAssignment assignment) {
         return new MemberDescription(member.memberId(),
diff --git a/clients/src/test/java/org/apache/kafka/clients/admin/MockAdminClient.java b/clients/src/test/java/org/apache/kafka/clients/admin/MockAdminClient.java
index 15cdc5ccc4116..8c31c7cf691b5 100644
--- a/clients/src/test/java/org/apache/kafka/clients/admin/MockAdminClient.java
+++ b/clients/src/test/java/org/apache/kafka/clients/admin/MockAdminClient.java
@@ -17,8 +17,10 @@
 package org.apache.kafka.clients.admin;
 
 import org.apache.kafka.clients.admin.DescribeReplicaLogDirsResult.ReplicaLogDirInfo;
+import org.apache.kafka.clients.admin.internals.CoordinatorKey;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.ElectionType;
+import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
@@ -57,6 +59,7 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 public class MockAdminClient extends AdminClient {
     public static final String DEFAULT_CLUSTER_ID = "I4ZmrWqfT2e-upky_4fdPA";
@@ -68,12 +71,11 @@ public class MockAdminClient extends AdminClient {
     private final Map<String, TopicMetadata> allTopics = new HashMap<>();
     private final Map<String, Uuid> topicIds = new HashMap<>();
     private final Map<Uuid, String> topicNames = new HashMap<>();
-    private final Map<TopicPartition, NewPartitionReassignment> reassignments =
-        new HashMap<>();
-    private final Map<TopicPartitionReplica, ReplicaLogDirInfo> replicaMoves =
-        new HashMap<>();
+    private final Map<TopicPartition, NewPartitionReassignment> reassignments = new HashMap<>();
+    private final Map<TopicPartitionReplica, ReplicaLogDirInfo> replicaMoves = new HashMap<>();
     private final Map<TopicPartition, Long> beginningOffsets;
     private final Map<TopicPartition, Long> endOffsets;
+    private final Map<TopicPartition, Long> committedOffsets;
     private final boolean usingRaftController;
     private final String clusterId;
     private final List<List<String>> brokerLogDirs;
@@ -84,6 +86,8 @@ public class MockAdminClient extends AdminClient {
     private final int defaultPartitions;
     private final int defaultReplicationFactor;
 
+    private KafkaException listConsumerGroupOffsetsException;
+
     private Map<MetricName, Metric> mockMetrics = new HashMap<>();
 
     public static Builder create() {
@@ -193,6 +197,7 @@ private MockAdminClient(List<Node> brokers,
         }
         this.beginningOffsets = new HashMap<>();
         this.endOffsets = new HashMap<>();
+        this.committedOffsets = new HashMap<>();
         this.usingRaftController = usingRaftController;
     }
 
@@ -579,8 +584,29 @@ synchronized public ListConsumerGroupsResult listConsumerGroups(ListConsumerGrou
     }
 
     @Override
-    synchronized public ListConsumerGroupOffsetsResult listConsumerGroupOffsets(String groupId, ListConsumerGroupOffsetsOptions options) {
-        throw new UnsupportedOperationException("Not implemented yet");
+    synchronized public ListConsumerGroupOffsetsResult listConsumerGroupOffsets(Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, ListConsumerGroupOffsetsOptions options) {
+        // ignoring the groups and assume one test would only work on one group only
+        if (groupSpecs.size() != 1)
+            throw new UnsupportedOperationException("Not implemented yet");
+
+        String group = groupSpecs.keySet().iterator().next();
+        Collection<TopicPartition> topicPartitions = groupSpecs.get(group).topicPartitions();
+        final KafkaFutureImpl<Map<TopicPartition, OffsetAndMetadata>> future = new KafkaFutureImpl<>();
+
+        if (listConsumerGroupOffsetsException != null) {
+            future.completeExceptionally(listConsumerGroupOffsetsException);
+        } else {
+            if (topicPartitions.isEmpty()) {
+                future.complete(committedOffsets.entrySet().stream()
+                        .collect(Collectors.toMap(Map.Entry::getKey, entry -> new OffsetAndMetadata(entry.getValue()))));
+            } else {
+                future.complete(committedOffsets.entrySet().stream()
+                        .filter(entry -> topicPartitions.contains(entry.getKey()))
+                        .collect(Collectors.toMap(Map.Entry::getKey, entry -> new OffsetAndMetadata(entry.getValue()))));
+            }
+        }
+
+        return new ListConsumerGroupOffsetsResult(Collections.singletonMap(CoordinatorKey.byGroupId(group), future));
     }
 
     @Override
@@ -962,6 +988,11 @@ public AlterUserScramCredentialsResult alterUserScramCredentials(List<UserScramC
         throw new UnsupportedOperationException("Not implemented yet");
     }
 
+    @Override
+    public DescribeMetadataQuorumResult describeMetadataQuorum(DescribeMetadataQuorumOptions options) {
+        throw new UnsupportedOperationException("Not implemented yet");
+    }
+
     @Override
     public DescribeFeaturesResult describeFeatures(DescribeFeaturesOptions options) {
         throw new UnsupportedOperationException("Not implemented yet");
@@ -1019,6 +1050,14 @@ public synchronized void updateEndOffsets(final Map<TopicPartition, Long> newOff
         endOffsets.putAll(newOffsets);
     }
 
+    public synchronized void updateConsumerGroupOffsets(final Map<TopicPartition, Long> newOffsets) {
+        committedOffsets.putAll(newOffsets);
+    }
+
+    public synchronized void throwOnListConsumerGroupOffsets(final KafkaException exception) {
+        listConsumerGroupOffsetsException = exception;
+    }
+
     private final static class TopicMetadata {
         final Uuid topicId;
         final boolean isInternalTopic;
diff --git a/clients/src/test/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandlerTest.java b/clients/src/test/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandlerTest.java
index 27597ce035b00..95fabb3fc2a2f 100644
--- a/clients/src/test/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandlerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/admin/internals/ListConsumerGroupOffsetsHandlerTest.java
@@ -24,52 +24,140 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsSpec;
+import org.apache.kafka.clients.admin.internals.AdminApiHandler.RequestAndKeys;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.GroupAuthorizationException;
 import org.apache.kafka.common.errors.GroupIdNotFoundException;
 import org.apache.kafka.common.errors.InvalidGroupIdException;
+import org.apache.kafka.common.message.OffsetFetchRequestData.OffsetFetchRequestGroup;
+import org.apache.kafka.common.message.OffsetFetchRequestData.OffsetFetchRequestTopics;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.OffsetFetchRequest;
 import org.apache.kafka.common.requests.OffsetFetchResponse;
 import org.apache.kafka.common.requests.OffsetFetchResponse.PartitionData;
 import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.common.utils.Utils;
 import org.junit.jupiter.api.Test;
 
 public class ListConsumerGroupOffsetsHandlerTest {
 
     private final LogContext logContext = new LogContext();
-    private final String groupId = "group-id";
+    private final int throttleMs = 10;
+    private final String groupZero = "group0";
+    private final String groupOne = "group1";
+    private final String groupTwo = "group2";
+    private final List<String> groups = Arrays.asList(groupZero, groupOne, groupTwo);
     private final TopicPartition t0p0 = new TopicPartition("t0", 0);
     private final TopicPartition t0p1 = new TopicPartition("t0", 1);
     private final TopicPartition t1p0 = new TopicPartition("t1", 0);
     private final TopicPartition t1p1 = new TopicPartition("t1", 1);
-    private final List<TopicPartition> tps = Arrays.asList(t0p0, t0p1, t1p0, t1p1);
+    private final TopicPartition t2p0 = new TopicPartition("t2", 0);
+    private final TopicPartition t2p1 = new TopicPartition("t2", 1);
+    private final TopicPartition t2p2 = new TopicPartition("t2", 2);
+    private final Map<String, ListConsumerGroupOffsetsSpec> singleRequestMap = Collections.singletonMap(groupZero,
+            new ListConsumerGroupOffsetsSpec().topicPartitions(Arrays.asList(t0p0, t0p1, t1p0, t1p1)));
+    private final Map<String, ListConsumerGroupOffsetsSpec> batchedRequestMap =
+            new HashMap<String, ListConsumerGroupOffsetsSpec>() {{
+                put(groupZero, new ListConsumerGroupOffsetsSpec().topicPartitions(singletonList(t0p0)));
+                put(groupOne, new ListConsumerGroupOffsetsSpec().topicPartitions(Arrays.asList(t0p0, t1p0, t1p1)));
+                put(groupTwo, new ListConsumerGroupOffsetsSpec().topicPartitions(Arrays.asList(t0p0, t1p0, t1p1, t2p0, t2p1, t2p2)));
+            }};
 
     @Test
     public void testBuildRequest() {
-        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(groupId, tps, logContext);
-        OffsetFetchRequest request = handler.buildBatchedRequest(1, singleton(CoordinatorKey.byGroupId(groupId))).build();
-        assertEquals(groupId, request.data().groups().get(0).groupId());
+        ListConsumerGroupOffsetsHandler handler =
+            new ListConsumerGroupOffsetsHandler(singleRequestMap, false, logContext);
+        OffsetFetchRequest request = handler.buildBatchedRequest(coordinatorKeys(groupZero)).build();
+        assertEquals(groupZero, request.data().groups().get(0).groupId());
         assertEquals(2, request.data().groups().get(0).topics().size());
         assertEquals(2, request.data().groups().get(0).topics().get(0).partitionIndexes().size());
         assertEquals(2, request.data().groups().get(0).topics().get(1).partitionIndexes().size());
     }
 
+    @Test
+    public void testBuildRequestWithMultipleGroups() {
+        Map<String, ListConsumerGroupOffsetsSpec> requestMap = new HashMap<>(this.batchedRequestMap);
+        String groupThree = "group3";
+        requestMap.put(groupThree, new ListConsumerGroupOffsetsSpec()
+                .topicPartitions(Arrays.asList(new TopicPartition("t3", 0), new TopicPartition("t3", 1))));
+
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(requestMap, false, logContext);
+        OffsetFetchRequest request1 = handler.buildBatchedRequest(coordinatorKeys(groupZero, groupOne, groupTwo)).build();
+        assertEquals(Utils.mkSet(groupZero, groupOne, groupTwo), requestGroups(request1));
+
+        OffsetFetchRequest request2 = handler.buildBatchedRequest(coordinatorKeys(groupThree)).build();
+        assertEquals(Utils.mkSet(groupThree), requestGroups(request2));
+
+        Map<String, ListConsumerGroupOffsetsSpec> builtRequests = new HashMap<>();
+        request1.groupIdsToPartitions().forEach((group, partitions) ->
+                builtRequests.put(group, new ListConsumerGroupOffsetsSpec().topicPartitions(partitions)));
+        request2.groupIdsToPartitions().forEach((group, partitions) ->
+                builtRequests.put(group, new ListConsumerGroupOffsetsSpec().topicPartitions(partitions)));
+
+        assertEquals(requestMap, builtRequests);
+        Map<String, List<OffsetFetchRequestTopics>> groupIdsToTopics = request1.groupIdsToTopics();
+
+        assertEquals(3, groupIdsToTopics.size());
+        assertEquals(1, groupIdsToTopics.get(groupZero).size());
+        assertEquals(2, groupIdsToTopics.get(groupOne).size());
+        assertEquals(3, groupIdsToTopics.get(groupTwo).size());
+
+        assertEquals(1, groupIdsToTopics.get(groupZero).get(0).partitionIndexes().size());
+        assertEquals(1, groupIdsToTopics.get(groupOne).get(0).partitionIndexes().size());
+        assertEquals(2, groupIdsToTopics.get(groupOne).get(1).partitionIndexes().size());
+        assertEquals(1, groupIdsToTopics.get(groupTwo).get(0).partitionIndexes().size());
+        assertEquals(2, groupIdsToTopics.get(groupTwo).get(1).partitionIndexes().size());
+        assertEquals(3, groupIdsToTopics.get(groupTwo).get(2).partitionIndexes().size());
+
+        groupIdsToTopics = request2.groupIdsToTopics();
+        assertEquals(1, groupIdsToTopics.size());
+        assertEquals(1, groupIdsToTopics.get(groupThree).size());
+        assertEquals(2, groupIdsToTopics.get(groupThree).get(0).partitionIndexes().size());
+    }
+
+    @Test
+    public void testBuildRequestBatchGroups() {
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(batchedRequestMap, false, logContext);
+        Collection<RequestAndKeys<CoordinatorKey>> requests = handler.buildRequest(1, coordinatorKeys(groupZero, groupOne, groupTwo));
+        assertEquals(1, requests.size());
+        assertEquals(Utils.mkSet(groupZero, groupOne, groupTwo), requestGroups((OffsetFetchRequest) requests.iterator().next().request.build()));
+    }
+
+    @Test
+    public void testBuildRequestDoesNotBatchGroup() {
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(batchedRequestMap, false, logContext);
+        // Disable batching.
+        ((CoordinatorStrategy) handler.lookupStrategy()).disableBatch();
+        Collection<RequestAndKeys<CoordinatorKey>> requests = handler.buildRequest(1, coordinatorKeys(groupZero, groupOne, groupTwo));
+        assertEquals(3, requests.size());
+        assertEquals(
+            Utils.mkSet(Utils.mkSet(groupZero), Utils.mkSet(groupOne), Utils.mkSet(groupTwo)),
+            requests.stream().map(requestAndKey -> requestGroups((OffsetFetchRequest) requestAndKey.request.build())).collect(Collectors.toSet())
+        );
+    }
+
     @Test
     public void testSuccessfulHandleResponse() {
         Map<TopicPartition, OffsetAndMetadata> expected = new HashMap<>();
         assertCompleted(handleWithError(Errors.NONE), expected);
     }
 
-
     @Test
     public void testSuccessfulHandleResponseWithOnePartitionError() {
         Map<TopicPartition, OffsetAndMetadata> expectedResult = Collections.singletonMap(t0p0, new OffsetAndMetadata(10L));
@@ -80,17 +168,62 @@ public void testSuccessfulHandleResponseWithOnePartitionError() {
         assertCompleted(handleWithPartitionError(Errors.UNSTABLE_OFFSET_COMMIT), expectedResult);
     }
 
+    @Test
+    public void testSuccessfulHandleResponseWithOnePartitionErrorWithMultipleGroups() {
+        Map<TopicPartition, OffsetAndMetadata> offsetAndMetadataMapZero =
+            Collections.singletonMap(t0p0, new OffsetAndMetadata(10L));
+        Map<TopicPartition, OffsetAndMetadata> offsetAndMetadataMapOne =
+            Collections.singletonMap(t1p1, new OffsetAndMetadata(10L));
+        Map<TopicPartition, OffsetAndMetadata> offsetAndMetadataMapTwo =
+            Collections.singletonMap(t2p2, new OffsetAndMetadata(10L));
+        Map<String, Map<TopicPartition, OffsetAndMetadata>> expectedResult =
+            new HashMap<String, Map<TopicPartition, OffsetAndMetadata>>() {{
+                put(groupZero, offsetAndMetadataMapZero);
+                put(groupOne, offsetAndMetadataMapOne);
+                put(groupTwo, offsetAndMetadataMapTwo);
+            }};
+
+        assertCompletedForMultipleGroups(
+            handleWithPartitionErrorMultipleGroups(Errors.UNKNOWN_TOPIC_OR_PARTITION), expectedResult);
+        assertCompletedForMultipleGroups(
+            handleWithPartitionErrorMultipleGroups(Errors.TOPIC_AUTHORIZATION_FAILED), expectedResult);
+        assertCompletedForMultipleGroups(
+            handleWithPartitionErrorMultipleGroups(Errors.UNSTABLE_OFFSET_COMMIT), expectedResult);
+    }
+
+    @Test
+    public void testSuccessfulHandleResponseWithMultipleGroups() {
+        Map<String, Map<TopicPartition, OffsetAndMetadata>> expected = new HashMap<>();
+        Map<String, Errors> errorMap = errorMap(groups, Errors.NONE);
+        assertCompletedForMultipleGroups(handleWithErrorWithMultipleGroups(errorMap, batchedRequestMap), expected);
+    }
+
     @Test
     public void testUnmappedHandleResponse() {
         assertUnmapped(handleWithError(Errors.COORDINATOR_NOT_AVAILABLE));
         assertUnmapped(handleWithError(Errors.NOT_COORDINATOR));
     }
 
+    @Test
+    public void testUnmappedHandleResponseWithMultipleGroups() {
+        Map<String, Errors> errorMap = new HashMap<>();
+        errorMap.put(groupZero, Errors.NOT_COORDINATOR);
+        errorMap.put(groupOne, Errors.COORDINATOR_NOT_AVAILABLE);
+        errorMap.put(groupTwo, Errors.NOT_COORDINATOR);
+        assertUnmappedWithMultipleGroups(handleWithErrorWithMultipleGroups(errorMap, batchedRequestMap));
+    }
+
     @Test
     public void testRetriableHandleResponse() {
         assertRetriable(handleWithError(Errors.COORDINATOR_LOAD_IN_PROGRESS));
     }
 
+    @Test
+    public void testRetriableHandleResponseWithMultipleGroups() {
+        Map<String, Errors> errorMap = errorMap(groups, Errors.COORDINATOR_LOAD_IN_PROGRESS);
+        assertRetriable(handleWithErrorWithMultipleGroups(errorMap, batchedRequestMap));
+    }
+
     @Test
     public void testFailedHandleResponse() {
         assertFailed(GroupAuthorizationException.class, handleWithError(Errors.GROUP_AUTHORIZATION_FAILED));
@@ -98,10 +231,50 @@ public void testFailedHandleResponse() {
         assertFailed(InvalidGroupIdException.class, handleWithError(Errors.INVALID_GROUP_ID));
     }
 
+    @Test
+    public void testFailedHandleResponseWithMultipleGroups() {
+        Map<String, Errors> errorMap = new HashMap<>();
+        errorMap.put(groupZero, Errors.GROUP_AUTHORIZATION_FAILED);
+        errorMap.put(groupOne, Errors.GROUP_ID_NOT_FOUND);
+        errorMap.put(groupTwo, Errors.INVALID_GROUP_ID);
+        Map<String, Class<? extends Throwable>> groupToExceptionMap = new HashMap<>();
+        groupToExceptionMap.put(groupZero, GroupAuthorizationException.class);
+        groupToExceptionMap.put(groupOne, GroupIdNotFoundException.class);
+        groupToExceptionMap.put(groupTwo, InvalidGroupIdException.class);
+        assertFailedForMultipleGroups(groupToExceptionMap,
+            handleWithErrorWithMultipleGroups(errorMap, batchedRequestMap));
+    }
+
     private OffsetFetchResponse buildResponse(Errors error) {
-        Map<TopicPartition, PartitionData> responseData = new HashMap<>();
-        OffsetFetchResponse response = new OffsetFetchResponse(error, responseData);
-        return response;
+        return new OffsetFetchResponse(
+            throttleMs,
+            Collections.singletonMap(groupZero, error),
+            Collections.singletonMap(groupZero, new HashMap<>()));
+    }
+
+    private OffsetFetchResponse buildResponseWithMultipleGroups(
+        Map<String, Errors> errorMap,
+        Map<String, Map<TopicPartition, PartitionData>> responseData
+    ) {
+        return new OffsetFetchResponse(throttleMs, errorMap, responseData);
+    }
+
+    private AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> handleWithErrorWithMultipleGroups(
+        Map<String, Errors> errorMap,
+        Map<String, ListConsumerGroupOffsetsSpec> groupSpecs
+    ) {
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(groupSpecs, false, logContext);
+        Map<String, Map<TopicPartition, PartitionData>> responseData = new HashMap<>();
+        for (String group : errorMap.keySet()) {
+            responseData.put(group, new HashMap<>());
+        }
+        OffsetFetchResponse response = buildResponseWithMultipleGroups(errorMap, responseData);
+        return handler.handleResponse(new Node(1, "host", 1234),
+                errorMap.keySet()
+                        .stream()
+                        .map(CoordinatorKey::byGroupId)
+                        .collect(Collectors.toSet()),
+                response);
     }
 
     private OffsetFetchResponse buildResponseWithPartitionError(Errors error) {
@@ -110,24 +283,68 @@ private OffsetFetchResponse buildResponseWithPartitionError(Errors error) {
         responseData.put(t0p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", Errors.NONE));
         responseData.put(t0p1, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
 
-        OffsetFetchResponse response = new OffsetFetchResponse(Errors.NONE, responseData);
-        return response;
+        return new OffsetFetchResponse(Errors.NONE, responseData);
+    }
+
+    private OffsetFetchResponse buildResponseWithPartitionErrorWithMultipleGroups(Errors error) {
+        Map<TopicPartition, PartitionData> responseDataZero = new HashMap<>();
+        responseDataZero.put(t0p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", Errors.NONE));
+
+        Map<TopicPartition, PartitionData> responseDataOne = new HashMap<>();
+        responseDataOne.put(t0p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataOne.put(t1p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataOne.put(t1p1, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", Errors.NONE));
+
+        Map<TopicPartition, PartitionData> responseDataTwo = new HashMap<>();
+        responseDataTwo.put(t0p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataTwo.put(t1p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataTwo.put(t1p1, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataTwo.put(t2p0, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataTwo.put(t2p1, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", error));
+        responseDataTwo.put(t2p2, new OffsetFetchResponse.PartitionData(10, Optional.empty(), "", Errors.NONE));
+
+        Map<String, Map<TopicPartition, PartitionData>> responseData =
+            new HashMap<String, Map<TopicPartition, PartitionData>>() {{
+                put(groupZero, responseDataZero);
+                put(groupOne, responseDataOne);
+                put(groupTwo, responseDataTwo);
+            }};
+
+        Map<String, Errors> errorMap = errorMap(groups, Errors.NONE);
+        return new OffsetFetchResponse(0, errorMap, responseData);
     }
 
     private AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> handleWithPartitionError(
         Errors error
     ) {
-        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(groupId, tps, logContext);
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(singleRequestMap,
+            false, logContext);
         OffsetFetchResponse response = buildResponseWithPartitionError(error);
-        return handler.handleResponse(new Node(1, "host", 1234), singleton(CoordinatorKey.byGroupId(groupId)), response);
+        return handler.handleResponse(new Node(1, "host", 1234),
+            singleton(CoordinatorKey.byGroupId(groupZero)), response);
+    }
+
+    private AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> handleWithPartitionErrorMultipleGroups(
+        Errors error
+    ) {
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(
+                batchedRequestMap, false, logContext);
+        OffsetFetchResponse response = buildResponseWithPartitionErrorWithMultipleGroups(error);
+        return handler.handleResponse(
+            new Node(1, "host", 1234),
+            coordinatorKeys(groupZero, groupOne, groupTwo),
+            response);
     }
 
     private AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> handleWithError(
         Errors error
     ) {
-        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(groupId, tps, logContext);
+        ListConsumerGroupOffsetsHandler handler = new ListConsumerGroupOffsetsHandler(
+            singleRequestMap, false, logContext);
         OffsetFetchResponse response = buildResponse(error);
-        return handler.handleResponse(new Node(1, "host", 1234), singleton(CoordinatorKey.byGroupId(groupId)), response);
+        return handler.handleResponse(new Node(1, "host", 1234),
+            singleton(CoordinatorKey.byGroupId(groupZero)),
+            response);
     }
 
     private void assertUnmapped(
@@ -135,11 +352,19 @@ private void assertUnmapped(
     ) {
         assertEquals(emptySet(), result.completedKeys.keySet());
         assertEquals(emptySet(), result.failedKeys.keySet());
-        assertEquals(singletonList(CoordinatorKey.byGroupId(groupId)), result.unmappedKeys);
+        assertEquals(singletonList(CoordinatorKey.byGroupId(groupZero)), result.unmappedKeys);
+    }
+
+    private void assertUnmappedWithMultipleGroups(
+            AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result
+    ) {
+        assertEquals(emptySet(), result.completedKeys.keySet());
+        assertEquals(emptySet(), result.failedKeys.keySet());
+        assertEquals(coordinatorKeys(groupZero, groupOne, groupTwo), new HashSet<>(result.unmappedKeys));
     }
 
     private void assertRetriable(
-        AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result
+            AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result
     ) {
         assertEquals(emptySet(), result.completedKeys.keySet());
         assertEquals(emptySet(), result.failedKeys.keySet());
@@ -150,21 +375,64 @@ private void assertCompleted(
         AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result,
         Map<TopicPartition, OffsetAndMetadata> expected
     ) {
-        CoordinatorKey key = CoordinatorKey.byGroupId(groupId);
+        CoordinatorKey key = CoordinatorKey.byGroupId(groupZero);
         assertEquals(emptySet(), result.failedKeys.keySet());
         assertEquals(emptyList(), result.unmappedKeys);
         assertEquals(singleton(key), result.completedKeys.keySet());
-        assertEquals(expected, result.completedKeys.get(CoordinatorKey.byGroupId(groupId)));
+        assertEquals(expected, result.completedKeys.get(key));
+    }
+
+    private void assertCompletedForMultipleGroups(
+        AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result,
+        Map<String, Map<TopicPartition, OffsetAndMetadata>> expected
+    ) {
+        assertEquals(emptySet(), result.failedKeys.keySet());
+        assertEquals(emptyList(), result.unmappedKeys);
+        for (String g : expected.keySet()) {
+            CoordinatorKey key = CoordinatorKey.byGroupId(g);
+            assertTrue(result.completedKeys.containsKey(key));
+            assertEquals(expected.get(g), result.completedKeys.get(key));
+        }
     }
 
     private void assertFailed(
         Class<? extends Throwable> expectedExceptionType,
         AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result
     ) {
-        CoordinatorKey key = CoordinatorKey.byGroupId(groupId);
+        CoordinatorKey key = CoordinatorKey.byGroupId(groupZero);
         assertEquals(emptySet(), result.completedKeys.keySet());
         assertEquals(emptyList(), result.unmappedKeys);
         assertEquals(singleton(key), result.failedKeys.keySet());
         assertTrue(expectedExceptionType.isInstance(result.failedKeys.get(key)));
     }
+
+    private void assertFailedForMultipleGroups(
+        Map<String, Class<? extends Throwable>> groupToExceptionMap,
+        AdminApiHandler.ApiResult<CoordinatorKey, Map<TopicPartition, OffsetAndMetadata>> result
+    ) {
+        assertEquals(emptySet(), result.completedKeys.keySet());
+        assertEquals(emptyList(), result.unmappedKeys);
+        for (String g : groupToExceptionMap.keySet()) {
+            CoordinatorKey key = CoordinatorKey.byGroupId(g);
+            assertTrue(result.failedKeys.containsKey(key));
+            assertTrue(groupToExceptionMap.get(g).isInstance(result.failedKeys.get(key)));
+        }
+    }
+
+    private Set<CoordinatorKey> coordinatorKeys(String... groups) {
+        return Stream.of(groups)
+                .map(CoordinatorKey::byGroupId)
+                .collect(Collectors.toSet());
+    }
+
+    private Set<String> requestGroups(OffsetFetchRequest request) {
+        return request.data().groups()
+                .stream()
+                .map(OffsetFetchRequestGroup::groupId)
+                .collect(Collectors.toSet());
+    }
+
+    private Map<String, Errors> errorMap(Collection<String> groups, Errors error) {
+        return groups.stream().collect(Collectors.toMap(Function.identity(), unused -> error));
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerConfigTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerConfigTest.java
index dc1eeac5d74a0..163b9cf118009 100644
--- a/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerConfigTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/consumer/ConsumerConfigTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.clients.consumer;
 
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.errors.InvalidConfigurationException;
 import org.apache.kafka.common.serialization.ByteArrayDeserializer;
 import org.apache.kafka.common.serialization.Deserializer;
@@ -30,6 +32,8 @@
 
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 
 public class ConsumerConfigTest {
@@ -98,6 +102,19 @@ public void testAppendDeserializerToConfig() {
         assertEquals(newConfigs.get(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG), valueDeserializerClass);
     }
 
+    @Test
+    public void testAppendDeserializerToConfigWithException() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, null);
+        configs.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializerClass);
+        assertThrows(ConfigException.class, () -> ConsumerConfig.appendDeserializerToConfig(configs, null, valueDeserializer));
+
+        configs.clear();
+        configs.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializerClass);
+        configs.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, null);
+        assertThrows(ConfigException.class, () -> ConsumerConfig.appendDeserializerToConfig(configs, keyDeserializer, null));
+    }
+
     @Test
     public void ensureDefaultThrowOnUnsupportedStableFlagToFalse() {
         assertFalse(new ConsumerConfig(properties).getBoolean(ConsumerConfig.THROW_ON_FETCH_STABLE_OFFSET_UNSUPPORTED));
@@ -108,4 +125,24 @@ public void testDefaultPartitionAssignor() {
         assertEquals(Arrays.asList(RangeAssignor.class, CooperativeStickyAssignor.class),
             new ConsumerConfig(properties).getList(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG));
     }
+
+    @Test
+    public void testInvalidGroupInstanceId() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializerClass);
+        configs.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializerClass);
+        configs.put(ConsumerConfig.GROUP_INSTANCE_ID_CONFIG, "");
+        ConfigException ce = assertThrows(ConfigException.class, () -> new ConsumerConfig(configs));
+        assertTrue(ce.getMessage().contains(ConsumerConfig.GROUP_INSTANCE_ID_CONFIG));
+    }
+
+    @Test
+    public void testInvalidSecurityProtocol() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializerClass);
+        configs.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializerClass);
+        configs.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc");
+        ConfigException ce = assertThrows(ConfigException.class, () -> new ConsumerConfig(configs));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
index 27c108bcdacac..e7f25345c6da0 100644
--- a/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/consumer/KafkaConsumerTest.java
@@ -138,6 +138,7 @@
 import static java.util.Collections.singleton;
 import static java.util.Collections.singletonList;
 import static java.util.Collections.singletonMap;
+import static org.apache.kafka.clients.consumer.KafkaConsumer.DEFAULT_REASON;
 import static org.apache.kafka.common.requests.FetchMetadata.INVALID_SESSION_ID;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -171,6 +172,7 @@ public class KafkaConsumerTest {
     // Set auto commit interval lower than heartbeat so we don't need to deal with
     // a concurrent heartbeat request
     private final int autoCommitIntervalMs = 500;
+    private final int throttleMs = 10;
 
     private final String groupId = "mock-group";
     private final String memberId = "memberId";
@@ -716,7 +718,6 @@ public void testFetchProgressWithMissingPartitionPosition() {
         consumer.seekToEnd(singleton(tp0));
         consumer.seekToBeginning(singleton(tp1));
 
-        client.prepareResponseFrom(FindCoordinatorResponse.prepareResponse(Errors.NONE, groupId, node), node);
         client.prepareResponse(body -> {
             ListOffsetsRequest request = (ListOffsetsRequest) body;
             List<ListOffsetsPartition> partitions = request.topics().stream().flatMap(t -> {
@@ -2434,7 +2435,10 @@ private OffsetFetchResponse offsetResponse(Map<TopicPartition, Long> offsets, Er
             partitionData.put(entry.getKey(), new OffsetFetchResponse.PartitionData(entry.getValue(),
                     Optional.empty(), "", error));
         }
-        return new OffsetFetchResponse(Errors.NONE, partitionData);
+        return new OffsetFetchResponse(
+            throttleMs,
+            Collections.singletonMap(groupId, Errors.NONE),
+            Collections.singletonMap(groupId, partitionData));
     }
 
     private ListOffsetsResponse listOffsetsResponse(Map<TopicPartition, Long> offsets) {
@@ -2819,6 +2823,66 @@ public void testEnforceRebalanceTriggersRebalanceOnNextPoll() {
         assertEquals(countingRebalanceListener.revokedCount, 1);
     }
 
+    @Test
+    public void testEnforceRebalanceReason() {
+        Time time = new MockTime(1L);
+
+        ConsumerMetadata metadata = createMetadata(subscription);
+        MockClient client = new MockClient(time, metadata);
+        initMetadata(client, Utils.mkMap(Utils.mkEntry(topic, 1)));
+        Node node = metadata.fetch().nodes().get(0);
+
+        KafkaConsumer<String, String> consumer = newConsumer(
+            time,
+            client,
+            subscription,
+            metadata,
+            assignor,
+            true,
+            groupInstanceId
+        );
+        consumer.subscribe(Collections.singletonList(topic));
+
+        // Lookup coordinator.
+        client.prepareResponseFrom(FindCoordinatorResponse.prepareResponse(Errors.NONE, groupId, node), node);
+        consumer.poll(Duration.ZERO);
+
+        // Initial join sends an empty reason.
+        prepareJoinGroupAndVerifyReason(client, node, "");
+        consumer.poll(Duration.ZERO);
+
+        // A null reason should be replaced by the default reason.
+        consumer.enforceRebalance(null);
+        prepareJoinGroupAndVerifyReason(client, node, DEFAULT_REASON);
+        consumer.poll(Duration.ZERO);
+
+        // An empty reason should be replaced by the default reason.
+        consumer.enforceRebalance("");
+        prepareJoinGroupAndVerifyReason(client, node, DEFAULT_REASON);
+        consumer.poll(Duration.ZERO);
+
+        // A non-null and non-empty reason is sent as-is.
+        String customReason = "user provided reason";
+        consumer.enforceRebalance(customReason);
+        prepareJoinGroupAndVerifyReason(client, node, customReason);
+        consumer.poll(Duration.ZERO);
+    }
+
+    private void prepareJoinGroupAndVerifyReason(
+        MockClient client,
+        Node node,
+        String expectedReason
+    ) {
+        client.prepareResponseFrom(
+            body -> {
+                JoinGroupRequest joinGroupRequest = (JoinGroupRequest) body;
+                return expectedReason.equals(joinGroupRequest.data().reason());
+            },
+            joinGroupFollowerResponse(assignor, 1, memberId, leaderId, Errors.NONE),
+            node
+        );
+    }
+
     @Test
     public void configurableObjectsShouldSeeGeneratedClientId() {
         Properties props = new Properties();
diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java
index 48ed136ebc4cb..cbc4e7495e161 100644
--- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/AbstractCoordinatorTest.java
@@ -67,6 +67,7 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -488,6 +489,54 @@ public void testRetainMemberIdAfterSyncGroupDisconnect() {
         ensureActiveGroup(rejoinedGeneration, memberId);
     }
 
+    @Test
+    public void testResetGenerationIdAfterSyncGroupFailedWithRebalanceInProgress() throws InterruptedException, ExecutionException {
+        setupCoordinator();
+
+        String memberId = "memberId";
+        int generation = 5;
+
+        // Rebalance once to initialize the generation and memberId
+        mockClient.prepareResponse(groupCoordinatorResponse(node, Errors.NONE));
+        expectJoinGroup("", generation, memberId);
+        expectSyncGroup(generation, memberId);
+        ensureActiveGroup(generation, memberId);
+
+        // Force a rebalance
+        coordinator.requestRejoin("Manual test trigger");
+        assertTrue(coordinator.rejoinNeededOrPending());
+
+        ExecutorService executor = Executors.newFixedThreadPool(1);
+        try {
+            // Return RebalanceInProgress in syncGroup
+            int rejoinedGeneration = 10;
+            expectJoinGroup(memberId, rejoinedGeneration, memberId);
+            expectRebalanceInProgressForSyncGroup(rejoinedGeneration, memberId);
+            Future<Boolean> secondJoin = executor.submit(() ->
+                coordinator.ensureActiveGroup(mockTime.timer(Integer.MAX_VALUE)));
+
+            TestUtils.waitForCondition(() -> {
+                AbstractCoordinator.Generation currentGeneration = coordinator.generation();
+                return currentGeneration.generationId == AbstractCoordinator.Generation.NO_GENERATION.generationId &&
+                        currentGeneration.memberId.equals(memberId);
+            }, 2000, "Generation should be reset");
+
+            rejoinedGeneration = 20;
+            expectSyncGroup(rejoinedGeneration, memberId);
+            mockClient.respond(joinGroupFollowerResponse(
+                    rejoinedGeneration,
+                    memberId,
+                    "leaderId",
+                    Errors.NONE,
+                    PROTOCOL_TYPE
+            ));
+            assertTrue(secondJoin.get());
+        } finally {
+            executor.shutdownNow();
+            executor.awaitTermination(1000, TimeUnit.MILLISECONDS);
+        }
+    }
+
     @Test
     public void testRejoinReason() {
         setupCoordinator();
@@ -504,7 +553,7 @@ public void testRejoinReason() {
         ensureActiveGroup(generation, memberId);
         assertEquals("", coordinator.rejoinReason());
 
-        // Force a rebalance
+        // force a rebalance
         expectJoinGroup(memberId, "Manual test trigger", generation, memberId);
         expectSyncGroup(generation, memberId);
         coordinator.requestRejoin("Manual test trigger");
@@ -518,8 +567,17 @@ public void testRejoinReason() {
                 () -> coordinator.joinGroupIfNeeded(mockTime.timer(100L)));
 
         // next join group request should contain exception message
-        expectJoinGroup(memberId, String.format("rebalance failed due to '%s' (%s)", e.getMessage(), e.getClass().getSimpleName()), generation, memberId);
+        expectJoinGroup(memberId, String.format("rebalance failed due to %s", e.getClass().getSimpleName()), generation, memberId);
+        expectSyncGroup(generation, memberId);
+        ensureActiveGroup(generation, memberId);
+        assertEquals("", coordinator.rejoinReason());
+
+        // check limit length of reason field
+        final String reason = "Very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong reason that is 271 characters long to make sure that length limit logic handles the scenario nicely";
+        final String truncatedReason = reason.substring(0, 255);
+        expectJoinGroup(memberId, truncatedReason, generation, memberId);
         expectSyncGroup(generation, memberId);
+        coordinator.requestRejoin(reason);
         ensureActiveGroup(generation, memberId);
         assertEquals("", coordinator.rejoinReason());
     }
@@ -566,6 +624,22 @@ private void expectDisconnectInSyncGroup(
         }, null, true);
     }
 
+    private void expectRebalanceInProgressForSyncGroup(
+            int expectedGeneration,
+            String expectedMemberId
+    ) {
+        mockClient.prepareResponse(body -> {
+            if (!(body instanceof SyncGroupRequest)) {
+                return false;
+            }
+            SyncGroupRequestData syncGroupRequest = ((SyncGroupRequest) body).data();
+            return syncGroupRequest.generationId() == expectedGeneration
+                    && syncGroupRequest.memberId().equals(expectedMemberId)
+                    && syncGroupRequest.protocolType().equals(PROTOCOL_TYPE)
+                    && syncGroupRequest.protocolName().equals(PROTOCOL_NAME);
+        }, syncGroupResponse(Errors.REBALANCE_IN_PROGRESS, PROTOCOL_TYPE, PROTOCOL_NAME));
+    }
+
     private void expectDisconnectInJoinGroup(
         String expectedMemberId
     ) {
@@ -1094,6 +1168,19 @@ public void testHandleNormalLeaveGroupResponse() {
         assertTrue(leaveGroupFuture.succeeded());
     }
 
+    @Test
+    public void testHandleNormalLeaveGroupResponseAndTruncatedLeaveReason() {
+        MemberResponse memberResponse = new MemberResponse()
+                .setMemberId(memberId)
+                .setErrorCode(Errors.NONE.code());
+        LeaveGroupResponse response =
+                leaveGroupResponse(Collections.singletonList(memberResponse));
+        String leaveReason = "Very looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong leaveReason that is 271 characters long to make sure that length limit logic handles the scenario nicely";
+        RequestFuture<Void> leaveGroupFuture = setupLeaveGroup(response, leaveReason, leaveReason.substring(0, 255));
+        assertNotNull(leaveGroupFuture);
+        assertTrue(leaveGroupFuture.succeeded());
+    }
+
     @Test
     public void testHandleMultipleMembersLeaveGroupResponse() {
         MemberResponse memberResponse = new MemberResponse()
@@ -1128,6 +1215,12 @@ public void testHandleLeaveGroupResponseWithException() {
     }
 
     private RequestFuture<Void> setupLeaveGroup(LeaveGroupResponse leaveGroupResponse) {
+        return setupLeaveGroup(leaveGroupResponse, "test maybe leave group", "test maybe leave group");
+    }
+
+    private RequestFuture<Void> setupLeaveGroup(LeaveGroupResponse leaveGroupResponse,
+                                                String leaveReason,
+                                                String expectedLeaveReason) {
         setupCoordinator(RETRY_BACKOFF_MS, Integer.MAX_VALUE, Optional.empty());
 
         mockClient.prepareResponse(groupCoordinatorResponse(node, Errors.NONE));
@@ -1139,11 +1232,11 @@ private RequestFuture<Void> setupLeaveGroup(LeaveGroupResponse leaveGroupRespons
             }
             LeaveGroupRequestData leaveGroupRequest = ((LeaveGroupRequest) body).data();
             return leaveGroupRequest.members().get(0).memberId().equals(memberId) &&
-                   leaveGroupRequest.members().get(0).reason().equals("test maybe leave group");
+                   leaveGroupRequest.members().get(0).reason().equals(expectedLeaveReason);
         }, leaveGroupResponse);
 
         coordinator.ensureActiveGroup();
-        return coordinator.maybeLeaveGroup("test maybe leave group");
+        return coordinator.maybeLeaveGroup(leaveReason);
     }
 
     @Test
@@ -1633,7 +1726,7 @@ protected Map<String, ByteBuffer> onLeaderElected(String leaderId,
         }
 
         @Override
-        protected boolean onJoinPrepare(int generation, String memberId) {
+        protected boolean onJoinPrepare(Timer timer, int generation, String memberId) {
             onJoinPrepareInvokes++;
             return true;
         }
diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java
index 1fee84a8d8f55..d948990d69b63 100644
--- a/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinatorTest.java
@@ -71,12 +71,14 @@
 import org.apache.kafka.common.requests.OffsetCommitRequest;
 import org.apache.kafka.common.requests.OffsetCommitResponse;
 import org.apache.kafka.common.requests.OffsetFetchResponse;
+import org.apache.kafka.common.requests.OffsetFetchResponse.PartitionData;
 import org.apache.kafka.common.requests.RequestTestUtils;
 import org.apache.kafka.common.requests.SyncGroupRequest;
 import org.apache.kafka.common.requests.SyncGroupResponse;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.SystemTime;
+import org.apache.kafka.common.utils.Timer;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.test.TestUtils;
 import org.junit.jupiter.api.AfterEach;
@@ -140,6 +142,7 @@ public abstract class ConsumerCoordinatorTest {
     private final long retryBackoffMs = 100;
     private final int autoCommitIntervalMs = 2000;
     private final int requestTimeoutMs = 30000;
+    private final int throttleMs = 10;
     private final MockTime time = new MockTime();
     private GroupRebalanceConfig rebalanceConfig;
 
@@ -514,10 +517,62 @@ public void testCoordinatorNotAvailableWithUserAssignedType() {
         coordinator.poll(time.timer(0));
         assertTrue(coordinator.coordinatorUnknown());
 
-        // should find an available node in next find coordinator request
+        // should not try to find coordinator since we are in manual assignment
+        // hence the prepared response should not be returned
         client.prepareResponse(groupCoordinatorResponse(node, Errors.NONE));
         coordinator.poll(time.timer(Long.MAX_VALUE));
+        assertTrue(coordinator.coordinatorUnknown());
+    }
+
+    @Test
+    public void testAutoCommitAsyncWithUserAssignedType() {
+        try (ConsumerCoordinator coordinator = buildCoordinator(rebalanceConfig, new Metrics(), assignors, true, subscriptions)) {
+            subscriptions.assignFromUser(Collections.singleton(t1p));
+            // set timeout to 0 because we expect no requests sent
+            coordinator.poll(time.timer(0));
+            assertTrue(coordinator.coordinatorUnknown());
+            assertFalse(client.hasInFlightRequests());
+
+            // elapse auto commit interval and set committable position
+            time.sleep(autoCommitIntervalMs);
+            subscriptions.seekUnvalidated(t1p, new SubscriptionState.FetchPosition(100L));
+
+            // should try to find coordinator since we are auto committing
+            coordinator.poll(time.timer(0));
+            assertTrue(coordinator.coordinatorUnknown());
+            assertTrue(client.hasInFlightRequests());
+
+            client.respond(groupCoordinatorResponse(node, Errors.NONE));
+            coordinator.poll(time.timer(0));
+            assertFalse(coordinator.coordinatorUnknown());
+            // after we've discovered the coordinator we should send
+            // out the commit request immediately
+            assertTrue(client.hasInFlightRequests());
+        }
+    }
+
+    @Test
+    public void testCommitAsyncWithUserAssignedType() {
+        subscriptions.assignFromUser(Collections.singleton(t1p));
+        // set timeout to 0 because we expect no requests sent
+        coordinator.poll(time.timer(0));
+        assertTrue(coordinator.coordinatorUnknown());
+        assertFalse(client.hasInFlightRequests());
+
+        // should try to find coordinator since we are commit async
+        coordinator.commitOffsetsAsync(singletonMap(t1p, new OffsetAndMetadata(100L)), (offsets, exception) -> {
+            fail("Commit should not get responses, but got offsets:" + offsets + ", and exception:" + exception);
+        });
+        coordinator.poll(time.timer(0));
+        assertTrue(coordinator.coordinatorUnknown());
+        assertTrue(client.hasInFlightRequests());
+
+        client.respond(groupCoordinatorResponse(node, Errors.NONE));
+        coordinator.poll(time.timer(0));
         assertFalse(coordinator.coordinatorUnknown());
+        // after we've discovered the coordinator we should send
+        // out the commit request immediately
+        assertTrue(client.hasInFlightRequests());
     }
 
     @Test
@@ -1247,9 +1302,71 @@ public void testForceMetadataDeleteForPatternSubscriptionDuringRebalance() {
         }
     }
 
+    @Test
+    public void testOnJoinPrepareWithOffsetCommitShouldSuccessAfterRetry() {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty(), false)) {
+            int generationId = 42;
+            String memberId = "consumer-42";
+
+            Timer pollTimer = time.timer(100L);
+            client.prepareResponse(offsetCommitResponse(singletonMap(t1p, Errors.UNKNOWN_TOPIC_OR_PARTITION)));
+            boolean res = coordinator.onJoinPrepare(pollTimer, generationId, memberId);
+            assertFalse(res);
+
+            pollTimer = time.timer(100L);
+            client.prepareResponse(offsetCommitResponse(singletonMap(t1p, Errors.NONE)));
+            res = coordinator.onJoinPrepare(pollTimer, generationId, memberId);
+            assertTrue(res);
+
+            assertFalse(client.hasPendingResponses());
+            assertFalse(client.hasInFlightRequests());
+            assertFalse(coordinator.coordinatorUnknown());
+        }
+    }
+
+    @Test
+    public void testOnJoinPrepareWithOffsetCommitShouldKeepJoinAfterNonRetryableException() {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty(), false)) {
+            int generationId = 42;
+            String memberId = "consumer-42";
+
+            Timer pollTimer = time.timer(100L);
+            client.prepareResponse(offsetCommitResponse(singletonMap(t1p, Errors.UNKNOWN_MEMBER_ID)));
+            boolean res = coordinator.onJoinPrepare(pollTimer, generationId, memberId);
+            assertTrue(res);
+
+            assertFalse(client.hasPendingResponses());
+            assertFalse(client.hasInFlightRequests());
+            assertFalse(coordinator.coordinatorUnknown());
+        }
+    }
+
+    @Test
+    public void testOnJoinPrepareWithOffsetCommitShouldKeepJoinAfterRebalanceTimeout() {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty(), false)) {
+            int generationId = 42;
+            String memberId = "consumer-42";
+
+            Timer pollTimer = time.timer(100L);
+            time.sleep(150);
+            boolean res = coordinator.onJoinPrepare(pollTimer, generationId, memberId);
+            assertFalse(res);
+
+            pollTimer = time.timer(100L);
+            time.sleep(rebalanceTimeoutMs);
+            client.respond(offsetCommitResponse(singletonMap(t1p, Errors.UNKNOWN_TOPIC_OR_PARTITION)));
+            res = coordinator.onJoinPrepare(pollTimer, generationId, memberId);
+            assertTrue(res);
+
+            assertFalse(client.hasPendingResponses());
+            assertFalse(client.hasInFlightRequests());
+            assertFalse(coordinator.coordinatorUnknown());
+        }
+    }
+
     @Test
     public void testJoinPrepareWithDisableAutoCommit() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"), true)) {
             coordinator.ensureActiveGroup();
 
             prepareOffsetCommitRequest(singletonMap(t1p, 100L), Errors.NONE);
@@ -1257,7 +1374,7 @@ public void testJoinPrepareWithDisableAutoCommit() {
             int generationId = 42;
             String memberId = "consumer-42";
 
-            boolean res = coordinator.onJoinPrepare(generationId, memberId);
+            boolean res = coordinator.onJoinPrepare(time.timer(0L), generationId, memberId);
 
             assertTrue(res);
             assertTrue(client.hasPendingResponses());
@@ -1268,14 +1385,14 @@ public void testJoinPrepareWithDisableAutoCommit() {
 
     @Test
     public void testJoinPrepareAndCommitCompleted() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"), true)) {
             coordinator.ensureActiveGroup();
 
             prepareOffsetCommitRequest(singletonMap(t1p, 100L), Errors.NONE);
             int generationId = 42;
             String memberId = "consumer-42";
 
-            boolean res = coordinator.onJoinPrepare(generationId, memberId);
+            boolean res = coordinator.onJoinPrepare(time.timer(0L), generationId, memberId);
             coordinator.invokeCompletedOffsetCommitCallbacks();
 
             assertTrue(res);
@@ -1287,7 +1404,7 @@ public void testJoinPrepareAndCommitCompleted() {
 
     @Test
     public void testJoinPrepareAndCommitWithCoordinatorNotAvailable() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"), true)) {
             coordinator.ensureActiveGroup();
 
             prepareOffsetCommitRequest(singletonMap(t1p, 100L), Errors.COORDINATOR_NOT_AVAILABLE);
@@ -1295,7 +1412,7 @@ public void testJoinPrepareAndCommitWithCoordinatorNotAvailable() {
             int generationId = 42;
             String memberId = "consumer-42";
 
-            boolean res = coordinator.onJoinPrepare(generationId, memberId);
+            boolean res = coordinator.onJoinPrepare(time.timer(0L), generationId, memberId);
             coordinator.invokeCompletedOffsetCommitCallbacks();
 
             assertFalse(res);
@@ -1307,7 +1424,7 @@ public void testJoinPrepareAndCommitWithCoordinatorNotAvailable() {
 
     @Test
     public void testJoinPrepareAndCommitWithUnknownMemberId() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.of("group-id"), true)) {
             coordinator.ensureActiveGroup();
 
             prepareOffsetCommitRequest(singletonMap(t1p, 100L), Errors.UNKNOWN_MEMBER_ID);
@@ -1315,7 +1432,7 @@ public void testJoinPrepareAndCommitWithUnknownMemberId() {
             int generationId = 42;
             String memberId = "consumer-42";
 
-            boolean res = coordinator.onJoinPrepare(generationId, memberId);
+            boolean res = coordinator.onJoinPrepare(time.timer(0L), generationId, memberId);
             coordinator.invokeCompletedOffsetCommitCallbacks();
 
             assertTrue(res);
@@ -2116,8 +2233,7 @@ private void testInFlightRequestsFailedAfterCoordinatorMarkedDead(Errors error)
 
     @Test
     public void testAutoCommitDynamicAssignment() {
-        try (ConsumerCoordinator coordinator = buildCoordinator(rebalanceConfig, new Metrics(), assignors, true, subscriptions)
-        ) {
+        try (ConsumerCoordinator coordinator = buildCoordinator(rebalanceConfig, new Metrics(), assignors, true, subscriptions)) {
             subscriptions.subscribe(singleton(topic1), rebalanceListener);
             joinAsFollowerAndReceiveAssignment(coordinator, singletonList(t1p));
             subscriptions.seek(t1p, 100);
@@ -2821,7 +2937,7 @@ public void testFetchCommittedOffsets() {
         OffsetFetchResponse.PartitionData data = new OffsetFetchResponse.PartitionData(offset, leaderEpoch,
                 metadata, Errors.NONE);
 
-        client.prepareResponse(new OffsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
+        client.prepareResponse(offsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
         Map<TopicPartition, OffsetAndMetadata> fetchedOffsets = coordinator.fetchCommittedOffsets(singleton(t1p),
                 time.timer(Long.MAX_VALUE));
 
@@ -2837,7 +2953,7 @@ public void testTopicAuthorizationFailedInOffsetFetch() {
         OffsetFetchResponse.PartitionData data = new OffsetFetchResponse.PartitionData(-1, Optional.empty(),
                 "", Errors.TOPIC_AUTHORIZATION_FAILED);
 
-        client.prepareResponse(new OffsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
+        client.prepareResponse(offsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
         TopicAuthorizationException exception = assertThrows(TopicAuthorizationException.class, () ->
                 coordinator.fetchCommittedOffsets(singleton(t1p), time.timer(Long.MAX_VALUE)));
 
@@ -2850,7 +2966,7 @@ public void testRefreshOffsetLoadInProgress() {
         coordinator.ensureCoordinatorReady(time.timer(Long.MAX_VALUE));
 
         subscriptions.assignFromUser(singleton(t1p));
-        client.prepareResponse(offsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS));
+        client.prepareResponse(offsetFetchResponse(Errors.COORDINATOR_LOAD_IN_PROGRESS, Collections.emptyMap()));
         client.prepareResponse(offsetFetchResponse(t1p, Errors.NONE, "", 100L));
         coordinator.refreshCommittedOffsetsIfNeeded(time.timer(Long.MAX_VALUE));
 
@@ -2865,7 +2981,7 @@ public void testRefreshOffsetsGroupNotAuthorized() {
         coordinator.ensureCoordinatorReady(time.timer(Long.MAX_VALUE));
 
         subscriptions.assignFromUser(singleton(t1p));
-        client.prepareResponse(offsetFetchResponse(Errors.GROUP_AUTHORIZATION_FAILED));
+        client.prepareResponse(offsetFetchResponse(Errors.GROUP_AUTHORIZATION_FAILED, Collections.emptyMap()));
         try {
             coordinator.refreshCommittedOffsetsIfNeeded(time.timer(Long.MAX_VALUE));
             fail("Expected group authorization error");
@@ -2908,7 +3024,7 @@ public void testRefreshOffsetNotCoordinatorForConsumer() {
         coordinator.ensureCoordinatorReady(time.timer(Long.MAX_VALUE));
 
         subscriptions.assignFromUser(singleton(t1p));
-        client.prepareResponse(offsetFetchResponse(Errors.NOT_COORDINATOR));
+        client.prepareResponse(offsetFetchResponse(Errors.NOT_COORDINATOR, Collections.emptyMap()));
         client.prepareResponse(groupCoordinatorResponse(node, Errors.NONE));
         client.prepareResponse(offsetFetchResponse(t1p, Errors.NONE, "", 100L));
         coordinator.refreshCommittedOffsetsIfNeeded(time.timer(Long.MAX_VALUE));
@@ -3027,21 +3143,21 @@ public void run() {
 
     @Test
     public void testCloseDynamicAssignment() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty(), true)) {
             gracefulCloseTest(coordinator, true);
         }
     }
 
     @Test
     public void testCloseManualAssignment() {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(false, true, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(false, true, Optional.empty(), true)) {
             gracefulCloseTest(coordinator, false);
         }
     }
 
     @Test
     public void testCloseCoordinatorNotKnownManualAssignment() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(false, true, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(false, true, Optional.empty(), true)) {
             makeCoordinatorUnknown(coordinator, Errors.NOT_COORDINATOR);
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, 1000, 1000, 1000);
@@ -3050,7 +3166,7 @@ public void testCloseCoordinatorNotKnownManualAssignment() throws Exception {
 
     @Test
     public void testCloseCoordinatorNotKnownNoCommits() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty(), true)) {
             makeCoordinatorUnknown(coordinator, Errors.NOT_COORDINATOR);
             closeVerifyTimeout(coordinator, 1000, 0, 0);
         }
@@ -3058,7 +3174,7 @@ public void testCloseCoordinatorNotKnownNoCommits() throws Exception {
 
     @Test
     public void testCloseCoordinatorNotKnownWithCommits() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, Optional.empty(), true)) {
             makeCoordinatorUnknown(coordinator, Errors.NOT_COORDINATOR);
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, 1000, 1000, 1000);
@@ -3067,7 +3183,7 @@ public void testCloseCoordinatorNotKnownWithCommits() throws Exception {
 
     @Test
     public void testCloseCoordinatorUnavailableNoCommits() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty(), true)) {
             makeCoordinatorUnknown(coordinator, Errors.COORDINATOR_NOT_AVAILABLE);
             closeVerifyTimeout(coordinator, 1000, 0, 0);
         }
@@ -3075,7 +3191,7 @@ public void testCloseCoordinatorUnavailableNoCommits() throws Exception {
 
     @Test
     public void testCloseTimeoutCoordinatorUnavailableForCommit() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             makeCoordinatorUnknown(coordinator, Errors.COORDINATOR_NOT_AVAILABLE);
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, 1000, 1000, 1000);
@@ -3084,7 +3200,7 @@ public void testCloseTimeoutCoordinatorUnavailableForCommit() throws Exception {
 
     @Test
     public void testCloseMaxWaitCoordinatorUnavailableForCommit() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             makeCoordinatorUnknown(coordinator, Errors.COORDINATOR_NOT_AVAILABLE);
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, Long.MAX_VALUE, requestTimeoutMs, requestTimeoutMs);
@@ -3093,7 +3209,7 @@ public void testCloseMaxWaitCoordinatorUnavailableForCommit() throws Exception {
 
     @Test
     public void testCloseNoResponseForCommit() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, Long.MAX_VALUE, requestTimeoutMs, requestTimeoutMs);
         }
@@ -3101,14 +3217,14 @@ public void testCloseNoResponseForCommit() throws Exception {
 
     @Test
     public void testCloseNoResponseForLeaveGroup() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty())) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.empty(), true)) {
             closeVerifyTimeout(coordinator, Long.MAX_VALUE, requestTimeoutMs, requestTimeoutMs);
         }
     }
 
     @Test
     public void testCloseNoWait() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             time.sleep(autoCommitIntervalMs);
             closeVerifyTimeout(coordinator, 0, 0, 0);
         }
@@ -3116,7 +3232,7 @@ public void testCloseNoWait() throws Exception {
 
     @Test
     public void testHeartbeatThreadClose() throws Exception {
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             coordinator.ensureActiveGroup();
             time.sleep(heartbeatIntervalMs + 100);
             Thread.yield(); // Give heartbeat thread a chance to attempt heartbeat
@@ -3183,7 +3299,7 @@ public void testGetGroupMetadata() {
         assertEquals(JoinGroupRequest.UNKNOWN_MEMBER_ID, groupMetadata.memberId());
         assertFalse(groupMetadata.groupInstanceId().isPresent());
 
-        try (final ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId)) {
+        try (final ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, true, groupInstanceId, true)) {
             coordinator.ensureActiveGroup();
 
             final ConsumerGroupMetadata joinedGroupMetadata = coordinator.groupMetadata();
@@ -3219,7 +3335,7 @@ public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
     @Test
     public void testPrepareJoinAndRejoinAfterFailedRebalance() {
         final List<TopicPartition> partitions = singletonList(t1p);
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"), true)) {
             coordinator.ensureActiveGroup();
 
             prepareOffsetCommitRequest(singletonMap(t1p, 100L), Errors.REBALANCE_IN_PROGRESS);
@@ -3239,7 +3355,7 @@ public void testPrepareJoinAndRejoinAfterFailedRebalance() {
             MockTime time = new MockTime(1);
 
             // onJoinPrepare will be executed and onJoinComplete will not.
-            boolean res = coordinator.joinGroupIfNeeded(time.timer(2));
+            boolean res = coordinator.joinGroupIfNeeded(time.timer(100));
 
             assertFalse(res);
             assertFalse(client.hasPendingResponses());
@@ -3284,7 +3400,7 @@ public void testPrepareJoinAndRejoinAfterFailedRebalance() {
     @Test
     public void shouldLoseAllOwnedPartitionsBeforeRejoiningAfterDroppingOutOfTheGroup() {
         final List<TopicPartition> partitions = singletonList(t1p);
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"), true)) {
             final SystemTime realTime = new SystemTime();
             coordinator.ensureActiveGroup();
 
@@ -3317,7 +3433,7 @@ public void shouldLoseAllOwnedPartitionsBeforeRejoiningAfterDroppingOutOfTheGrou
     @Test
     public void shouldLoseAllOwnedPartitionsBeforeRejoiningAfterResettingGenerationId() {
         final List<TopicPartition> partitions = singletonList(t1p);
-        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"))) {
+        try (ConsumerCoordinator coordinator = prepareCoordinatorForCloseTest(true, false, Optional.of("group-id"), true)) {
             final SystemTime realTime = new SystemTime();
             coordinator.ensureActiveGroup();
 
@@ -3384,7 +3500,11 @@ private void supportStableFlag(final short upperVersion, final boolean expectThr
         OffsetFetchResponse.PartitionData data = new OffsetFetchResponse.PartitionData(offset, leaderEpoch,
             metadata, Errors.NONE);
 
-        client.prepareResponse(new OffsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
+        if (upperVersion < 8) {
+            client.prepareResponse(new OffsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
+        } else {
+            client.prepareResponse(offsetFetchResponse(Errors.NONE, singletonMap(t1p, data)));
+        }
         if (expectThrows) {
             assertThrows(UnsupportedVersionException.class,
                 () -> coordinator.fetchCommittedOffsets(singleton(t1p), time.timer(Long.MAX_VALUE)));
@@ -3411,7 +3531,8 @@ private void receiveFencedInstanceIdException() {
 
     private ConsumerCoordinator prepareCoordinatorForCloseTest(final boolean useGroupManagement,
                                                                final boolean autoCommit,
-                                                               final Optional<String> groupInstanceId) {
+                                                               final Optional<String> groupInstanceId,
+                                                               final boolean shouldPoll) {
         rebalanceConfig = buildRebalanceConfig(groupInstanceId);
         ConsumerCoordinator coordinator = buildCoordinator(rebalanceConfig,
                                                            new Metrics(),
@@ -3430,7 +3551,9 @@ private ConsumerCoordinator prepareCoordinatorForCloseTest(final boolean useGrou
         }
 
         subscriptions.seek(t1p, 100);
-        coordinator.poll(time.timer(Long.MAX_VALUE));
+        if (shouldPoll) {
+            coordinator.poll(time.timer(Long.MAX_VALUE));
+        }
 
         return coordinator;
     }
@@ -3639,8 +3762,10 @@ private OffsetCommitResponse offsetCommitResponse(Map<TopicPartition, Errors> re
         return new OffsetCommitResponse(responseData);
     }
 
-    private OffsetFetchResponse offsetFetchResponse(Errors topLevelError) {
-        return new OffsetFetchResponse(topLevelError, Collections.emptyMap());
+    private OffsetFetchResponse offsetFetchResponse(Errors error, Map<TopicPartition, PartitionData> responseData) {
+        return new OffsetFetchResponse(throttleMs,
+                                       singletonMap(groupId, error),
+                                       singletonMap(groupId, responseData));
     }
 
     private OffsetFetchResponse offsetFetchResponse(TopicPartition tp, Errors partitionLevelError, String metadata, long offset) {
@@ -3650,7 +3775,7 @@ private OffsetFetchResponse offsetFetchResponse(TopicPartition tp, Errors partit
     private OffsetFetchResponse offsetFetchResponse(TopicPartition tp, Errors partitionLevelError, String metadata, long offset, Optional<Integer> epoch) {
         OffsetFetchResponse.PartitionData data = new OffsetFetchResponse.PartitionData(offset,
                 epoch, metadata, partitionLevelError);
-        return new OffsetFetchResponse(Errors.NONE, singletonMap(tp, data));
+        return offsetFetchResponse(Errors.NONE, singletonMap(tp, data));
     }
 
     private OffsetCommitCallback callback(final AtomicBoolean success) {
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/KafkaProducerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/KafkaProducerTest.java
index 81eb6e3f2d691..dc7db382a6229 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/KafkaProducerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/KafkaProducerTest.java
@@ -23,9 +23,13 @@
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerGroupMetadata;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.clients.producer.internals.FutureRecordMetadata;
+import org.apache.kafka.clients.producer.internals.ProduceRequestResult;
 import org.apache.kafka.clients.producer.internals.ProducerInterceptors;
 import org.apache.kafka.clients.producer.internals.ProducerMetadata;
+import org.apache.kafka.clients.producer.internals.RecordAccumulator;
 import org.apache.kafka.clients.producer.internals.Sender;
+import org.apache.kafka.clients.producer.internals.TransactionManager;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.Metric;
@@ -51,6 +55,7 @@
 import org.apache.kafka.common.network.Selectable;
 import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.record.Record;
 import org.apache.kafka.common.record.RecordBatch;
 import org.apache.kafka.common.requests.AddOffsetsToTxnResponse;
 import org.apache.kafka.common.requests.EndTxnResponse;
@@ -66,6 +71,7 @@
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.common.utils.KafkaThread;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
@@ -74,7 +80,9 @@
 import org.apache.kafka.test.MockProducerInterceptor;
 import org.apache.kafka.test.MockSerializer;
 import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 
@@ -123,6 +131,7 @@
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.ArgumentMatchers.notNull;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
@@ -151,6 +160,8 @@ public class KafkaProducerTest {
                     new PartitionInfo(topic, 2, null, null, null)),
             Collections.emptySet(),
             Collections.emptySet());
+    private TestInfo testInfo;
+
     private static final int DEFAULT_METADATA_IDLE_MS = 5 * 60 * 1000;
     private static final Node NODE = new Node(0, "host1", 1000);
 
@@ -161,8 +172,13 @@ private static <K, V> KafkaProducer<K, V> kafkaProducer(Map<String, Object> conf
                   KafkaClient kafkaClient,
                   ProducerInterceptors<K, V> interceptors,
                   Time time) {
-        return new KafkaProducer<>(new ProducerConfig(ProducerConfig.appendSerializerToConfig(configs, keySerializer, valueSerializer)),
-                keySerializer, valueSerializer, metadata, kafkaClient, interceptors, time);
+        return new KafkaProducer<K, V>(new ProducerConfig(ProducerConfig.appendSerializerToConfig(configs, keySerializer, valueSerializer)),
+            keySerializer, valueSerializer, metadata, kafkaClient, interceptors, time);
+    }
+
+    @BeforeEach
+    public void setup(TestInfo testInfo) {
+        this.testInfo = testInfo;
     }
 
     @Test
@@ -636,7 +652,7 @@ private static KafkaProducer<String, String> producerWithOverrideNewSender(Map<S
 
     private static KafkaProducer<String, String> producerWithOverrideNewSender(Map<String, Object> configs,
                                                                                ProducerMetadata metadata,
-                                                                               Time timer) {
+                                                                               Time time) {
         // let mockClient#leastLoadedNode return the node directly so that we can isolate Metadata calls from KafkaProducer for idempotent producer
         MockClient mockClient = new MockClient(Time.SYSTEM, metadata) {
             @Override
@@ -647,7 +663,7 @@ public Node leastLoadedNode(long now) {
 
         return new KafkaProducer<String, String>(
                 new ProducerConfig(ProducerConfig.appendSerializerToConfig(configs, new StringSerializer(), new StringSerializer())),
-                new StringSerializer(), new StringSerializer(), metadata, mockClient, null, timer) {
+                new StringSerializer(), new StringSerializer(), metadata, mockClient, null, time) {
             @Override
             Sender newSender(LogContext logContext, KafkaClient kafkaClient, ProducerMetadata metadata) {
                 // give Sender its own Metadata instance so that we can isolate Metadata calls from KafkaProducer
@@ -1885,10 +1901,13 @@ public void testNullTopicName() {
     }
 
     @Test
-    public void testCallbackHandlesError() throws Exception {
+    public void testCallbackAndInterceptorHandleError() throws Exception {
         Map<String, Object> configs = new HashMap<>();
         configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9000");
         configs.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, "1000");
+        configs.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG, MockProducerInterceptor.class.getName());
+        configs.put(MockProducerInterceptor.APPEND_STRING_PROP, "something");
+
 
         Time time = new MockTime();
         ProducerMetadata producerMetadata = newMetadata(0, Long.MAX_VALUE);
@@ -1896,8 +1915,11 @@ public void testCallbackHandlesError() throws Exception {
 
         String invalidTopicName = "topic abc"; // Invalid topic name due to space
 
+        ProducerInterceptors<String, String> producerInterceptors =
+                new ProducerInterceptors<>(Arrays.asList(new MockProducerInterceptor()));
+
         try (Producer<String, String> producer = kafkaProducer(configs, new StringSerializer(), new StringSerializer(),
-                producerMetadata, client, null, time)) {
+                producerMetadata, client, producerInterceptors, time)) {
             ProducerRecord<String, String> record = new ProducerRecord<>(invalidTopicName, "HelloKafka");
 
             // Here's the important piece of the test. Let's make sure that the RecordMetadata we get
@@ -1922,6 +1944,7 @@ public void testCallbackHandlesError() throws Exception {
             };
 
             producer.send(record, callBack);
+            assertEquals(1, MockProducerInterceptor.ON_ACKNOWLEDGEMENT_COUNT.intValue());
         }
     }
 
@@ -1944,6 +1967,202 @@ public void negativePartitionShouldThrow() {
         }
     }
 
+    @Test
+    public void testPartitionAddedToTransaction() throws Exception {
+        StringSerializer serializer = new StringSerializer();
+        KafkaProducerTestContext<String> ctx = new KafkaProducerTestContext<>(testInfo, serializer);
+
+        String topic = "foo";
+        TopicPartition topicPartition = new TopicPartition(topic, 0);
+        Cluster cluster = TestUtils.singletonCluster(topic, 1);
+
+        when(ctx.sender.isRunning()).thenReturn(true);
+        when(ctx.metadata.fetch()).thenReturn(cluster);
+
+        long timestamp = ctx.time.milliseconds();
+        ProducerRecord<String, String> record = new ProducerRecord<>(topic, null, timestamp, "key", "value");
+        FutureRecordMetadata future = expectAppend(ctx, record, topicPartition, cluster);
+
+        try (KafkaProducer<String, String> producer = ctx.newKafkaProducer()) {
+            assertEquals(future, producer.send(record));
+            assertFalse(future.isDone());
+            verify(ctx.transactionManager).maybeAddPartition(topicPartition);
+        }
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void testPartitionAddedToTransactionAfterFullBatchRetry() throws Exception {
+        StringSerializer serializer = new StringSerializer();
+        KafkaProducerTestContext<String> ctx = new KafkaProducerTestContext<>(testInfo, serializer);
+
+        String topic = "foo";
+        TopicPartition topicPartition0 = new TopicPartition(topic, 0);
+        TopicPartition topicPartition1 = new TopicPartition(topic, 1);
+        Cluster cluster = TestUtils.singletonCluster(topic, 2);
+
+        when(ctx.sender.isRunning()).thenReturn(true);
+        when(ctx.metadata.fetch()).thenReturn(cluster);
+
+        long timestamp = ctx.time.milliseconds();
+        ProducerRecord<String, String> record = new ProducerRecord<>(topic, null, timestamp, "key", "value");
+
+        FutureRecordMetadata future = expectAppendWithAbortForNewBatch(
+            ctx,
+            record,
+            topicPartition0,
+            topicPartition1,
+            cluster
+        );
+
+        try (KafkaProducer<String, String> producer = ctx.newKafkaProducer()) {
+            assertEquals(future, producer.send(record));
+            assertFalse(future.isDone());
+            verify(ctx.partitioner).onNewBatch(topic, cluster, 0);
+            verify(ctx.transactionManager, never()).maybeAddPartition(topicPartition0);
+            verify(ctx.transactionManager).maybeAddPartition(topicPartition1);
+        }
+    }
+
+    private <T> FutureRecordMetadata expectAppend(
+        KafkaProducerTestContext<T> ctx,
+        ProducerRecord<T, T> record,
+        TopicPartition initialSelectedPartition,
+        Cluster cluster
+    ) throws InterruptedException {
+        byte[] serializedKey = ctx.serializer.serialize(topic, record.key());
+        byte[] serializedValue = ctx.serializer.serialize(topic, record.value());
+        long timestamp = record.timestamp() == null ? ctx.time.milliseconds() : record.timestamp();
+
+        ProduceRequestResult requestResult = new ProduceRequestResult(initialSelectedPartition);
+        FutureRecordMetadata futureRecordMetadata = new FutureRecordMetadata(
+            requestResult,
+            5,
+            timestamp,
+            serializedKey.length,
+            serializedValue.length,
+            ctx.time
+        );
+
+        when(ctx.partitioner.partition(
+            initialSelectedPartition.topic(),
+            record.key(),
+            serializedKey,
+            record.value(),
+            serializedValue,
+            cluster
+        )).thenReturn(initialSelectedPartition.partition());
+
+        when(ctx.accumulator.append(
+            eq(initialSelectedPartition.topic()),            // 0
+            eq(initialSelectedPartition.partition()),        // 1
+            eq(timestamp),                                   // 2
+            eq(serializedKey),                               // 3
+            eq(serializedValue),                             // 4
+            eq(Record.EMPTY_HEADERS),                        // 5
+            any(RecordAccumulator.AppendCallbacks.class),    // 6 <--
+            anyLong(),
+            eq(true),
+            anyLong(),
+            any()
+        )).thenAnswer(invocation -> {
+            RecordAccumulator.AppendCallbacks callbacks =
+                (RecordAccumulator.AppendCallbacks) invocation.getArguments()[6];
+            callbacks.setPartition(initialSelectedPartition.partition());
+            return new RecordAccumulator.RecordAppendResult(
+                futureRecordMetadata,
+                false,
+                false,
+                false,
+                0);
+        });
+
+        return futureRecordMetadata;
+    }
+
+    private <T> FutureRecordMetadata expectAppendWithAbortForNewBatch(
+        KafkaProducerTestContext<T> ctx,
+        ProducerRecord<T, T> record,
+        TopicPartition initialSelectedPartition,
+        TopicPartition retrySelectedPartition,
+        Cluster cluster
+    ) throws InterruptedException {
+        byte[] serializedKey = ctx.serializer.serialize(topic, record.key());
+        byte[] serializedValue = ctx.serializer.serialize(topic, record.value());
+        long timestamp = record.timestamp() == null ? ctx.time.milliseconds() : record.timestamp();
+
+        ProduceRequestResult requestResult = new ProduceRequestResult(retrySelectedPartition);
+        FutureRecordMetadata futureRecordMetadata = new FutureRecordMetadata(
+            requestResult,
+            0,
+            timestamp,
+            serializedKey.length,
+            serializedValue.length,
+            ctx.time
+        );
+
+        when(ctx.partitioner.partition(
+            initialSelectedPartition.topic(),
+            record.key(),
+            serializedKey,
+            record.value(),
+            serializedValue,
+            cluster
+        )).thenReturn(initialSelectedPartition.partition())
+          .thenReturn(retrySelectedPartition.partition());
+
+        when(ctx.accumulator.append(
+            eq(initialSelectedPartition.topic()),            // 0
+            eq(initialSelectedPartition.partition()),        // 1
+            eq(timestamp),                                   // 2
+            eq(serializedKey),                               // 3
+            eq(serializedValue),                             // 4
+            eq(Record.EMPTY_HEADERS),                        // 5
+            any(RecordAccumulator.AppendCallbacks.class),    // 6 <--
+            anyLong(),
+            eq(true), // abortOnNewBatch
+            anyLong(),
+            any()
+        )).thenAnswer(invocation -> {
+            RecordAccumulator.AppendCallbacks callbacks =
+                (RecordAccumulator.AppendCallbacks) invocation.getArguments()[6];
+            callbacks.setPartition(initialSelectedPartition.partition());
+            return new RecordAccumulator.RecordAppendResult(
+                null,
+                false,
+                false,
+                true,
+                0);
+        });
+
+        when(ctx.accumulator.append(
+            eq(retrySelectedPartition.topic()),              // 0
+            eq(retrySelectedPartition.partition()),          // 1
+            eq(timestamp),                                   // 2
+            eq(serializedKey),                               // 3
+            eq(serializedValue),                             // 4
+            eq(Record.EMPTY_HEADERS),                        // 5
+            any(RecordAccumulator.AppendCallbacks.class),    // 6 <--
+            anyLong(),
+            eq(false), // abortOnNewBatch
+            anyLong(),
+            any()
+        )).thenAnswer(invocation -> {
+            RecordAccumulator.AppendCallbacks callbacks =
+                (RecordAccumulator.AppendCallbacks) invocation.getArguments()[6];
+            callbacks.setPartition(retrySelectedPartition.partition());
+            return new RecordAccumulator.RecordAppendResult(
+                futureRecordMetadata,
+                false,
+                true,
+                false,
+                0);
+        });
+
+        return futureRecordMetadata;
+    }
+
+
     private static final List<String> CLIENT_IDS = new ArrayList<>();
 
     public static class SerializerForClientId implements Serializer<byte[]> {
@@ -2012,4 +2231,96 @@ public void close() {
         public void configure(Map<String, ?> configs) {
         }
     }
+
+    private static class KafkaProducerTestContext<T> {
+        private final TestInfo testInfo;
+        private final Map<String, Object> configs;
+        private final Serializer<T> serializer;
+        private ProducerMetadata metadata = mock(ProducerMetadata.class);
+        private RecordAccumulator accumulator = mock(RecordAccumulator.class);
+        private Sender sender = mock(Sender.class);
+        private TransactionManager transactionManager = mock(TransactionManager.class);
+        private Partitioner partitioner = mock(Partitioner.class);
+        private KafkaThread ioThread = mock(KafkaThread.class);
+        private Time time = new MockTime();
+        private Metrics metrics = new Metrics(time);
+        private List<ProducerInterceptor<T, T>> interceptors = new ArrayList<>();
+
+        public KafkaProducerTestContext(
+            TestInfo testInfo,
+            Serializer<T> serializer
+        ) {
+            this(testInfo, new HashMap<>(), serializer);
+        }
+
+        public KafkaProducerTestContext(
+            TestInfo testInfo,
+            Map<String, Object> configs,
+            Serializer<T> serializer
+        ) {
+            this.testInfo = testInfo;
+            this.configs = configs;
+            this.serializer = serializer;
+
+            if (!configs.containsKey(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG)) {
+                configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9999");
+            }
+        }
+
+        public KafkaProducerTestContext<T> setProducerMetadata(ProducerMetadata metadata) {
+            this.metadata = metadata;
+            return this;
+        }
+
+        public KafkaProducerTestContext<T> setAccumulator(RecordAccumulator accumulator) {
+            this.accumulator = accumulator;
+            return this;
+        }
+
+        public KafkaProducerTestContext<T> setSender(Sender sender) {
+            this.sender = sender;
+            return this;
+        }
+
+        public KafkaProducerTestContext<T> setTransactionManager(TransactionManager transactionManager) {
+            this.transactionManager = transactionManager;
+            return this;
+        }
+
+        public KafkaProducerTestContext<T> addInterceptor(ProducerInterceptor<T, T> interceptor) {
+            this.interceptors.add(interceptor);
+            return this;
+        }
+
+        public KafkaProducerTestContext<T> setTime(Time time) {
+            this.time = time;
+            return this;
+        }
+
+        public KafkaProducer<T, T> newKafkaProducer() {
+            LogContext logContext = new LogContext("[Producer test=" + testInfo.getDisplayName() + "] ");
+
+            ProducerConfig producerConfig = new ProducerConfig(
+                ProducerConfig.appendSerializerToConfig(configs, serializer, serializer));
+
+            ProducerInterceptors<T, T> interceptors = new ProducerInterceptors<>(this.interceptors);
+
+            return new KafkaProducer<>(
+                producerConfig,
+                logContext,
+                metrics,
+                serializer,
+                serializer,
+                metadata,
+                accumulator,
+                transactionManager,
+                sender,
+                interceptors,
+                partitioner,
+                time,
+                ioThread
+            );
+        }
+    }
+
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java
index ca14ab0fda3da..8c7884bd77cdc 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/MockProducerTest.java
@@ -18,7 +18,6 @@
 
 import org.apache.kafka.clients.consumer.ConsumerGroupMetadata;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.PartitionInfo;
@@ -85,7 +84,7 @@ public void testPartitioner() throws Exception {
         PartitionInfo partitionInfo1 = new PartitionInfo(topic, 1, null, null, null);
         Cluster cluster = new Cluster(null, new ArrayList<>(0), asList(partitionInfo0, partitionInfo1),
                 Collections.emptySet(), Collections.emptySet());
-        MockProducer<String, String> producer = new MockProducer<>(cluster, true, new DefaultPartitioner(), new StringSerializer(), new StringSerializer());
+        MockProducer<String, String> producer = new MockProducer<>(cluster, true, new StringSerializer(), new StringSerializer());
         ProducerRecord<String, String> record = new ProducerRecord<>(topic, "key", "value");
         Future<RecordMetadata> metadata = producer.send(record);
         assertEquals(1, metadata.get().partition(), "Partition should be correct");
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
index a2f318bebc7a0..7a9be7b32ff1f 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/ProducerConfigTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.clients.producer;
 
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.common.serialization.StringSerializer;
@@ -25,6 +27,8 @@
 import java.util.Map;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ProducerConfigTest {
 
@@ -59,4 +63,36 @@ public void testAppendSerializerToConfig() {
         assertEquals(newConfigs.get(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG), keySerializerClass);
         assertEquals(newConfigs.get(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG), valueSerializerClass);
     }
+
+    @Test
+    public void testAppendSerializerToConfigWithException() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, null);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        assertThrows(ConfigException.class, () -> ProducerConfig.appendSerializerToConfig(configs, null, valueSerializer));
+
+        configs.clear();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, null);
+        assertThrows(ConfigException.class, () -> ProducerConfig.appendSerializerToConfig(configs, keySerializer, null));
+    }
+
+    @Test
+    public void testInvalidCompressionType() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        configs.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "abc");
+        assertThrows(ConfigException.class, () -> new ProducerConfig(configs));
+    }
+
+    @Test
+    public void testInvalidSecurityProtocol() {
+        Map<String, Object> configs = new HashMap<>();
+        configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, keySerializerClass);
+        configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, valueSerializerClass);
+        configs.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc");
+        ConfigException ce = assertThrows(ConfigException.class, () -> new ProducerConfig(configs));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/UniformStickyPartitionerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/UniformStickyPartitionerTest.java
index 0014bf8daaeef..f5484071717dc 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/UniformStickyPartitionerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/UniformStickyPartitionerTest.java
@@ -39,6 +39,7 @@ public class UniformStickyPartitionerTest {
     private final static String TOPIC_A = "TOPIC_A";
     private final static String TOPIC_B = "TOPIC_B";
 
+    @SuppressWarnings("deprecation")
     @Test
     public void testRoundRobinWithUnavailablePartitions() {
         // Intentionally make the partition list not in partition order to test the edge
@@ -77,6 +78,7 @@ public void testRoundRobinWithUnavailablePartitions() {
         assertEquals(countForPart0, countForPart2, "The distribution between two available partitions should be even");
     }
 
+    @SuppressWarnings("deprecation")
     @Test
     public void testRoundRobinWithKeyBytes() throws InterruptedException {
         List<PartitionInfo> allPartitions = asList(new PartitionInfo(TOPIC_A, 0, NODES[0], NODES, NODES),
@@ -140,7 +142,8 @@ public void testRoundRobinWithKeyBytes() throws InterruptedException {
         assertEquals(30, partitionCount.get(oldPart).intValue());
         assertEquals(60, partitionCount.get(newPart).intValue());
     }
-    
+
+    @SuppressWarnings("deprecation")
     @Test
     public void testRoundRobinWithNullKeyBytes() throws InterruptedException {
         List<PartitionInfo> allPartitions = asList(new PartitionInfo(TOPIC_A, 0, NODES[0], NODES, NODES),
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/BuiltInPartitionerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BuiltInPartitionerTest.java
new file mode 100644
index 0000000000000..734aedc483ad1
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/BuiltInPartitionerTest.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.clients.producer.internals;
+
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.utils.LogContext;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static java.util.Arrays.asList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class BuiltInPartitionerTest {
+    private final static Node[] NODES = new Node[] {
+        new Node(0, "localhost", 99),
+        new Node(1, "localhost", 100),
+        new Node(2, "localhost", 101),
+        new Node(11, "localhost", 102)
+    };
+    final static String TOPIC_A = "topicA";
+    final static String TOPIC_B = "topicB";
+    final static String TOPIC_C = "topicC";
+    final LogContext logContext = new LogContext();
+
+    @AfterEach
+    public void tearDown() {
+        BuiltInPartitioner.mockRandom = null;
+    }
+
+    @Test
+    public void testStickyPartitioning() {
+        List<PartitionInfo> allPartitions = asList(new PartitionInfo(TOPIC_A, 0, NODES[0], NODES, NODES),
+            new PartitionInfo(TOPIC_A, 1, NODES[1], NODES, NODES),
+            new PartitionInfo(TOPIC_A, 2, NODES[2], NODES, NODES),
+            new PartitionInfo(TOPIC_B, 0, NODES[0], NODES, NODES)
+        );
+        Cluster testCluster = new Cluster("clusterId", asList(NODES), allPartitions,
+            Collections.emptySet(), Collections.emptySet());
+
+        // Create partitions with "sticky" batch size to accommodate 3 records.
+        BuiltInPartitioner builtInPartitionerA = new BuiltInPartitioner(logContext, TOPIC_A, 3);
+
+        // Test the partition is not switched until sticky batch size is reached.
+        // Mock random number generator with just sequential integer.
+        AtomicInteger mockRandom = new AtomicInteger();
+        BuiltInPartitioner.mockRandom = () -> mockRandom.getAndAdd(1);
+
+        BuiltInPartitioner.StickyPartitionInfo partitionInfo = builtInPartitionerA.peekCurrentPartitionInfo(testCluster);
+        int partA = partitionInfo.partition();
+        builtInPartitionerA.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+        partitionInfo = builtInPartitionerA.peekCurrentPartitionInfo(testCluster);
+        assertEquals(partA, partitionInfo.partition());
+        builtInPartitionerA.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+        partitionInfo = builtInPartitionerA.peekCurrentPartitionInfo(testCluster);
+        assertEquals(partA, partitionInfo.partition());
+        builtInPartitionerA.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+        // After producing 3 records, partition must've switched.
+        assertNotEquals(partA, builtInPartitionerA.peekCurrentPartitionInfo(testCluster).partition());
+
+        // Check that switching works even when there is one partition.
+        BuiltInPartitioner builtInPartitionerB = new BuiltInPartitioner(logContext, TOPIC_B, 1);
+        for (int c = 10; c-- > 0; ) {
+            partitionInfo = builtInPartitionerB.peekCurrentPartitionInfo(testCluster);
+            assertEquals(0, partitionInfo.partition());
+            builtInPartitionerB.updatePartitionInfo(partitionInfo, 1, testCluster);
+        }
+    }
+
+    @Test
+    public void unavailablePartitionsTest() {
+        // Partition 1 in topic A, partition 0 in topic B and partition 0 in topic C are unavailable partitions.
+        List<PartitionInfo> allPartitions = asList(new PartitionInfo(TOPIC_A, 0, NODES[0], NODES, NODES),
+            new PartitionInfo(TOPIC_A, 1, null, NODES, NODES),
+            new PartitionInfo(TOPIC_A, 2, NODES[2], NODES, NODES),
+            new PartitionInfo(TOPIC_B, 0, null, NODES, NODES),
+            new PartitionInfo(TOPIC_B, 1, NODES[0], NODES, NODES),
+            new PartitionInfo(TOPIC_C, 0, null, NODES, NODES)
+        );
+
+        Cluster testCluster = new Cluster("clusterId", asList(NODES[0], NODES[1], NODES[2]), allPartitions,
+            Collections.emptySet(), Collections.emptySet());
+
+        // Create partitions with "sticky" batch size to accommodate 1 record.
+        BuiltInPartitioner builtInPartitionerA = new BuiltInPartitioner(logContext, TOPIC_A, 1);
+
+        // Assure we never choose partition 1 because it is unavailable.
+        BuiltInPartitioner.StickyPartitionInfo partitionInfo = builtInPartitionerA.peekCurrentPartitionInfo(testCluster);
+        int partA = partitionInfo.partition();
+        builtInPartitionerA.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+        boolean foundAnotherPartA = false;
+        assertNotEquals(1, partA);
+        for (int aPartitions = 0; aPartitions < 100; aPartitions++) {
+            partitionInfo = builtInPartitionerA.peekCurrentPartitionInfo(testCluster);
+            int anotherPartA = partitionInfo.partition();
+            builtInPartitionerA.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+            assertNotEquals(1, anotherPartA);
+            foundAnotherPartA = foundAnotherPartA || anotherPartA != partA;
+        }
+        assertTrue(foundAnotherPartA, "Expected to find partition other than " + partA);
+
+        BuiltInPartitioner builtInPartitionerB = new BuiltInPartitioner(logContext, TOPIC_B, 1);
+        // Assure we always choose partition 1 for topic B.
+        partitionInfo = builtInPartitionerB.peekCurrentPartitionInfo(testCluster);
+        int partB = partitionInfo.partition();
+        builtInPartitionerB.updatePartitionInfo(partitionInfo, 1, testCluster);
+
+        assertEquals(1, partB);
+        for (int bPartitions = 0; bPartitions < 100; bPartitions++) {
+            partitionInfo = builtInPartitionerB.peekCurrentPartitionInfo(testCluster);
+            assertEquals(1, partitionInfo.partition());
+            builtInPartitionerB.updatePartitionInfo(partitionInfo, 1, testCluster);
+        }
+
+        // Assure that we still choose the partition when there are no partitions available.
+        BuiltInPartitioner builtInPartitionerC = new BuiltInPartitioner(logContext, TOPIC_C, 1);
+        partitionInfo = builtInPartitionerC.peekCurrentPartitionInfo(testCluster);
+        int partC = partitionInfo.partition();
+        builtInPartitionerC.updatePartitionInfo(partitionInfo, 1, testCluster);
+        assertEquals(0, partC);
+
+        partitionInfo = builtInPartitionerC.peekCurrentPartitionInfo(testCluster);
+        partC = partitionInfo.partition();
+        assertEquals(0, partC);
+    }
+
+    @Test
+    public void adaptivePartitionsTest() {
+        // Mock random number generator with just sequential integer.
+        AtomicInteger mockRandom = new AtomicInteger();
+        BuiltInPartitioner.mockRandom = () -> mockRandom.getAndAdd(1);
+
+        BuiltInPartitioner builtInPartitioner = new BuiltInPartitioner(logContext, TOPIC_A, 1);
+
+        // Simulate partition queue sizes.
+        int[] queueSizes = {5, 0, 3, 0, 1};
+        int[] partitionIds = new int[queueSizes.length];
+        int[] expectedFrequencies = new int[queueSizes.length];
+        List<PartitionInfo> allPartitions = new ArrayList<>();
+        for (int i = 0; i < partitionIds.length; i++) {
+            partitionIds[i] = i;
+            allPartitions.add(new PartitionInfo(TOPIC_A, i, NODES[i % NODES.length], NODES, NODES));
+            expectedFrequencies[i] = 6 - queueSizes[i];  // 6 is max(queueSizes) + 1
+        }
+
+        builtInPartitioner.updatePartitionLoadStats(queueSizes, partitionIds, queueSizes.length);
+
+        Cluster testCluster = new Cluster("clusterId", asList(NODES), allPartitions,
+            Collections.emptySet(), Collections.emptySet());
+
+        // Issue a certain number of partition calls to validate that the partitions would be
+        // distributed with frequencies that are reciprocal to the queue sizes.  The number of
+        // iterations is defined by the last element of the cumulative frequency table which is
+        // the sum of all frequencies.  We do 2 cycles, just so it's more than 1.
+        final int numberOfCycles = 2;
+        int numberOfIterations = builtInPartitioner.loadStatsRangeEnd() * numberOfCycles;
+        int[] frequencies = new int[queueSizes.length];
+
+        for (int i = 0; i < numberOfIterations; i++) {
+            BuiltInPartitioner.StickyPartitionInfo partitionInfo = builtInPartitioner.peekCurrentPartitionInfo(testCluster);
+            ++frequencies[partitionInfo.partition()];
+            builtInPartitioner.updatePartitionInfo(partitionInfo, 1, testCluster);
+        }
+
+        // Verify that frequencies are reciprocal of queue sizes.
+        for (int i = 0; i < frequencies.length; i++) {
+            assertEquals(expectedFrequencies[i] * numberOfCycles, frequencies[i],
+                "Partition " + i + " was chosen " + frequencies[i] + " times");
+        }
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/DefaultPartitionerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/DefaultPartitionerTest.java
index a55e5d2220d22..e250748643a43 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/DefaultPartitionerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/DefaultPartitionerTest.java
@@ -42,6 +42,7 @@ public class DefaultPartitionerTest {
 
     @Test
     public void testKeyPartitionIsStable() {
+        @SuppressWarnings("deprecation")
         final Partitioner partitioner = new DefaultPartitioner();
         final Cluster cluster = new Cluster("clusterId", asList(NODES), PARTITIONS,
             Collections.<String>emptySet(), Collections.<String>emptySet());
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetricsTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetricsTest.java
index e0688616b643f..46d1ed329eee2 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetricsTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/KafkaProducerMetricsTest.java
@@ -32,6 +32,7 @@ class KafkaProducerMetricsTest {
     private static final String TXN_COMMIT_TIME_TOTAL = "txn-commit-time-ns-total";
     private static final String TXN_ABORT_TIME_TOTAL = "txn-abort-time-ns-total";
     private static final String TXN_SEND_OFFSETS_TIME_TOTAL = "txn-send-offsets-time-ns-total";
+    private static final String METADATA_WAIT_TIME_TOTAL = "metadata-wait-time-ns-total";
 
     private final Metrics metrics = new Metrics();
     private final KafkaProducerMetrics producerMetrics = new KafkaProducerMetrics(metrics);
@@ -90,6 +91,15 @@ public void shouldRecordSendOffsetsTime() {
         assertMetricValue(TXN_SEND_OFFSETS_TIME_TOTAL);
     }
 
+    @Test
+    public void shouldRecordMetadataWaitTime() {
+        // When:
+        producerMetrics.recordMetadataWait(METRIC_VALUE);
+
+        // Then:
+        assertMetricValue(METADATA_WAIT_TIME_TOTAL);
+    }
+
     @Test
     public void shouldRemoveMetricsOnClose() {
         // When:
@@ -102,6 +112,7 @@ public void shouldRemoveMetricsOnClose() {
         assertMetricRemoved(TXN_COMMIT_TIME_TOTAL);
         assertMetricRemoved(TXN_ABORT_TIME_TOTAL);
         assertMetricRemoved(TXN_SEND_OFFSETS_TIME_TOTAL);
+        assertMetricRemoved(METADATA_WAIT_TIME_TOTAL);
     }
 
     private void assertMetricRemoved(final String name) {
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java
index 06ed1ce1f1242..cf991de338bdd 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/RecordAccumulatorTest.java
@@ -50,8 +50,10 @@
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Deque;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -60,7 +62,7 @@
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.AtomicInteger;
-
+import java.util.stream.Collectors;
 import static java.util.Arrays.asList;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -98,6 +100,76 @@ public void teardown() {
         this.metrics.close();
     }
 
+    @Test
+    public void testDrainBatches() throws Exception {
+        // test case: node1(tp1,tp2) , node2(tp3,tp4)
+        // add tp-4
+        int partition4 = 3;
+        TopicPartition tp4 = new TopicPartition(topic, partition4);
+        PartitionInfo part4 = new PartitionInfo(topic, partition4, node2, null, null);
+
+        long batchSize = value.length + DefaultRecordBatch.RECORD_BATCH_OVERHEAD;
+        RecordAccumulator accum = createTestRecordAccumulator((int) batchSize, Integer.MAX_VALUE, CompressionType.NONE, 10);
+        Cluster cluster = new Cluster(null, Arrays.asList(node1, node2), Arrays.asList(part1, part2, part3, part4),
+                Collections.emptySet(), Collections.emptySet());
+
+        //  initial data
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition4, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+
+        // drain batches from 2 nodes: node1 => tp1, node2 => tp3, because the max request size is full after the first batch drained
+        Map<Integer, List<ProducerBatch>> batches1 = accum.drain(cluster, new HashSet<Node>(Arrays.asList(node1, node2)), (int) batchSize, 0);
+        verifyTopicPartitionInBatches(batches1, tp1, tp3);
+
+        // add record for tp1, tp3
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+
+        // drain batches from 2 nodes: node1 => tp2, node2 => tp4, because the max request size is full after the first batch drained
+        // The drain index should start from next topic partition, that is, node1 => tp2, node2 => tp4
+        Map<Integer, List<ProducerBatch>> batches2 = accum.drain(cluster, new HashSet<Node>(Arrays.asList(node1, node2)), (int) batchSize, 0);
+        verifyTopicPartitionInBatches(batches2, tp2, tp4);
+
+        // make sure in next run, the drain index will start from the beginning
+        Map<Integer, List<ProducerBatch>> batches3 = accum.drain(cluster, new HashSet<Node>(Arrays.asList(node1, node2)), (int) batchSize, 0);
+        verifyTopicPartitionInBatches(batches3, tp1, tp3);
+
+        // add record for tp2, tp3, tp4 and mute the tp4
+        accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition4, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.mutePartition(tp4);
+        // drain batches from 2 nodes: node1 => tp2, node2 => tp3 (because tp4 is muted)
+        Map<Integer, List<ProducerBatch>> batches4 = accum.drain(cluster, new HashSet<Node>(Arrays.asList(node1, node2)), (int) batchSize, 0);
+        verifyTopicPartitionInBatches(batches4, tp2, tp3);
+
+        // add record for tp1, tp2, tp3, and unmute tp4
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.append(topic, partition3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        accum.unmutePartition(tp4);
+        // set maxSize as a max value, so that the all partitions in 2 nodes should be drained: node1 => [tp1, tp2], node2 => [tp3, tp4]
+        Map<Integer, List<ProducerBatch>> batches5 = accum.drain(cluster, new HashSet<Node>(Arrays.asList(node1, node2)), Integer.MAX_VALUE, 0);
+        verifyTopicPartitionInBatches(batches5, tp1, tp2, tp3, tp4);
+    }
+
+    private void verifyTopicPartitionInBatches(Map<Integer, List<ProducerBatch>> nodeBatches, TopicPartition... tp) {
+        int allTpBatchCount = nodeBatches.values().stream().flatMap(Collection::stream).collect(Collectors.toList()).size();
+        assertEquals(tp.length, allTpBatchCount);
+        List<TopicPartition> topicPartitionsInBatch = new ArrayList<TopicPartition>();
+        for (Map.Entry<Integer, List<ProducerBatch>> entry : nodeBatches.entrySet()) {
+            List<ProducerBatch> tpBatchList = entry.getValue();
+            List<TopicPartition> tpList = tpBatchList.stream().map(producerBatch -> producerBatch.topicPartition).collect(Collectors.toList());
+            topicPartitionsInBatch.addAll(tpList);
+        }
+
+        for (int i = 0; i < tp.length; i++) {
+            assertEquals(tp[i], topicPartitionsInBatch.get(i));
+        }
+    }
+
     @Test
     public void testFull() throws Exception {
         long now = time.milliseconds();
@@ -110,8 +182,8 @@ public void testFull() throws Exception {
         int appends = expectedNumAppends(batchSize);
         for (int i = 0; i < appends; i++) {
             // append to the first batch
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
-            Deque<ProducerBatch> partitionBatches = accum.batches().get(tp1);
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            Deque<ProducerBatch> partitionBatches = accum.getDeque(tp1);
             assertEquals(1, partitionBatches.size());
 
             ProducerBatch batch = partitionBatches.peekFirst();
@@ -121,8 +193,8 @@ public void testFull() throws Exception {
 
         // this append doesn't fit in the first batch, so a new batch is created and the first batch is closed
 
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
-        Deque<ProducerBatch> partitionBatches = accum.batches().get(tp1);
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+        Deque<ProducerBatch> partitionBatches = accum.getDeque(tp1);
         assertEquals(2, partitionBatches.size());
         Iterator<ProducerBatch> partitionBatchesIterator = partitionBatches.iterator();
         assertTrue(partitionBatchesIterator.next().isWritable());
@@ -156,10 +228,10 @@ private void testAppendLarge(CompressionType compressionType) throws Exception {
         byte[] value = new byte[2 * batchSize];
         RecordAccumulator accum = createTestRecordAccumulator(
                 batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10 * 1024, compressionType, 0);
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         assertEquals(Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes, "Our partition's leader should be ready");
 
-        Deque<ProducerBatch> batches = accum.batches().get(tp1);
+        Deque<ProducerBatch> batches = accum.getDeque(tp1);
         assertEquals(1, batches.size());
         ProducerBatch producerBatch = batches.peek();
         List<MutableRecordBatch> recordBatches = TestUtils.toList(producerBatch.records().batches());
@@ -194,10 +266,10 @@ private void testAppendLargeOldMessageFormat(CompressionType compressionType) th
 
         RecordAccumulator accum = createTestRecordAccumulator(
                 batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10 * 1024, compressionType, 0);
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         assertEquals(Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes, "Our partition's leader should be ready");
 
-        Deque<ProducerBatch> batches = accum.batches().get(tp1);
+        Deque<ProducerBatch> batches = accum.getDeque(tp1);
         assertEquals(1, batches.size());
         ProducerBatch producerBatch = batches.peek();
         List<MutableRecordBatch> recordBatches = TestUtils.toList(producerBatch.records().batches());
@@ -218,7 +290,7 @@ public void testLinger() throws Exception {
         int lingerMs = 10;
         RecordAccumulator accum = createTestRecordAccumulator(
                 1024 + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10 * 1024, CompressionType.NONE, lingerMs);
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         assertEquals(0, accum.ready(cluster, time.milliseconds()).readyNodes.size(), "No partitions should be ready");
         time.sleep(10);
         assertEquals(Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes, "Our partition's leader should be ready");
@@ -241,7 +313,7 @@ public void testPartialDrain() throws Exception {
         List<TopicPartition> partitions = asList(tp1, tp2);
         for (TopicPartition tp : partitions) {
             for (int i = 0; i < appends; i++)
-                accum.append(tp, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+                accum.append(tp.topic(), tp.partition(), 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         }
         assertEquals(Collections.singleton(node1), accum.ready(cluster, time.milliseconds()).readyNodes, "Partition's leader should be ready");
 
@@ -263,7 +335,7 @@ public void testStressfulSituation() throws Exception {
                 public void run() {
                     for (int i = 0; i < msgs; i++) {
                         try {
-                            accum.append(new TopicPartition(topic, i % numParts), 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+                            accum.append(topic, i % numParts, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
                         } catch (Exception e) {
                             e.printStackTrace();
                         }
@@ -307,7 +379,7 @@ public void testNextReadyCheckDelay() throws Exception {
 
         // Partition on node1 only
         for (int i = 0; i < appends; i++)
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds());
         assertEquals(0, result.readyNodes.size(), "No nodes should be ready.");
         assertEquals(lingerMs, result.nextReadyCheckDelayMs, "Next check time should be the linger time");
@@ -316,14 +388,14 @@ public void testNextReadyCheckDelay() throws Exception {
 
         // Add partition on node2 only
         for (int i = 0; i < appends; i++)
-            accum.append(tp3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         result = accum.ready(cluster, time.milliseconds());
         assertEquals(0, result.readyNodes.size(), "No nodes should be ready.");
         assertEquals(lingerMs / 2, result.nextReadyCheckDelayMs, "Next check time should be defined by node1, half remaining linger time");
 
         // Add data for another partition on node1, enough to make data sendable immediately
         for (int i = 0; i < appends + 1; i++)
-            accum.append(tp2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         result = accum.ready(cluster, time.milliseconds());
         assertEquals(Collections.singleton(node1), result.readyNodes, "Node1 should be ready");
         // Note this can actually be < linger time because it may use delays from partitions that aren't sendable
@@ -345,7 +417,7 @@ CompressionType.NONE, lingerMs, retryBackoffMs, deliveryTimeoutMs, metrics, metr
             new BufferPool(totalSize, batchSize, metrics, time, metricGrpName));
 
         long now = time.milliseconds();
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, now + lingerMs + 1);
         assertEquals(Collections.singleton(node1), result.readyNodes, "Node1 should be ready");
         Map<Integer, List<ProducerBatch>> batches = accum.drain(cluster, result.readyNodes, Integer.MAX_VALUE, now + lingerMs + 1);
@@ -357,7 +429,7 @@ CompressionType.NONE, lingerMs, retryBackoffMs, deliveryTimeoutMs, metrics, metr
         accum.reenqueue(batches.get(0).get(0), now);
 
         // Put message for partition 1 into accumulator
-        accum.append(tp2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         result = accum.ready(cluster, now + lingerMs + 1);
         assertEquals(Collections.singleton(node1), result.readyNodes, "Node1 should be ready");
 
@@ -383,7 +455,7 @@ public void testFlush() throws Exception {
                 4 * 1024 + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 64 * 1024, CompressionType.NONE, lingerMs);
 
         for (int i = 0; i < 100; i++) {
-            accum.append(new TopicPartition(topic, i % 3), 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, i % 3, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
             assertTrue(accum.hasIncomplete());
         }
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds());
@@ -421,7 +493,7 @@ public void run() {
     public void testAwaitFlushComplete() throws Exception {
         RecordAccumulator accum = createTestRecordAccumulator(
             4 * 1024 + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 64 * 1024, CompressionType.NONE, Integer.MAX_VALUE);
-        accum.append(new TopicPartition(topic, 0), 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, 0, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
 
         accum.beginFlush();
         assertTrue(accum.flushInProgress());
@@ -442,15 +514,19 @@ public void testAbortIncompleteBatches() throws Exception {
         final AtomicInteger numExceptionReceivedInCallback = new AtomicInteger(0);
         final RecordAccumulator accum = createTestRecordAccumulator(
             128 + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 64 * 1024, CompressionType.NONE, lingerMs);
-        class TestCallback implements Callback {
+        class TestCallback implements RecordAccumulator.AppendCallbacks {
             @Override
             public void onCompletion(RecordMetadata metadata, Exception exception) {
                 assertTrue(exception.getMessage().equals("Producer is closed forcefully."));
                 numExceptionReceivedInCallback.incrementAndGet();
             }
+
+            @Override
+            public void setPartition(int partition) {
+            }
         }
         for (int i = 0; i < numRecords; i++)
-            accum.append(new TopicPartition(topic, i % 3), 0L, key, value, null, new TestCallback(), maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, i % 3, 0L, key, value, null, new TestCallback(), maxBlockTimeMs, false, time.milliseconds(), cluster);
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds());
         assertFalse(result.readyNodes.isEmpty());
         Map<Integer, List<ProducerBatch>> drained = accum.drain(cluster, result.readyNodes, Integer.MAX_VALUE, time.milliseconds());
@@ -483,15 +559,19 @@ public void testAbortUnsentBatches() throws Exception {
                 128 + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 64 * 1024, CompressionType.NONE, lingerMs);
         final KafkaException cause = new KafkaException();
 
-        class TestCallback implements Callback {
+        class TestCallback implements RecordAccumulator.AppendCallbacks {
             @Override
             public void onCompletion(RecordMetadata metadata, Exception exception) {
                 assertEquals(cause, exception);
                 numExceptionReceivedInCallback.incrementAndGet();
             }
+
+            @Override
+            public void setPartition(int partition) {
+            }
         }
         for (int i = 0; i < numRecords; i++)
-            accum.append(new TopicPartition(topic, i % 3), 0L, key, value, null, new TestCallback(), maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, i % 3, 0L, key, value, null, new TestCallback(), maxBlockTimeMs, false, time.milliseconds(), cluster);
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds());
         assertFalse(result.readyNodes.isEmpty());
         Map<Integer, List<ProducerBatch>> drained = accum.drain(cluster, result.readyNodes, Integer.MAX_VALUE,
@@ -530,7 +610,7 @@ private void doExpireBatchSingle(int deliveryTimeoutMs) throws InterruptedExcept
         for (Boolean mute: muteStates) {
             if (time.milliseconds() < System.currentTimeMillis())
                 time.setCurrentTimeMs(System.currentTimeMillis());
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
             assertEquals(0, accum.ready(cluster, time.milliseconds()).readyNodes.size(), "No partition should be ready.");
 
             time.sleep(lingerMs);
@@ -579,11 +659,11 @@ public void testExpiredBatches() throws InterruptedException {
 
         // Test batches not in retry
         for (int i = 0; i < appends; i++) {
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
             assertEquals(0, accum.ready(cluster, time.milliseconds()).readyNodes.size(), "No partitions should be ready.");
         }
         // Make the batches ready due to batch full
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
         Set<Node> readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes;
         assertEquals(Collections.singleton(node1), readyNodes, "Our partition's leader should be ready");
         // Advance the clock to expire the batch.
@@ -613,7 +693,7 @@ public void testExpiredBatches() throws InterruptedException {
 
         // Test batches in retry.
         // Create a retried batch
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
         time.sleep(lingerMs);
         readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes;
         assertEquals(Collections.singleton(node1), readyNodes, "Our partition's leader should be ready");
@@ -637,7 +717,7 @@ public void testExpiredBatches() throws InterruptedException {
         assertEquals(0, expiredBatches.size(), "All batches should have been expired.");
 
         // Test that when being throttled muted batches are expired before the throttle time is over.
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
         time.sleep(lingerMs);
         readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes;
         assertEquals(Collections.singleton(node1), readyNodes, "Our partition's leader should be ready");
@@ -670,7 +750,7 @@ public void testMutedPartitions() throws InterruptedException {
                 batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10 * batchSize, CompressionType.NONE, 10);
         int appends = expectedNumAppends(batchSize);
         for (int i = 0; i < appends; i++) {
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
             assertEquals(0, accum.ready(cluster, now).readyNodes.size(), "No partitions should be ready.");
         }
         time.sleep(2000);
@@ -713,7 +793,7 @@ public void testIdempotenceWithOldMagic() {
             CompressionType.NONE, lingerMs, retryBackoffMs, deliveryTimeoutMs, metrics, metricGrpName, time, apiVersions, transactionManager,
             new BufferPool(totalSize, batchSize, metrics, time, metricGrpName));
         assertThrows(UnsupportedVersionException.class,
-            () -> accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds()));
+            () -> accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster));
     }
 
     @Test
@@ -736,10 +816,10 @@ public void testRecordsDrainedWhenTransactionCompleting() throws Exception {
         // Initially, the transaction is still in progress, so we should respect the linger.
         Mockito.when(transactionManager.isCompleting()).thenReturn(false);
 
-        accumulator.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs,
-            false, time.milliseconds());
-        accumulator.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs,
-            false, time.milliseconds());
+        accumulator.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs,
+            false, time.milliseconds(), cluster);
+        accumulator.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs,
+            false, time.milliseconds(), cluster);
         assertTrue(accumulator.hasUndrained());
 
         RecordAccumulator.ReadyCheckResult firstResult = accumulator.ready(cluster, time.milliseconds());
@@ -858,7 +938,7 @@ public void testSplitFrequency() throws InterruptedException {
                 int dice = random.nextInt(100);
                 byte[] value = (dice < goodCompRatioPercentage) ?
                         bytesWithGoodCompression(random) : bytesWithPoorCompression(random, 100);
-                accum.append(tp1, 0L, null, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+                accum.append(topic, partition1, 0L, null, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
                 BatchDrainedResult result = completeOrSplitBatches(accum, batchSize);
                 numSplit += result.numSplit;
                 numBatches += result.numBatches;
@@ -881,7 +961,7 @@ public void testSoonToExpireBatchesArePickedUpForExpiry() throws InterruptedExce
         RecordAccumulator accum = createTestRecordAccumulator(
             batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10 * batchSize, CompressionType.NONE, lingerMs);
 
-        accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         Set<Node> readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes;
         Map<Integer, List<ProducerBatch>> drained = accum.drain(cluster, readyNodes, Integer.MAX_VALUE, time.milliseconds());
         assertTrue(drained.isEmpty());
@@ -896,7 +976,7 @@ public void testSoonToExpireBatchesArePickedUpForExpiry() throws InterruptedExce
         //assertTrue(accum.soonToExpireInFlightBatches().isEmpty());
 
         // Queue another batch and advance clock such that batch expiry time is earlier than request timeout.
-        accum.append(tp2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition2, 0L, key, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         time.sleep(lingerMs * 4);
 
         // Now drain and check that accumulator picked up the drained batch because its expiry is soon.
@@ -921,7 +1001,7 @@ public void testExpiredBatchesRetry() throws InterruptedException {
 
         // Test batches in retry.
         for (Boolean mute : muteStates) {
-            accum.append(tp1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, key, value, Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
             time.sleep(lingerMs);
             readyNodes = accum.ready(cluster, time.milliseconds()).readyNodes;
             assertEquals(Collections.singleton(node1), readyNodes, "Our partition's leader should be ready");
@@ -943,6 +1023,7 @@ public void testExpiredBatchesRetry() throws InterruptedException {
         }
     }
 
+    @SuppressWarnings("deprecation")
     @Test
     public void testStickyBatches() throws Exception {
         long now = time.milliseconds();
@@ -952,24 +1033,23 @@ public void testStickyBatches() throws Exception {
 
         Partitioner partitioner = new DefaultPartitioner();
         RecordAccumulator accum = createTestRecordAccumulator(3200,
-                batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10L * batchSize, CompressionType.NONE, 10);
+            batchSize + DefaultRecordBatch.RECORD_BATCH_OVERHEAD, 10L * batchSize, CompressionType.NONE, 10);
         int expectedAppends = expectedNumAppendsNoKey(batchSize);
 
         // Create first batch
         int partition = partitioner.partition(topic, null, null, "value", value, cluster);
-        TopicPartition tp = new TopicPartition(topic, partition);
-        accum.append(tp, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         int appends = 1;
 
         boolean switchPartition = false;
         while (!switchPartition) {
             // Append to the first batch
             partition = partitioner.partition(topic, null, null, "value", value, cluster);
-            tp = new TopicPartition(topic, partition);
-            RecordAccumulator.RecordAppendResult result = accum.append(tp, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, true, time.milliseconds());
-            Deque<ProducerBatch> partitionBatches1 = accum.batches().get(tp1);
-            Deque<ProducerBatch> partitionBatches2 = accum.batches().get(tp2);
-            Deque<ProducerBatch> partitionBatches3 = accum.batches().get(tp3);
+            RecordAccumulator.RecordAppendResult result = accum.append(topic, partition, 0L, null,
+                value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, true, time.milliseconds(), cluster);
+            Deque<ProducerBatch> partitionBatches1 = accum.getDeque(tp1);
+            Deque<ProducerBatch> partitionBatches2 = accum.getDeque(tp2);
+            Deque<ProducerBatch> partitionBatches3 = accum.getDeque(tp3);
             int numBatches = (partitionBatches1 == null ? 0 : partitionBatches1.size()) + (partitionBatches2 == null ? 0 : partitionBatches2.size()) + (partitionBatches3 == null ? 0 : partitionBatches3.size());
             // Only one batch is created because the partition is sticky.
             assertEquals(1, numBatches);
@@ -990,18 +1070,17 @@ public void testStickyBatches() throws Exception {
         // KafkaProducer would call this method in this case, make second batch
         partitioner.onNewBatch(topic, cluster, partition);
         partition = partitioner.partition(topic, null, null, "value", value, cluster);
-        tp = new TopicPartition(topic, partition);
-        accum.append(tp, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
+        accum.append(topic, partition, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
         appends++;
 
         // These appends all go into the second batch
         while (!switchPartition) {
             partition = partitioner.partition(topic, null, null, "value", value, cluster);
-            tp = new TopicPartition(topic, partition);
-            RecordAccumulator.RecordAppendResult result = accum.append(tp, 0L, null, value, Record.EMPTY_HEADERS, null, maxBlockTimeMs, true, time.milliseconds());
-            Deque<ProducerBatch> partitionBatches1 = accum.batches().get(tp1);
-            Deque<ProducerBatch> partitionBatches2 = accum.batches().get(tp2);
-            Deque<ProducerBatch> partitionBatches3 = accum.batches().get(tp3);
+            RecordAccumulator.RecordAppendResult result = accum.append(topic, partition, 0L, null, value,
+                Record.EMPTY_HEADERS, null, maxBlockTimeMs, true, time.milliseconds(), cluster);
+            Deque<ProducerBatch> partitionBatches1 = accum.getDeque(tp1);
+            Deque<ProducerBatch> partitionBatches2 = accum.getDeque(tp2);
+            Deque<ProducerBatch> partitionBatches3 = accum.getDeque(tp3);
             int numBatches = (partitionBatches1 == null ? 0 : partitionBatches1.size()) + (partitionBatches2 == null ? 0 : partitionBatches2.size()) + (partitionBatches3 == null ? 0 : partitionBatches3.size());
             // Only two batches because the new partition is also sticky.
             assertEquals(2, numBatches);
@@ -1017,6 +1096,158 @@ public void testStickyBatches() throws Exception {
         assertEquals(appends, 2 * expectedAppends);
     }
 
+    @Test
+    public void testUniformBuiltInPartitioner() throws Exception {
+
+        try {
+            // Mock random number generator with just sequential integer.
+            AtomicInteger mockRandom = new AtomicInteger();
+            BuiltInPartitioner.mockRandom = () -> mockRandom.getAndAdd(1);
+
+            long totalSize = 1024 * 1024;
+            int batchSize = 128;  // note that this is also a "sticky" limit for the partitioner
+            RecordAccumulator accum = createTestRecordAccumulator(batchSize, totalSize, CompressionType.NONE, 0);
+
+            // Set up callbacks so that we know what partition is chosen.
+            final AtomicInteger partition = new AtomicInteger(RecordMetadata.UNKNOWN_PARTITION);
+            RecordAccumulator.AppendCallbacks callbacks = new RecordAccumulator.AppendCallbacks() {
+                @Override
+                public void setPartition(int p) {
+                    partition.set(p);
+                }
+
+                @Override
+                public void onCompletion(RecordMetadata metadata, Exception exception) {
+
+                }
+            };
+
+            // Produce small record, we should switch to first partition.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, value, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            assertEquals(partition1, partition.get());
+            assertEquals(1, mockRandom.get());
+
+            // Produce large record, we should exceed "sticky" limit, but produce to this partition
+            // as we switch after the "sticky" limit is exceeded.  The partition is switched after
+            // we produce.
+            byte[] largeValue = new byte[batchSize];
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            assertEquals(partition1, partition.get());
+            assertEquals(2, mockRandom.get());
+
+            // Produce large record, we should switch to next partition.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            assertEquals(partition2, partition.get());
+            assertEquals(3, mockRandom.get());
+
+            // Produce large record, we should switch to next partition.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            assertEquals(partition3, partition.get());
+            assertEquals(4, mockRandom.get());
+
+            // Produce large record, we should switch to first partition again.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+            assertEquals(partition1, partition.get());
+            assertEquals(5, mockRandom.get());
+        } finally {
+            BuiltInPartitioner.mockRandom = null;
+        }
+    }
+
+    @Test
+    public void testAdaptiveBuiltInPartitioner() throws Exception {
+        try {
+            // Mock random number generator with just sequential integer.
+            AtomicInteger mockRandom = new AtomicInteger();
+            BuiltInPartitioner.mockRandom = () -> mockRandom.getAndAdd(1);
+
+            // Create accumulator with partitioner config to enable adaptive partitioning.
+            RecordAccumulator.PartitionerConfig config = new RecordAccumulator.PartitionerConfig(true, 100);
+            long totalSize = 1024 * 1024;
+            int batchSize = 128;
+            RecordAccumulator accum = new RecordAccumulator(logContext, batchSize, CompressionType.NONE, 0, 0L,
+                3200, config, metrics, "producer-metrics", time, new ApiVersions(), null,
+                new BufferPool(totalSize, batchSize, metrics, time, "producer-internal-metrics"));
+
+            byte[] largeValue = new byte[batchSize];
+            int[] queueSizes = {1, 7, 2};
+            int[] expectedFrequencies = new int[queueSizes.length];
+            for (int i = 0; i < queueSizes.length; i++) {
+                expectedFrequencies[i] = 8 - queueSizes[i];  // 8 is max(queueSizes) + 1
+                for (int c = queueSizes[i]; c-- > 0; ) {
+                    // Add large records to each partition, so that each record creates a batch.
+                    accum.append(topic, i, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                        null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+                }
+                assertEquals(queueSizes[i], accum.getDeque(new TopicPartition(topic, i)).size());
+            }
+
+            // Let the accumulator generate the probability tables.
+            accum.ready(cluster, time.milliseconds());
+
+            // Set up callbacks so that we know what partition is chosen.
+            final AtomicInteger partition = new AtomicInteger(RecordMetadata.UNKNOWN_PARTITION);
+            RecordAccumulator.AppendCallbacks callbacks = new RecordAccumulator.AppendCallbacks() {
+                @Override
+                public void setPartition(int p) {
+                    partition.set(p);
+                }
+
+                @Override
+                public void onCompletion(RecordMetadata metadata, Exception exception) {
+
+                }
+            };
+
+            // Prime built-in partitioner so that it'd switch on every record, as switching only
+            // happens after the "sticky" limit is exceeded.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+
+            // Issue a certain number of partition calls to validate that the partitions would be
+            // distributed with frequencies that are reciprocal to the queue sizes.  The number of
+            // iterations is defined by the last element of the cumulative frequency table which is
+            // the sum of all frequencies.  We do 2 cycles, just so it's more than 1.
+            final int numberOfCycles = 2;
+            int numberOfIterations = accum.getBuiltInPartitioner(topic).loadStatsRangeEnd() * numberOfCycles;
+            int[] frequencies = new int[queueSizes.length];
+
+            for (int i = 0; i < numberOfIterations; i++) {
+                accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                    callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+                ++frequencies[partition.get()];
+            }
+
+            // Verify that frequencies are reciprocal of queue sizes.
+            for (int i = 0; i < frequencies.length; i++) {
+                assertEquals(expectedFrequencies[i] * numberOfCycles, frequencies[i],
+                    "Partition " + i + " was chosen " + frequencies[i] + " times");
+            }
+
+            // Test that partitions residing on high-latency nodes don't get switched to.
+            accum.updateNodeLatencyStats(0, time.milliseconds() - 200, true);
+            accum.updateNodeLatencyStats(0, time.milliseconds(), false);
+            accum.ready(cluster, time.milliseconds());
+
+            // Do one append, because partition gets switched after append.
+            accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                    callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+
+            for (int c = 10; c-- > 0; ) {
+                accum.append(topic, RecordMetadata.UNKNOWN_PARTITION, 0L, null, largeValue, Record.EMPTY_HEADERS,
+                    callbacks, maxBlockTimeMs, false, time.milliseconds(), cluster);
+                assertEquals(partition3, partition.get());
+            }
+        } finally {
+            BuiltInPartitioner.mockRandom = null;
+        }
+    }
+
     private int prepareSplitBatches(RecordAccumulator accum, long seed, int recordSize, int numRecords)
         throws InterruptedException {
         Random random = new Random();
@@ -1026,7 +1257,7 @@ private int prepareSplitBatches(RecordAccumulator accum, long seed, int recordSi
         CompressionRatioEstimator.setEstimation(tp1.topic(), CompressionType.GZIP, 0.1f);
         // Append 20 records of 100 bytes size with poor compression ratio should make the batch too big.
         for (int i = 0; i < numRecords; i++) {
-            accum.append(tp1, 0L, null, bytesWithPoorCompression(random, recordSize), Record.EMPTY_HEADERS, null, 0, false, time.milliseconds());
+            accum.append(topic, partition1, 0L, null, bytesWithPoorCompression(random, recordSize), Record.EMPTY_HEADERS, null, 0, false, time.milliseconds(), cluster);
         }
 
         RecordAccumulator.ReadyCheckResult result = accum.ready(cluster, time.milliseconds());
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/SenderTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/SenderTest.java
index 60e9f06186255..3d972b3eb2cfe 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/SenderTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/SenderTest.java
@@ -22,7 +22,6 @@
 import org.apache.kafka.clients.MockClient;
 import org.apache.kafka.clients.NetworkClient;
 import org.apache.kafka.clients.NodeApiVersions;
-import org.apache.kafka.clients.producer.Callback;
 import org.apache.kafka.clients.producer.RecordMetadata;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.InvalidRecordException;
@@ -467,22 +466,30 @@ public void testAppendInExpiryCallback() throws InterruptedException {
         final byte[] key = "key".getBytes();
         final byte[] value = "value".getBytes();
         final long maxBlockTimeMs = 1000;
-        Callback callback = (metadata, exception) -> {
-            if (exception instanceof TimeoutException) {
-                expiryCallbackCount.incrementAndGet();
-                try {
-                    accumulator.append(tp1, 0L, key, value,
-                        Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds());
-                } catch (InterruptedException e) {
-                    throw new RuntimeException("Unexpected interruption", e);
-                }
-            } else if (exception != null)
-                unexpectedException.compareAndSet(null, exception);
+        Cluster cluster = TestUtils.singletonCluster();
+        RecordAccumulator.AppendCallbacks callbacks = new RecordAccumulator.AppendCallbacks() {
+            @Override
+            public void setPartition(int partition) {
+            }
+
+            @Override
+            public void onCompletion(RecordMetadata metadata, Exception exception) {
+                if (exception instanceof TimeoutException) {
+                    expiryCallbackCount.incrementAndGet();
+                    try {
+                        accumulator.append(tp1.topic(), tp1.partition(), 0L, key, value,
+                            Record.EMPTY_HEADERS, null, maxBlockTimeMs, false, time.milliseconds(), cluster);
+                    } catch (InterruptedException e) {
+                        throw new RuntimeException("Unexpected interruption", e);
+                    }
+                } else if (exception != null)
+                    unexpectedException.compareAndSet(null, exception);
+            }
         };
 
         final long nowMs = time.milliseconds();
         for (int i = 0; i < messagesPerBatch; i++)
-            accumulator.append(tp1, 0L, key, value, null, callback, maxBlockTimeMs, false, nowMs);
+            accumulator.append(tp1.topic(), tp1.partition(), 0L, key, value, null, callbacks, maxBlockTimeMs, false, nowMs, cluster);
 
         // Advance the clock to expire the first batch.
         time.sleep(10000);
@@ -501,9 +508,9 @@ public void testAppendInExpiryCallback() throws InterruptedException {
         assertEquals(messagesPerBatch, expiryCallbackCount.get(), "Callbacks not invoked for expiry");
         assertNull(unexpectedException.get(), "Unexpected exception");
         // Make sure that the reconds were appended back to the batch.
-        assertTrue(accumulator.batches().containsKey(tp1));
-        assertEquals(1, accumulator.batches().get(tp1).size());
-        assertEquals(messagesPerBatch, accumulator.batches().get(tp1).peekFirst().recordCount);
+        assertNotNull(accumulator.getDeque(tp1));
+        assertEquals(1, accumulator.getDeque(tp1).size());
+        assertEquals(messagesPerBatch, accumulator.getDeque(tp1).peekFirst().recordCount);
     }
 
     /**
@@ -546,6 +553,76 @@ public void testMetadataTopicExpiry() throws Exception {
         assertTrue(future.isDone(), "Request should be completed");
     }
 
+    @Test
+    public void testNodeLatencyStats() throws Exception {
+        try (Metrics m = new Metrics()) {
+            // Create a new record accumulator with non-0 partitionAvailabilityTimeoutMs
+            // otherwise it wouldn't update the stats.
+            RecordAccumulator.PartitionerConfig config = new RecordAccumulator.PartitionerConfig(false, 42);
+            long totalSize = 1024 * 1024;
+            accumulator = new RecordAccumulator(logContext, batchSize, CompressionType.NONE, 0, 0L,
+                DELIVERY_TIMEOUT_MS, config, m, "producer-metrics", time, apiVersions, null,
+                new BufferPool(totalSize, batchSize, m, time, "producer-internal-metrics"));
+
+            SenderMetricsRegistry senderMetrics = new SenderMetricsRegistry(m);
+            Sender sender = new Sender(logContext, client, metadata, this.accumulator, false, MAX_REQUEST_SIZE, ACKS_ALL, 1,
+                senderMetrics, time, REQUEST_TIMEOUT, 1000L, null, new ApiVersions());
+
+            // Produce and send batch.
+            long time1 = time.milliseconds();
+            appendToAccumulator(tp0, 0L, "key", "value");
+            sender.runOnce();
+            assertEquals(1, client.inFlightRequestCount(), "We should have a single produce request in flight.");
+
+            // We were able to send the batch out, so both the ready and drain values should be the same.
+            RecordAccumulator.NodeLatencyStats stats = accumulator.getNodeLatencyStats(0);
+            assertEquals(time1, stats.drainTimeMs);
+            assertEquals(time1, stats.readyTimeMs);
+
+            // Make the node 1 not ready.
+            client.throttle(metadata.fetch().nodeById(0), 100);
+
+            // Time passes, but we don't have anything to send.
+            time.sleep(10);
+            sender.runOnce();
+            assertEquals(1, client.inFlightRequestCount(), "We should have a single produce request in flight.");
+
+            // Stats shouldn't change as we didn't have anything ready.
+            assertEquals(time1, stats.drainTimeMs);
+            assertEquals(time1, stats.readyTimeMs);
+
+            // Produce a new batch, but we won't be able to send it because node is not ready.
+            long time2 = time.milliseconds();
+            appendToAccumulator(tp0, 0L, "key", "value");
+            sender.runOnce();
+            assertEquals(1, client.inFlightRequestCount(), "We should have a single produce request in flight.");
+
+            // The ready time should move forward, but drain time shouldn't change.
+            assertEquals(time1, stats.drainTimeMs);
+            assertEquals(time2, stats.readyTimeMs);
+
+            // Time passes, we keep trying to send, but the node is not ready.
+            time.sleep(10);
+            time2 = time.milliseconds();
+            sender.runOnce();
+            assertEquals(1, client.inFlightRequestCount(), "We should have a single produce request in flight.");
+
+            // The ready time should move forward, but drain time shouldn't change.
+            assertEquals(time1, stats.drainTimeMs);
+            assertEquals(time2, stats.readyTimeMs);
+
+            // Finally, time passes beyond the throttle and the node is ready.
+            time.sleep(100);
+            time2 = time.milliseconds();
+            sender.runOnce();
+            assertEquals(2, client.inFlightRequestCount(), "We should have 2 produce requests in flight.");
+
+            // Both times should move forward
+            assertEquals(time2, stats.drainTimeMs);
+            assertEquals(time2, stats.readyTimeMs);
+        }
+    }
+
     @Test
     public void testInitProducerIdRequest() {
         final long producerId = 343434L;
@@ -1200,7 +1277,7 @@ public void testCorrectHandlingOfOutOfOrderResponses() throws Exception {
         client.respondToRequest(secondClientRequest, produceResponse(tp0, -1, Errors.OUT_OF_ORDER_SEQUENCE_NUMBER, -1));
 
         sender.runOnce(); // receive response 1
-        Deque<ProducerBatch> queuedBatches = accumulator.batches().get(tp0);
+        Deque<ProducerBatch> queuedBatches = accumulator.getDeque(tp0);
 
         // Make sure that we are queueing the second batch first.
         assertEquals(1, queuedBatches.size());
@@ -1281,7 +1358,7 @@ public void testCorrectHandlingOfOutOfOrderResponsesWhenSecondSucceeds() throws
         assertTrue(request2.isDone());
         assertEquals(1, request2.get().offset());
         assertFalse(request1.isDone());
-        Deque<ProducerBatch> queuedBatches = accumulator.batches().get(tp0);
+        Deque<ProducerBatch> queuedBatches = accumulator.getDeque(tp0);
 
         assertEquals(0, queuedBatches.size());
         assertEquals(1, client.inFlightRequestCount());
@@ -1389,7 +1466,7 @@ public void testExpiryOfFirstBatchShouldNotCauseUnresolvedSequencesIfFutureBatch
         assertEquals(1, request2.get().offset());
         assertEquals(0, sender.inFlightBatches(tp0).size());
 
-        Deque<ProducerBatch> batches = accumulator.batches().get(tp0);
+        Deque<ProducerBatch> batches = accumulator.getDeque(tp0);
         assertEquals(1, batches.size());
         assertFalse(batches.peekFirst().hasSequence());
         assertFalse(client.hasInFlightRequests());
@@ -1444,7 +1521,7 @@ public void testExpiryOfFirstBatchShouldCauseEpochBumpIfFutureBatchesFail() thro
         sendIdempotentProducerResponse(1, tp0, Errors.OUT_OF_ORDER_SEQUENCE_NUMBER, 1);
         sender.runOnce(); // receive second response, the third request shouldn't be sent since we are in an unresolved state.
 
-        Deque<ProducerBatch> batches = accumulator.batches().get(tp0);
+        Deque<ProducerBatch> batches = accumulator.getDeque(tp0);
 
         // The epoch should be bumped and the second request should be requeued
         assertEquals(2, batches.size());
@@ -1524,7 +1601,7 @@ public void testExpiryOfAllSentBatchesShouldCauseUnresolvedSequences() throws Ex
         assertFutureFailure(request1, TimeoutException.class);
         assertTrue(transactionManager.hasUnresolvedSequence(tp0));
         assertFalse(client.hasInFlightRequests());
-        Deque<ProducerBatch> batches = accumulator.batches().get(tp0);
+        Deque<ProducerBatch> batches = accumulator.getDeque(tp0);
         assertEquals(0, batches.size());
         assertEquals(producerId, transactionManager.producerIdAndEpoch().producerId);
 
@@ -2337,10 +2414,11 @@ private void testSplitBatchAndSend(TransactionManager txnManager,
             client.prepareMetadataUpdate(metadataUpdate1);
             // Send the first message.
             long nowMs = time.milliseconds();
+            Cluster cluster = TestUtils.singletonCluster();
             Future<RecordMetadata> f1 =
-                    accumulator.append(tp, 0L, "key1".getBytes(), new byte[batchSize / 2], null, null, MAX_BLOCK_TIMEOUT, false, nowMs).future;
+                    accumulator.append(tp.topic(), tp.partition(), 0L, "key1".getBytes(), new byte[batchSize / 2], null, null, MAX_BLOCK_TIMEOUT, false, nowMs, cluster).future;
             Future<RecordMetadata> f2 =
-                    accumulator.append(tp, 0L, "key2".getBytes(), new byte[batchSize / 2], null, null, MAX_BLOCK_TIMEOUT, false, nowMs).future;
+                    accumulator.append(tp.topic(), tp.partition(), 0L, "key2".getBytes(), new byte[batchSize / 2], null, null, MAX_BLOCK_TIMEOUT, false, nowMs, cluster).future;
             sender.runOnce(); // connect
             sender.runOnce(); // send produce request
 
@@ -2395,7 +2473,7 @@ private void testSplitBatchAndSend(TransactionManager txnManager,
             assertEquals(2, txnManager.sequenceNumber(tp).longValue(), "The next sequence number should be 2");
             assertEquals(OptionalInt.of(1), txnManager.lastAckedSequence(tp), "The last ack'd sequence number should be 1");
             assertEquals(1L, f2.get().offset(), "Offset of the first message should be 1");
-            assertTrue(accumulator.batches().get(tp).isEmpty(), "There should be no batch in the accumulator");
+            assertTrue(accumulator.getDeque(tp).isEmpty(), "There should be no batch in the accumulator");
             assertTrue((Double) (m.metrics().get(senderMetrics.batchSplitRate).metricValue()) > 0, "There should be a split");
         }
     }
@@ -3063,8 +3141,8 @@ private FutureRecordMetadata appendToAccumulator(TopicPartition tp) throws Inter
     }
 
     private FutureRecordMetadata appendToAccumulator(TopicPartition tp, long timestamp, String key, String value) throws InterruptedException {
-        return accumulator.append(tp, timestamp, key.getBytes(), value.getBytes(), Record.EMPTY_HEADERS,
-                null, MAX_BLOCK_TIMEOUT, false, time.milliseconds()).future;
+        return accumulator.append(tp.topic(), tp.partition(), timestamp, key.getBytes(), value.getBytes(), Record.EMPTY_HEADERS,
+                null, MAX_BLOCK_TIMEOUT, false, time.milliseconds(), TestUtils.singletonCluster()).future;
     }
 
     @SuppressWarnings("deprecation")
diff --git a/clients/src/test/java/org/apache/kafka/clients/producer/internals/TransactionManagerTest.java b/clients/src/test/java/org/apache/kafka/clients/producer/internals/TransactionManagerTest.java
index 4227db5e61e62..b6bf9e6f4f1a7 100644
--- a/clients/src/test/java/org/apache/kafka/clients/producer/internals/TransactionManagerTest.java
+++ b/clients/src/test/java/org/apache/kafka/clients/producer/internals/TransactionManagerTest.java
@@ -153,7 +153,7 @@ public void setup() {
     private void initializeTransactionManager(Optional<String> transactionalId) {
         Metrics metrics = new Metrics(time);
 
-        apiVersions.update("0", new NodeApiVersions(Arrays.asList(
+        apiVersions.update("0", NodeApiVersions.create(Arrays.asList(
                 new ApiVersion()
                     .setApiKey(ApiKeys.INIT_PRODUCER_ID.id)
                     .setMinVersion((short) 0)
@@ -674,6 +674,70 @@ public void testBatchCompletedAfterProducerReset() {
         assertNull(transactionManager.nextBatchBySequence(tp0));
     }
 
+    @Test
+    public void testDuplicateSequenceAfterProducerReset() throws Exception {
+        initializeTransactionManager(Optional.empty());
+        initializeIdempotentProducerId(producerId, epoch);
+
+        Metrics metrics = new Metrics(time);
+        final int requestTimeout = 10000;
+        final int deliveryTimeout = 15000;
+
+        RecordAccumulator accumulator = new RecordAccumulator(logContext, 16 * 1024, CompressionType.NONE, 0, 0L,
+                deliveryTimeout, metrics, "", time, apiVersions, transactionManager,
+                new BufferPool(1024 * 1024, 16 * 1024, metrics, time, ""));
+
+        Sender sender = new Sender(logContext, this.client, this.metadata, accumulator, false,
+                MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(metrics), this.time, requestTimeout,
+                0, transactionManager, apiVersions);
+
+        assertEquals(0, transactionManager.sequenceNumber(tp0).intValue());
+
+        Future<RecordMetadata> responseFuture1 = accumulator.append(tp0.topic(), tp0.partition(), time.milliseconds(),
+                "1".getBytes(), "1".getBytes(), Record.EMPTY_HEADERS, null, MAX_BLOCK_TIMEOUT, false, time.milliseconds(),
+                TestUtils.singletonCluster()).future;
+        sender.runOnce();
+        assertEquals(1, transactionManager.sequenceNumber(tp0).intValue());
+
+        time.sleep(requestTimeout);
+        sender.runOnce();
+        assertEquals(0, client.inFlightRequestCount());
+        assertTrue(transactionManager.hasInflightBatches(tp0));
+        assertEquals(1, transactionManager.sequenceNumber(tp0).intValue());
+        sender.runOnce(); // retry
+        assertEquals(1, client.inFlightRequestCount());
+        assertTrue(transactionManager.hasInflightBatches(tp0));
+        assertEquals(1, transactionManager.sequenceNumber(tp0).intValue());
+
+        time.sleep(5000); // delivery time out
+        sender.runOnce();
+
+        // The retried request will remain inflight until the request timeout
+        // is reached even though the delivery timeout has expired and the
+        // future has completed exceptionally.
+        assertTrue(responseFuture1.isDone());
+        TestUtils.assertFutureThrows(responseFuture1, TimeoutException.class);
+        assertFalse(transactionManager.hasInFlightRequest());
+        assertEquals(1, client.inFlightRequestCount());
+
+        sender.runOnce(); // bump the epoch
+        assertEquals(epoch + 1, transactionManager.producerIdAndEpoch().epoch);
+        assertEquals(0, transactionManager.sequenceNumber(tp0).intValue());
+
+        Future<RecordMetadata> responseFuture2 = accumulator.append(tp0.topic(), tp0.partition(), time.milliseconds(),
+                "2".getBytes(), "2".getBytes(), Record.EMPTY_HEADERS, null, MAX_BLOCK_TIMEOUT, false, time.milliseconds(),
+                TestUtils.singletonCluster()).future;
+        sender.runOnce();
+        sender.runOnce();
+        assertEquals(0, transactionManager.firstInFlightSequence(tp0));
+        assertEquals(1, transactionManager.sequenceNumber(tp0).intValue());
+
+        time.sleep(5000); // request time out again
+        sender.runOnce();
+        assertTrue(transactionManager.hasInflightBatches(tp0)); // the latter batch failed and retried
+        assertFalse(responseFuture2.isDone());
+    }
+
     private ProducerBatch writeIdempotentBatchWithValue(TransactionManager manager,
                                                         TopicPartition tp,
                                                         String value) {
@@ -2551,7 +2615,7 @@ public void testDropCommitOnBatchExpiry() throws InterruptedException {
 
     @Test
     public void testTransitionToFatalErrorWhenRetriedBatchIsExpired() throws InterruptedException {
-        apiVersions.update("0", new NodeApiVersions(Arrays.asList(
+        apiVersions.update("0", NodeApiVersions.create(Arrays.asList(
                 new ApiVersion()
                     .setApiKey(ApiKeys.INIT_PRODUCER_ID.id)
                     .setMinVersion((short) 0)
@@ -2750,7 +2814,7 @@ public void testNoFailedBatchHandlingWhenTxnManagerIsInFatalError() {
 
     @Test
     public void testAbortTransactionAndReuseSequenceNumberOnError() throws InterruptedException {
-        apiVersions.update("0", new NodeApiVersions(Arrays.asList(
+        apiVersions.update("0", NodeApiVersions.create(Arrays.asList(
                 new ApiVersion()
                         .setApiKey(ApiKeys.INIT_PRODUCER_ID.id)
                         .setMinVersion((short) 0)
@@ -2802,7 +2866,7 @@ public void testAbortTransactionAndResetSequenceNumberOnUnknownProducerId() thro
         // Set the InitProducerId version such that bumping the epoch number is not supported. This will test the case
         // where the sequence number is reset on an UnknownProducerId error, allowing subsequent transactions to
         // append to the log successfully
-        apiVersions.update("0", new NodeApiVersions(Arrays.asList(
+        apiVersions.update("0", NodeApiVersions.create(Arrays.asList(
                 new ApiVersion()
                     .setApiKey(ApiKeys.INIT_PRODUCER_ID.id)
                     .setMinVersion((short) 0)
@@ -3116,7 +3180,7 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
         // New tp1 batches should not be drained from the accumulator while tp1 has in-flight requests using the old epoch
         appendToAccumulator(tp1);
         sender.runOnce();
-        assertEquals(1, accumulator.batches().get(tp1).size());
+        assertEquals(1, accumulator.getDeque(tp1).size());
 
         // Partition failover occurs and tp1 returns a NOT_LEADER_OR_FOLLOWER error
         // Despite having the old epoch, the batch should retry
@@ -3127,8 +3191,8 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
 
         // The batch with the old epoch should be successfully drained, leaving the new one in the queue
         sender.runOnce();
-        assertEquals(1, accumulator.batches().get(tp1).size());
-        assertNotEquals(tp1b2, accumulator.batches().get(tp1).peek());
+        assertEquals(1, accumulator.getDeque(tp1).size());
+        assertNotEquals(tp1b2, accumulator.getDeque(tp1).peek());
         assertEquals(epoch, tp1b2.producerEpoch());
 
         // After successfully retrying, there should be no in-flight batches for tp1 and the sequence should be 0
@@ -3143,7 +3207,7 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
 
         // The last batch should now be drained and sent
         runUntil(() -> transactionManager.hasInflightBatches(tp1));
-        assertTrue(accumulator.batches().get(tp1).isEmpty());
+        assertTrue(accumulator.getDeque(tp1).isEmpty());
         ProducerBatch tp1b3 = transactionManager.nextBatchBySequence(tp1);
         assertEquals(epoch + 1, tp1b3.producerEpoch());
 
@@ -3240,7 +3304,7 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
         // New tp1 batches should not be drained from the accumulator while tp1 has in-flight requests using the old epoch
         appendToAccumulator(tp1);
         sender.runOnce();
-        assertEquals(1, accumulator.batches().get(tp1).size());
+        assertEquals(1, accumulator.getDeque(tp1).size());
 
         // Partition failover occurs and tp1 returns a NOT_LEADER_OR_FOLLOWER error
         // Despite having the old epoch, the batch should retry
@@ -3251,8 +3315,8 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
 
         // The batch with the old epoch should be successfully drained, leaving the new one in the queue
         sender.runOnce();
-        assertEquals(1, accumulator.batches().get(tp1).size());
-        assertNotEquals(tp1b2, accumulator.batches().get(tp1).peek());
+        assertEquals(1, accumulator.getDeque(tp1).size());
+        assertNotEquals(tp1b2, accumulator.getDeque(tp1).peek());
         assertEquals(epoch, tp1b2.producerEpoch());
 
         // After successfully retrying, there should be no in-flight batches for tp1 and the sequence should be 0
@@ -3267,7 +3331,7 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
 
         // The last batch should now be drained and sent
         runUntil(() -> transactionManager.hasInflightBatches(tp1));
-        assertTrue(accumulator.batches().get(tp1).isEmpty());
+        assertTrue(accumulator.getDeque(tp1).isEmpty());
         ProducerBatch tp1b3 = transactionManager.nextBatchBySequence(tp1);
         assertEquals(epoch + 1, tp1b3.producerEpoch());
 
@@ -3282,8 +3346,8 @@ MAX_REQUEST_SIZE, ACKS_ALL, MAX_RETRIES, new SenderMetricsRegistry(new Metrics(t
 
     private FutureRecordMetadata appendToAccumulator(TopicPartition tp) throws InterruptedException {
         final long nowMs = time.milliseconds();
-        return accumulator.append(tp, nowMs, "key".getBytes(), "value".getBytes(), Record.EMPTY_HEADERS,
-                null, MAX_BLOCK_TIMEOUT, false, nowMs).future;
+        return accumulator.append(tp.topic(), tp.partition(), nowMs, "key".getBytes(), "value".getBytes(), Record.EMPTY_HEADERS,
+                null, MAX_BLOCK_TIMEOUT, false, nowMs, TestUtils.singletonCluster()).future;
     }
 
     private void verifyCommitOrAbortTransactionRetriable(TransactionResult firstTransactionResult,
diff --git a/clients/src/test/java/org/apache/kafka/common/UuidTest.java b/clients/src/test/java/org/apache/kafka/common/UuidTest.java
index 232b9927c896d..67d841925cef9 100644
--- a/clients/src/test/java/org/apache/kafka/common/UuidTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/UuidTest.java
@@ -16,11 +16,13 @@
  */
 package org.apache.kafka.common;
 
+import org.junit.jupiter.api.RepeatedTest;
 import org.junit.jupiter.api.Test;
 
 import java.util.Base64;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
@@ -72,12 +74,13 @@ public void testStringConversion() {
         assertEquals(Uuid.fromString(zeroIdString), Uuid.ZERO_UUID);
     }
 
-    @Test
+    @RepeatedTest(100)
     public void testRandomUuid() {
         Uuid randomID = Uuid.randomUuid();
 
         assertNotEquals(randomID, Uuid.ZERO_UUID);
         assertNotEquals(randomID, Uuid.METADATA_TOPIC_ID);
+        assertFalse(randomID.toString().startsWith("-"));
     }
 
     @Test
diff --git a/clients/src/test/java/org/apache/kafka/common/acl/AclOperationTest.java b/clients/src/test/java/org/apache/kafka/common/acl/AclOperationTest.java
index c807e2be95a7e..b91db6f206c2f 100644
--- a/clients/src/test/java/org/apache/kafka/common/acl/AclOperationTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/acl/AclOperationTest.java
@@ -48,7 +48,9 @@ private static class AclOperationTestInfo {
         new AclOperationTestInfo(AclOperation.CLUSTER_ACTION, 9, "cluster_action", false),
         new AclOperationTestInfo(AclOperation.DESCRIBE_CONFIGS, 10, "describe_configs", false),
         new AclOperationTestInfo(AclOperation.ALTER_CONFIGS, 11, "alter_configs", false),
-        new AclOperationTestInfo(AclOperation.IDEMPOTENT_WRITE, 12, "idempotent_write", false)
+        new AclOperationTestInfo(AclOperation.IDEMPOTENT_WRITE, 12, "idempotent_write", false),
+        new AclOperationTestInfo(AclOperation.CREATE_TOKENS, 13, "create_tokens", false),
+        new AclOperationTestInfo(AclOperation.DESCRIBE_TOKENS, 14, "describe_tokens", false)
     };
 
     @Test
diff --git a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java
index 893f68b89e6b3..76c20df4edffe 100644
--- a/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/config/ConfigDefTest.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.common.config.ConfigDef.CaseInsensitiveValidString;
 import org.apache.kafka.common.config.ConfigDef.Importance;
+import org.apache.kafka.common.config.ConfigDef.ListSize;
 import org.apache.kafka.common.config.ConfigDef.Range;
 import org.apache.kafka.common.config.ConfigDef.Type;
 import org.apache.kafka.common.config.ConfigDef.ValidString;
@@ -38,6 +39,8 @@
 import java.util.Set;
 
 import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
@@ -426,7 +429,7 @@ public void testNames() {
     public void testMissingDependentConfigs() {
         // Should not be possible to parse a config if a dependent config has not been defined
         final ConfigDef configDef = new ConfigDef()
-                .define("parent", Type.STRING, Importance.HIGH, "parent docs", "group", 1, Width.LONG, "Parent", Collections.singletonList("child"));
+                .define("parent", Type.STRING, Importance.HIGH, "parent docs", "group", 1, Width.LONG, "Parent", singletonList("child"));
         assertThrows(ConfigException.class, () -> configDef.parse(Collections.emptyMap()));
     }
 
@@ -438,7 +441,7 @@ public void testBaseConfigDefDependents() {
         assertEquals(new HashSet<>(Arrays.asList("a")), baseConfigDef.getConfigsWithNoParent());
 
         final ConfigDef configDef = new ConfigDef(baseConfigDef)
-                .define("parent", Type.STRING, Importance.HIGH, "parent docs", "group", 1, Width.LONG, "Parent", Collections.singletonList("child"))
+                .define("parent", Type.STRING, Importance.HIGH, "parent docs", "group", 1, Width.LONG, "Parent", singletonList("child"))
                 .define("child", Type.STRING, Importance.HIGH, "docs");
 
         assertEquals(new HashSet<>(Arrays.asList("a", "parent")), configDef.getConfigsWithNoParent());
@@ -541,7 +544,7 @@ public void toEnrichedRst() {
                 .define("opt2.of.group2", Type.BOOLEAN, false, Importance.HIGH, "Doc doc doc doc.",
                         "Group Two", 1, Width.NONE, "..", Collections.<String>emptyList())
                 .define("opt1.of.group2", Type.BOOLEAN, false, Importance.HIGH, "Doc doc doc doc doc.",
-                        "Group Two", 0, Width.NONE, "..", Collections.singletonList("some.option"))
+                        "Group Two", 0, Width.NONE, "..", singletonList("some.option"))
                 .define("poor.opt", Type.STRING, "foo", Importance.HIGH, "Doc doc doc doc.");
 
         final String expectedRst = "" +
@@ -722,4 +725,42 @@ public void testNiceTimeUnits() {
         assertEquals(" (365 days)", ConfigDef.niceTimeUnits(Duration.ofDays(365).toMillis()));
     }
 
+    @Test
+    public void testThrowsExceptionWhenListSizeExceedsLimit() {
+        final ConfigException exception = assertThrows(ConfigException.class, () -> new ConfigDef().define("lst",
+                                                                                                           Type.LIST,
+                                                                                                           asList("a", "b"),
+                                                                                                           ListSize.atMostOfSize(1),
+                                                                                                           Importance.HIGH,
+                                                                                                           "lst doc"));
+        assertEquals("Invalid value [a, b] for configuration lst: exceeds maximum list size of [1].",
+                     exception.getMessage());
+    }
+
+    @Test
+    public void testNoExceptionIsThrownWhenListSizeEqualsTheLimit() {
+        final List<String> lst = asList("a", "b", "c");
+        assertDoesNotThrow(() -> new ConfigDef().define("lst",
+                                                        Type.LIST,
+                                                        lst,
+                                                        ListSize.atMostOfSize(lst.size()),
+                                                        Importance.HIGH,
+                                                        "lst doc"));
+    }
+
+    @Test
+    public void testNoExceptionIsThrownWhenListSizeIsBelowTheLimit() {
+        assertDoesNotThrow(() -> new ConfigDef().define("lst",
+                                                        Type.LIST,
+                                                        asList("a", "b"),
+                                                        ListSize.atMostOfSize(3),
+                                                        Importance.HIGH,
+                                                        "lst doc"));
+    }
+
+    @Test
+    public void testListSizeValidatorToString() {
+        assertEquals("List containing maximum of 5 elements", ListSize.atMostOfSize(5).toString());
+    }
+
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/feature/FeaturesTest.java b/clients/src/test/java/org/apache/kafka/common/feature/FeaturesTest.java
index 88b3471208989..0b2bc4f50a2ac 100644
--- a/clients/src/test/java/org/apache/kafka/common/feature/FeaturesTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/feature/FeaturesTest.java
@@ -25,7 +25,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -36,11 +36,6 @@ public class FeaturesTest {
     public void testEmptyFeatures() {
         Map<String, Map<String, Short>> emptyMap = new HashMap<>();
 
-        Features<FinalizedVersionRange> emptyFinalizedFeatures = Features.emptyFinalizedFeatures();
-        assertTrue(emptyFinalizedFeatures.features().isEmpty());
-        assertTrue(emptyFinalizedFeatures.toMap().isEmpty());
-        assertEquals(emptyFinalizedFeatures, Features.fromFinalizedFeaturesMap(emptyMap));
-
         Features<SupportedVersionRange> emptySupportedFeatures = Features.emptySupportedFeatures();
         assertTrue(emptySupportedFeatures.features().isEmpty());
         assertTrue(emptySupportedFeatures.toMap().isEmpty());
@@ -49,9 +44,6 @@ public void testEmptyFeatures() {
 
     @Test
     public void testNullFeatures() {
-        assertThrows(
-            NullPointerException.class,
-            () -> Features.finalizedFeatures(null));
         assertThrows(
             NullPointerException.class,
             () -> Features.supportedFeatures(null));
@@ -93,34 +85,6 @@ public void testFromFeaturesMapToFeaturesMap() {
         assertEquals(features, Features.fromSupportedFeaturesMap(expected));
     }
 
-    @Test
-    public void testFromToFinalizedFeaturesMap() {
-        FinalizedVersionRange v1 = new FinalizedVersionRange((short) 1, (short) 2);
-        FinalizedVersionRange v2 = new FinalizedVersionRange((short) 3, (short) 4);
-        Map<String, FinalizedVersionRange> allFeatures = mkMap(mkEntry("feature_1", v1), mkEntry("feature_2", v2));
-
-        Features<FinalizedVersionRange> features = Features.finalizedFeatures(allFeatures);
-
-        Map<String, Map<String, Short>> expected = mkMap(
-            mkEntry("feature_1", mkMap(mkEntry("min_version_level", (short) 1), mkEntry("max_version_level", (short) 2))),
-            mkEntry("feature_2", mkMap(mkEntry("min_version_level", (short) 3), mkEntry("max_version_level", (short) 4))));
-        assertEquals(expected, features.toMap());
-        assertEquals(features, Features.fromFinalizedFeaturesMap(expected));
-    }
-
-    @Test
-    public void testToStringFinalizedFeatures() {
-        FinalizedVersionRange v1 = new FinalizedVersionRange((short) 1, (short) 2);
-        FinalizedVersionRange v2 = new FinalizedVersionRange((short) 3, (short) 4);
-        Map<String, FinalizedVersionRange> allFeatures = mkMap(mkEntry("feature_1", v1), mkEntry("feature_2", v2));
-
-        Features<FinalizedVersionRange> features = Features.finalizedFeatures(allFeatures);
-
-        assertEquals(
-            "Features{(feature_1 -> FinalizedVersionRange[min_version_level:1, max_version_level:2]), (feature_2 -> FinalizedVersionRange[min_version_level:3, max_version_level:4])}",
-            features.toString());
-    }
-
     @Test
     public void testToStringSupportedFeatures() {
         SupportedVersionRange v1 = new SupportedVersionRange((short) 1, (short) 2);
@@ -145,29 +109,19 @@ public void testSuppportedFeaturesFromMapFailureWithInvalidMissingMaxVersion() {
             () -> Features.fromSupportedFeaturesMap(invalidFeatures));
     }
 
-    @Test
-    public void testFinalizedFeaturesFromMapFailureWithInvalidMissingMaxVersionLevel() {
-        // This is invalid because 'max_version_level' key is missing.
-        Map<String, Map<String, Short>> invalidFeatures = mkMap(
-            mkEntry("feature_1", mkMap(mkEntry("min_version_level", (short) 1))));
-        assertThrows(
-            IllegalArgumentException.class,
-            () -> Features.fromFinalizedFeaturesMap(invalidFeatures));
-    }
-
     @Test
     public void testEquals() {
         SupportedVersionRange v1 = new SupportedVersionRange((short) 1, (short) 2);
         Map<String, SupportedVersionRange> allFeatures = mkMap(mkEntry("feature_1", v1));
         Features<SupportedVersionRange> features = Features.supportedFeatures(allFeatures);
         Features<SupportedVersionRange> featuresClone = Features.supportedFeatures(allFeatures);
-        assertTrue(features.equals(featuresClone));
+        assertEquals(features, featuresClone);
 
         SupportedVersionRange v2 = new SupportedVersionRange((short) 1, (short) 3);
         Map<String, SupportedVersionRange> allFeaturesDifferent = mkMap(mkEntry("feature_1", v2));
         Features<SupportedVersionRange> featuresDifferent = Features.supportedFeatures(allFeaturesDifferent);
-        assertFalse(features.equals(featuresDifferent));
+        assertNotEquals(features, featuresDifferent);
 
-        assertFalse(features.equals(null));
+        assertNotEquals(null, features);
     }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/feature/FinalizedVersionRangeTest.java b/clients/src/test/java/org/apache/kafka/common/feature/FinalizedVersionRangeTest.java
deleted file mode 100644
index 989c4bd1a543b..0000000000000
--- a/clients/src/test/java/org/apache/kafka/common/feature/FinalizedVersionRangeTest.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.kafka.common.feature;
-
-import java.util.Map;
-
-import org.junit.jupiter.api.Test;
-
-import static org.apache.kafka.common.utils.Utils.mkEntry;
-import static org.apache.kafka.common.utils.Utils.mkMap;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-/**
- * Unit tests for the FinalizedVersionRange class.
- *
- * Most of the unit tests required for BaseVersionRange are part of the SupportedVersionRangeTest
- * suite. This suite only tests behavior very specific to FinalizedVersionRange.
- */
-public class FinalizedVersionRangeTest {
-
-    @Test
-    public void testFromToMap() {
-        FinalizedVersionRange versionRange = new FinalizedVersionRange((short) 1, (short) 2);
-        assertEquals(1, versionRange.min());
-        assertEquals(2, versionRange.max());
-
-        Map<String, Short> versionRangeMap = versionRange.toMap();
-        assertEquals(
-            mkMap(
-                mkEntry("min_version_level", versionRange.min()),
-                mkEntry("max_version_level", versionRange.max())),
-            versionRangeMap);
-
-        FinalizedVersionRange newVersionRange = FinalizedVersionRange.fromMap(versionRangeMap);
-        assertEquals(1, newVersionRange.min());
-        assertEquals(2, newVersionRange.max());
-        assertEquals(versionRange, newVersionRange);
-    }
-
-    @Test
-    public void testToString() {
-        assertEquals("FinalizedVersionRange[min_version_level:1, max_version_level:1]", new FinalizedVersionRange((short) 1, (short) 1).toString());
-        assertEquals("FinalizedVersionRange[min_version_level:1, max_version_level:2]", new FinalizedVersionRange((short) 1, (short) 2).toString());
-    }
-
-    @Test
-    public void testIsCompatibleWith() {
-        assertFalse(new FinalizedVersionRange((short) 1, (short) 1).isIncompatibleWith(new SupportedVersionRange((short) 1, (short) 1)));
-        assertFalse(new FinalizedVersionRange((short) 2, (short) 3).isIncompatibleWith(new SupportedVersionRange((short) 1, (short) 4)));
-        assertFalse(new FinalizedVersionRange((short) 1, (short) 4).isIncompatibleWith(new SupportedVersionRange((short) 1, (short) 4)));
-
-        assertTrue(new FinalizedVersionRange((short) 1, (short) 4).isIncompatibleWith(new SupportedVersionRange((short) 2, (short) 3)));
-        assertTrue(new FinalizedVersionRange((short) 1, (short) 4).isIncompatibleWith(new SupportedVersionRange((short) 2, (short) 4)));
-        assertTrue(new FinalizedVersionRange((short) 2, (short) 4).isIncompatibleWith(new SupportedVersionRange((short) 2, (short) 3)));
-    }
-
-    @Test
-    public void testMinMax() {
-        FinalizedVersionRange versionRange = new FinalizedVersionRange((short) 1, (short) 2);
-        assertEquals(1, versionRange.min());
-        assertEquals(2, versionRange.max());
-    }
-}
diff --git a/clients/src/test/java/org/apache/kafka/common/feature/SupportedVersionRangeTest.java b/clients/src/test/java/org/apache/kafka/common/feature/SupportedVersionRangeTest.java
index acf452d820d84..a1d2af419fd2f 100644
--- a/clients/src/test/java/org/apache/kafka/common/feature/SupportedVersionRangeTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/feature/SupportedVersionRangeTest.java
@@ -25,6 +25,7 @@
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -128,9 +129,9 @@ public void testToString() {
     @Test
     public void testEquals() {
         SupportedVersionRange tested = new SupportedVersionRange((short) 1, (short) 1);
-        assertTrue(tested.equals(tested));
-        assertFalse(tested.equals(new SupportedVersionRange((short) 1, (short) 2)));
-        assertFalse(tested.equals(null));
+        assertEquals(tested, tested);
+        assertNotEquals(tested, new SupportedVersionRange((short) 1, (short) 2));
+        assertNotEquals(null, tested);
     }
 
     @Test
@@ -139,4 +140,15 @@ public void testMinMax() {
         assertEquals(1, versionRange.min());
         assertEquals(2, versionRange.max());
     }
+
+    @Test
+    public void testIsIncompatibleWith() {
+        assertFalse(new SupportedVersionRange((short) 1, (short) 1).isIncompatibleWith((short) 1));
+        assertFalse(new SupportedVersionRange((short) 1, (short) 4).isIncompatibleWith((short) 2));
+        assertFalse(new SupportedVersionRange((short) 1, (short) 4).isIncompatibleWith((short) 1));
+        assertFalse(new SupportedVersionRange((short) 1, (short) 4).isIncompatibleWith((short) 4));
+
+        assertTrue(new SupportedVersionRange((short) 2, (short) 3).isIncompatibleWith((short) 1));
+        assertTrue(new SupportedVersionRange((short) 2, (short) 3).isIncompatibleWith((short) 4));
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/internals/TopicTest.java b/clients/src/test/java/org/apache/kafka/common/internals/TopicTest.java
index 9bf237fb1b31d..03c0811fa4509 100644
--- a/clients/src/test/java/org/apache/kafka/common/internals/TopicTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/internals/TopicTest.java
@@ -24,6 +24,7 @@
 import java.util.Collections;
 import java.util.List;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
@@ -81,6 +82,14 @@ public void testTopicHasCollisionChars() {
             assertTrue(Topic.hasCollisionChars(topic));
     }
 
+    @Test
+    public void testUnifyCollisionChars() {
+        assertEquals("topic", Topic.unifyCollisionChars("topic"));
+        assertEquals("_topic", Topic.unifyCollisionChars(".topic"));
+        assertEquals("_topic", Topic.unifyCollisionChars("_topic"));
+        assertEquals("__topic", Topic.unifyCollisionChars("_.topic"));
+    }
+
     @Test
     public void testTopicHasCollision() {
         List<String> periodFirstMiddleLastNone = Arrays.asList(".topic", "to.pic", "topic.", "topic");
diff --git a/clients/src/test/java/org/apache/kafka/common/message/SimpleExampleMessageTest.java b/clients/src/test/java/org/apache/kafka/common/message/SimpleExampleMessageTest.java
index 1cdafcd0fdc0e..b904eed2721a6 100644
--- a/clients/src/test/java/org/apache/kafka/common/message/SimpleExampleMessageTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/message/SimpleExampleMessageTest.java
@@ -30,6 +30,8 @@
 import java.util.Collections;
 import java.util.function.Consumer;
 
+import static org.apache.kafka.common.protocol.MessageUtil.UNSIGNED_INT_MAX;
+import static org.apache.kafka.common.protocol.MessageUtil.UNSIGNED_SHORT_MAX;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
@@ -179,6 +181,18 @@ public void testMyInt16() {
             message -> assertEquals((short) 456, message.myInt16()));
     }
 
+    @Test
+    public void testMyUint32() {
+        // Verify that the uint16 field reads as 33000 when not set.
+        testRoundTrip(new SimpleExampleMessageData(),
+                message -> assertEquals(1234567, message.myUint32()));
+
+        testRoundTrip(new SimpleExampleMessageData().setMyUint32(123),
+                message -> assertEquals(123, message.myUint32()));
+        testRoundTrip(new SimpleExampleMessageData().setMyUint32(60000),
+                message -> assertEquals(60000, message.myUint32()));
+    }
+
     @Test
     public void testMyUint16() {
         // Verify that the uint16 field reads as 33000 when not set.
@@ -206,7 +220,12 @@ public void testMyBytes() {
         assertThrows(RuntimeException.class,
             () -> new SimpleExampleMessageData().setMyUint16(-1));
         assertThrows(RuntimeException.class,
-            () -> new SimpleExampleMessageData().setMyUint16(65536));
+            () -> new SimpleExampleMessageData().setMyUint16(UNSIGNED_SHORT_MAX + 1));
+
+        assertThrows(RuntimeException.class,
+                () -> new SimpleExampleMessageData().setMyUint32(-1));
+        assertThrows(RuntimeException.class,
+                () -> new SimpleExampleMessageData().setMyUint32(UNSIGNED_INT_MAX + 1));
 
         // Verify that the tagged field reads as empty when not set.
         testRoundTrip(new SimpleExampleMessageData(),
@@ -355,6 +374,7 @@ public void testToString() {
                 "myTaggedStruct=TaggedStruct(structId=''), " +
                 "myCommonStruct=TestCommonStruct(foo=123, bar=123), " +
                 "myOtherCommonStruct=TestCommonStruct(foo=123, bar=123), " +
-                "myUint16=65535)", message.toString());
+                "myUint16=65535, " +
+                "myUint32=1234567)", message.toString());
     }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java b/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java
index 3dd114d9fd4b1..bc1fc5d9e5624 100644
--- a/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/metrics/MetricsTest.java
@@ -45,6 +45,7 @@
 
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.metrics.internals.MetricsUtils;
 import org.apache.kafka.common.metrics.stats.Avg;
 import org.apache.kafka.common.metrics.stats.CumulativeSum;
 import org.apache.kafka.common.metrics.stats.Max;
@@ -607,15 +608,15 @@ public void testRateWindowing() throws Exception {
         // Sleep for half the window.
         time.sleep(cfg.timeWindowMs() / 2);
 
-        // prior to any time passing
-        double elapsedSecs = (cfg.timeWindowMs() * (cfg.samples() - 1) + cfg.timeWindowMs() / 2) / 1000.0;
+        // prior to any time passing, elapsedSecs = sampleWindowSize * (total samples - half of final sample)
+        double elapsedSecs = MetricsUtils.convert(cfg.timeWindowMs(), TimeUnit.SECONDS) * (cfg.samples() - 0.5);
 
         KafkaMetric rateMetric = metrics.metrics().get(rateMetricName);
         KafkaMetric countRateMetric = metrics.metrics().get(countRateMetricName);
         assertEquals(sum / elapsedSecs, (Double) rateMetric.metricValue(), EPS, "Rate(0...2) = 2.666");
         assertEquals(count / elapsedSecs, (Double) countRateMetric.metricValue(), EPS, "Count rate(0...2) = 0.02666");
         assertEquals(elapsedSecs,
-                ((Rate) rateMetric.measurable()).windowSize(cfg, time.milliseconds()) / 1000, EPS, "Elapsed Time = 75 seconds");
+                MetricsUtils.convert(((Rate) rateMetric.measurable()).windowSize(cfg, time.milliseconds()), TimeUnit.SECONDS), EPS, "Elapsed Time = 75 seconds");
         assertEquals(sum, (Double) totalMetric.metricValue(), EPS);
         assertEquals(count, (Double) countTotalMetric.metricValue(), EPS);
 
diff --git a/clients/src/test/java/org/apache/kafka/common/metrics/stats/RateTest.java b/clients/src/test/java/org/apache/kafka/common/metrics/stats/RateTest.java
new file mode 100644
index 0000000000000..04c5ca1292f98
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/common/metrics/stats/RateTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.common.metrics.stats;
+
+import org.apache.kafka.common.metrics.MetricConfig;
+import org.apache.kafka.common.metrics.internals.MetricsUtils;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.common.utils.Time;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+
+import java.util.concurrent.TimeUnit;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+public class RateTest {
+    private static final double EPS = 0.000001;
+    private Rate r;
+    private Time timeClock;
+
+    @BeforeEach
+    public void setup() {
+        r = new Rate();
+        timeClock = new MockTime();
+    }
+
+    // Tests the scenario where the recording and measurement is done before the window for first sample finishes
+    // with no prior samples retained.
+    @ParameterizedTest
+    @CsvSource({"1,1", "1,11", "11,1", "11,11"})
+    public void testRateWithNoPriorAvailableSamples(int numSample, int sampleWindowSizeSec) {
+        final MetricConfig config = new MetricConfig().samples(numSample).timeWindow(sampleWindowSizeSec, TimeUnit.SECONDS);
+        final double sampleValue = 50.0;
+        // record at beginning of the window
+        r.record(config, sampleValue, timeClock.milliseconds());
+        // forward time till almost the end of window
+        final long measurementTime = TimeUnit.SECONDS.toMillis(sampleWindowSizeSec) - 1;
+        timeClock.sleep(measurementTime);
+        // calculate rate at almost the end of window
+        final double observedRate = r.measure(config, timeClock.milliseconds());
+        assertFalse(Double.isNaN(observedRate));
+
+        // In a scenario where sufficient number of samples is not available yet, the rate calculation algorithm assumes
+        // presence of N-1 (where N = numSample) prior samples with sample values of 0. Hence, the window size for rate
+        // calculation accounts for N-1 prior samples
+        final int dummyPriorSamplesAssumedByAlgorithm = numSample - 1;
+        final double windowSize = MetricsUtils.convert(measurementTime, TimeUnit.SECONDS) + (dummyPriorSamplesAssumedByAlgorithm * sampleWindowSizeSec);
+        double expectedRatePerSec = sampleValue / windowSize;
+        assertEquals(expectedRatePerSec, observedRate, EPS);
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/common/network/NetworkTestUtils.java b/clients/src/test/java/org/apache/kafka/common/network/NetworkTestUtils.java
index 002d76ffe017f..1d379a15cbe63 100644
--- a/clients/src/test/java/org/apache/kafka/common/network/NetworkTestUtils.java
+++ b/clients/src/test/java/org/apache/kafka/common/network/NetworkTestUtils.java
@@ -22,6 +22,7 @@
 import java.util.Map;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import org.apache.kafka.common.config.AbstractConfig;
@@ -87,13 +88,23 @@ public static void checkClientConnection(Selector selector, String node, int min
         }
     }
 
+    public static void waitForChannelConnected(Selector selector, String node) throws IOException {
+        int secondsLeft = 30;
+        while (selector.channel(node) != null
+                && !selector.channel(node).isConnected() && secondsLeft-- > 0) {
+            selector.poll(1000L);
+        }
+        assertNotNull(selector.channel(node));
+        assertTrue(selector.channel(node).isConnected(), String.format("Channel %s was not connected after 30 seconds", node));
+    }
+
     public static void waitForChannelReady(Selector selector, String node) throws IOException {
         // wait for handshake to finish
         int secondsLeft = 30;
         while (!selector.isChannelReady(node) && secondsLeft-- > 0) {
             selector.poll(1000L);
         }
-        assertTrue(selector.isChannelReady(node));
+        assertTrue(selector.isChannelReady(node), String.format("Channel %s was not ready after 30 seconds", node));
     }
 
     public static ChannelState waitForChannelClose(Selector selector, String node, ChannelState.State channelState) throws IOException {
diff --git a/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java b/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java
index f276cd4211a3c..09f14531def71 100644
--- a/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/network/SelectorTest.java
@@ -72,7 +72,6 @@
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
-
 /**
  * A set of tests for the selector. These use a test harness that runs a simple socket server that echos back responses.
  */
@@ -80,6 +79,7 @@
 public class SelectorTest {
     protected static final int BUFFER_SIZE = 4 * 1024;
     private static final String METRIC_GROUP = "MetricGroup";
+    private static final long CONNECTION_MAX_IDLE_MS = 5_000;
 
     protected EchoServer server;
     protected Time time;
@@ -96,7 +96,7 @@ public void setUp() throws Exception {
         this.channelBuilder = new PlaintextChannelBuilder(ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT));
         this.channelBuilder.configure(clientConfigs());
         this.metrics = new Metrics();
-        this.selector = new Selector(5000, this.metrics, time, METRIC_GROUP, channelBuilder, new LogContext());
+        this.selector = new Selector(CONNECTION_MAX_IDLE_MS, this.metrics, time, METRIC_GROUP, channelBuilder, new LogContext());
     }
 
     @AfterEach
@@ -110,10 +110,6 @@ public void tearDown() throws Exception {
         }
     }
 
-    public SecurityProtocol securityProtocol() {
-        return SecurityProtocol.PLAINTEXT;
-    }
-
     protected Map<String, Object> clientConfigs() {
         return new HashMap<>();
     }
@@ -422,7 +418,7 @@ public void close() throws IOException {
             }
         };
         channelBuilder.configure(clientConfigs());
-        Selector selector = new Selector(5000, new Metrics(), new MockTime(), "MetricGroup", channelBuilder, new LogContext());
+        Selector selector = new Selector(CONNECTION_MAX_IDLE_MS, new Metrics(), new MockTime(), "MetricGroup", channelBuilder, new LogContext());
         selector.connect("0", new InetSocketAddress("localhost", server.port), BUFFER_SIZE, BUFFER_SIZE);
         selector.connect("1", new InetSocketAddress("localhost", server.port), BUFFER_SIZE, BUFFER_SIZE);
         assertThrows(RuntimeException.class, selector::close);
@@ -441,7 +437,7 @@ public KafkaChannel buildChannel(String id, SelectionKey key, int maxReceiveSize
             public void close() {
             }
         };
-        Selector selector = new Selector(5000, new Metrics(), new MockTime(), "MetricGroup", channelBuilder, new LogContext());
+        Selector selector = new Selector(CONNECTION_MAX_IDLE_MS, new Metrics(), new MockTime(), "MetricGroup", channelBuilder, new LogContext());
         SocketChannel socketChannel = SocketChannel.open();
         socketChannel.configureBlocking(false);
         IOException e = assertThrows(IOException.class, () -> selector.register("1", socketChannel));
@@ -453,9 +449,9 @@ public void close() {
     @Test
     public void testCloseOldestConnection() throws Exception {
         String id = "0";
-        blockingConnect(id);
-
-        time.sleep(6000); // The max idle time is 5000ms
+        selector.connect(id, new InetSocketAddress("localhost", server.port), BUFFER_SIZE, BUFFER_SIZE);
+        NetworkTestUtils.waitForChannelConnected(selector, id);
+        time.sleep(CONNECTION_MAX_IDLE_MS + 1_000);
         selector.poll(0);
 
         assertTrue(selector.disconnected().containsKey(id), "The idle connection should have been closed");
@@ -469,7 +465,7 @@ public void testIdleExpiryWithoutReadyKeys() throws IOException {
         KafkaChannel channel = selector.channel(id);
         channel.selectionKey().interestOps(0);
 
-        time.sleep(6000); // The max idle time is 5000ms
+        time.sleep(CONNECTION_MAX_IDLE_MS + 1_000);
         selector.poll(0);
         assertTrue(selector.disconnected().containsKey(id), "The idle connection should have been closed");
         assertEquals(ChannelState.EXPIRED, selector.disconnected().get(id));
@@ -478,7 +474,7 @@ public void testIdleExpiryWithoutReadyKeys() throws IOException {
     @Test
     public void testImmediatelyConnectedCleaned() throws Exception {
         Metrics metrics = new Metrics(); // new metrics object to avoid metric registration conflicts
-        Selector selector = new ImmediatelyConnectingSelector(5000, metrics, time, "MetricGroup", channelBuilder, new LogContext());
+        Selector selector = new ImmediatelyConnectingSelector(CONNECTION_MAX_IDLE_MS, metrics, time, "MetricGroup", channelBuilder, new LogContext());
 
         try {
             testImmediatelyConnectedCleaned(selector, true);
@@ -529,7 +525,7 @@ private void testImmediatelyConnectedCleaned(Selector selector, boolean closeAft
     public void testConnectException() throws Exception {
         Metrics metrics = new Metrics();
         AtomicBoolean throwIOException = new AtomicBoolean();
-        Selector selector = new ImmediatelyConnectingSelector(5000, metrics, time, "MetricGroup", channelBuilder, new LogContext()) {
+        Selector selector = new ImmediatelyConnectingSelector(CONNECTION_MAX_IDLE_MS, metrics, time, "MetricGroup", channelBuilder, new LogContext()) {
             @Override
             protected SelectionKey registerChannel(String id, SocketChannel socketChannel, int interestedOps) throws IOException {
                 SelectionKey key = super.registerChannel(id, socketChannel, interestedOps);
@@ -585,7 +581,7 @@ public void testExpireClosedConnectionWithPendingReceives() throws Exception {
     private void verifyChannelExpiry(KafkaChannel channel) throws Exception {
         String id = channel.id();
         selector.mute(id); // Mute to allow channel to be expired even if more data is available for read
-        time.sleep(6000);  // The max idle time is 5000ms
+        time.sleep(CONNECTION_MAX_IDLE_MS + 1_000);
         selector.poll(0);
         assertNull(selector.channel(id), "Channel not expired");
         assertNull(selector.closingChannel(id), "Channel not removed from closingChannels");
@@ -607,23 +603,18 @@ private void verifyChannelExpiry(KafkaChannel channel) throws Exception {
     public void testCloseOldestConnectionWithMultiplePendingReceives() throws Exception {
         int expectedReceives = 5;
         KafkaChannel channel = createConnectionWithPendingReceives(expectedReceives);
-        String id = channel.id();
-        int completedReceives = 0;
+        int completedReceives = selector.completedReceives().size();
+
         while (selector.disconnected().isEmpty()) {
-            time.sleep(6000); // The max idle time is 5000ms
-            selector.poll(completedReceives == expectedReceives ? 0 : 1000);
+            time.sleep(CONNECTION_MAX_IDLE_MS + 1_000);
+            selector.poll(completedReceives == expectedReceives ? 0 : 1_000);
             completedReceives += selector.completedReceives().size();
-            if (!selector.completedReceives().isEmpty()) {
-                assertEquals(1, selector.completedReceives().size());
-                assertNotNull(selector.channel(id), "Channel should not have been expired");
-                assertTrue(selector.closingChannel(id) != null || selector.channel(id) != null, "Channel not found");
-                assertFalse(selector.disconnected().containsKey(id), "Disconnect notified too early");
-            }
         }
+
         assertEquals(expectedReceives, completedReceives);
-        assertNull(selector.channel(id), "Channel not removed");
-        assertNull(selector.closingChannel(id), "Channel not removed");
-        assertTrue(selector.disconnected().containsKey(id), "Disconnect not notified");
+        assertNull(selector.channel(channel.id()), "Channel not expired");
+        assertNull(selector.closingChannel(channel.id()), "Channel not expired");
+        assertTrue(selector.disconnected().containsKey(channel.id()), "Disconnect not notified");
         assertTrue(selector.completedReceives().isEmpty(), "Unexpected receive");
     }
 
@@ -689,7 +680,7 @@ public void testMuteOnOOM() throws Exception {
         //clean up default selector, replace it with one that uses a finite mem pool
         selector.close();
         MemoryPool pool = new SimpleMemoryPool(900, 900, false, null);
-        selector = new Selector(NetworkReceive.UNLIMITED, 5000, metrics, time, "MetricGroup",
+        selector = new Selector(NetworkReceive.UNLIMITED, CONNECTION_MAX_IDLE_MS, metrics, time, "MetricGroup",
             new HashMap<String, String>(), true, false, channelBuilder, pool, new LogContext());
 
         try (ServerSocketChannel ss = ServerSocketChannel.open()) {
@@ -785,14 +776,13 @@ public void testConnectDisconnectDuringInSinglePoll() throws Exception {
         when(kafkaChannel.selectionKey()).thenReturn(selectionKey);
         when(selectionKey.channel()).thenReturn(SocketChannel.open());
         when(selectionKey.readyOps()).thenReturn(SelectionKey.OP_CONNECT);
+        when(selectionKey.attachment()).thenReturn(kafkaChannel);
 
-        selectionKey.attach(kafkaChannel);
         Set<SelectionKey> selectionKeys = Utils.mkSet(selectionKey);
         selector.pollSelectionKeys(selectionKeys, false, System.nanoTime());
 
         assertFalse(selector.connected().contains(kafkaChannel.id()));
         assertTrue(selector.disconnected().containsKey(kafkaChannel.id()));
-        assertNull(selectionKey.attachment());
 
         verify(kafkaChannel, atLeastOnce()).ready();
         verify(kafkaChannel).disconnect();
@@ -931,7 +921,7 @@ public void testLowestPriorityChannel() throws Exception {
     @Test
     public void testMetricsCleanupOnSelectorClose() throws Exception {
         Metrics metrics = new Metrics();
-        Selector selector = new ImmediatelyConnectingSelector(5000, metrics, time, "MetricGroup", channelBuilder, new LogContext()) {
+        Selector selector = new ImmediatelyConnectingSelector(CONNECTION_MAX_IDLE_MS, metrics, time, "MetricGroup", channelBuilder, new LogContext()) {
             @Override
             public void close(String id) {
                 throw new RuntimeException();
@@ -980,8 +970,11 @@ public void testChannelCloseWhileProcessingReceives() throws Exception {
             SelectionKey selectionKey = mock(SelectionKey.class);
             when(channel.selectionKey()).thenReturn(selectionKey);
             when(selectionKey.isValid()).thenReturn(true);
+            when(selectionKey.isReadable()).thenReturn(true);
             when(selectionKey.readyOps()).thenReturn(SelectionKey.OP_READ);
-            selectionKey.attach(channel);
+            when(selectionKey.attachment())
+                    .thenReturn(channel)
+                    .thenReturn(null);
             selectionKeys.add(selectionKey);
 
             NetworkReceive receive = mock(NetworkReceive.class);
@@ -1015,7 +1008,6 @@ public void testChannelCloseWhileProcessingReceives() throws Exception {
 
     private String blockingRequest(String node, String s) throws IOException {
         selector.send(createSend(node, s));
-        selector.poll(1000L);
         while (true) {
             selector.poll(1000L);
             for (NetworkReceive receive : selector.completedReceives())
@@ -1035,10 +1027,7 @@ private void blockingConnect(String node) throws IOException {
 
     protected void blockingConnect(String node, InetSocketAddress serverAddr) throws IOException {
         selector.connect(node, serverAddr, BUFFER_SIZE, BUFFER_SIZE);
-        while (!selector.connected().contains(node))
-            selector.poll(10000L);
-        while (!selector.isChannelReady(node))
-            selector.poll(10000L);
+        NetworkTestUtils.waitForChannelReady(selector, node);
     }
 
     protected final NetworkSend createSend(String node, String payload) {
@@ -1132,7 +1121,7 @@ private KafkaMetric findUntaggedMetricByName(String name) {
      * Creates a connection, sends the specified number of requests and returns without reading
      * any incoming data. Some of the incoming data may be in the socket buffers when this method
      * returns, but there is no guarantee that all the data from the server will be available
-     * immediately.
+     * immediately. 
      */
     private KafkaChannel createConnectionWithPendingReceives(int pendingReceives) throws Exception {
         String id = "0";
@@ -1143,20 +1132,18 @@ private KafkaChannel createConnectionWithPendingReceives(int pendingReceives) th
     }
 
     /**
-     * Sends the specified number of requests and waits for the requests to be sent. The channel
-     * is muted during polling to ensure that incoming data is not received.
+     * Sends the specified number of requests and waits for the requests to be sent.
+     * The channel is muted during polling to ensure that incoming data is not received.
      */
-    private KafkaChannel sendNoReceive(KafkaChannel channel, int numRequests) throws Exception {
-        channel.mute();
+    private void sendNoReceive(KafkaChannel channel, int numRequests) throws Exception {
+        selector.mute(channel.id());
         for (int i = 0; i < numRequests; i++) {
             selector.send(createSend(channel.id(), String.valueOf(i)));
             do {
                 selector.poll(10);
             } while (selector.completedSends().isEmpty());
         }
-        channel.maybeUnmute();
-
-        return channel;
+        selector.unmute(channel.id());
     }
 
     /**
diff --git a/clients/src/test/java/org/apache/kafka/common/network/SslSelectorTest.java b/clients/src/test/java/org/apache/kafka/common/network/SslSelectorTest.java
index 7f95566c9f981..5ea14efb84d88 100644
--- a/clients/src/test/java/org/apache/kafka/common/network/SslSelectorTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/network/SslSelectorTest.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.common.network;
 
 import java.nio.channels.SelectionKey;
+import java.security.GeneralSecurityException;
 import javax.net.ssl.SSLEngine;
 
 import org.apache.kafka.common.config.SecurityConfig;
@@ -43,11 +44,9 @@
 import java.nio.channels.ServerSocketChannel;
 import java.nio.channels.SocketChannel;
 import java.security.Security;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -61,7 +60,7 @@
 /**
  * A set of tests for the selector. These use a test harness that runs a simple socket server that echos back responses.
  */
-public class SslSelectorTest extends SelectorTest {
+public abstract class SslSelectorTest extends SelectorTest {
 
     private Map<String, Object> sslClientConfigs;
 
@@ -73,7 +72,7 @@ public void setUp() throws Exception {
         this.server = new EchoServer(SecurityProtocol.SSL, sslServerConfigs);
         this.server.start();
         this.time = new MockTime();
-        sslClientConfigs = TestSslUtils.createSslConfig(false, false, Mode.CLIENT, trustStoreFile, "client");
+        sslClientConfigs = createSslClientConfigs(trustStoreFile);
         LogContext logContext = new LogContext();
         this.channelBuilder = new SslChannelBuilder(Mode.CLIENT, null, false, logContext);
         this.channelBuilder.configure(sslClientConfigs);
@@ -81,6 +80,8 @@ public void setUp() throws Exception {
         this.selector = new Selector(5000, metrics, time, "MetricGroup", channelBuilder, logContext);
     }
 
+    protected abstract Map<String, Object> createSslClientConfigs(File trustStoreFile) throws GeneralSecurityException, IOException;
+
     @AfterEach
     public void tearDown() throws Exception {
         this.selector.close();
@@ -88,11 +89,6 @@ public void tearDown() throws Exception {
         this.metrics.close();
     }
 
-    @Override
-    public SecurityProtocol securityProtocol() {
-        return SecurityProtocol.PLAINTEXT;
-    }
-
     @Override
     protected Map<String, Object> clientConfigs() {
         return sslClientConfigs;
@@ -100,7 +96,6 @@ protected Map<String, Object> clientConfigs() {
 
     @Test
     public void testConnectionWithCustomKeyManager() throws Exception {
-
         TestProviderCreator testProviderCreator = new TestProviderCreator();
 
         int requestSize = 100 * 1024;
@@ -125,10 +120,7 @@ public void testConnectionWithCustomKeyManager() throws Exception {
         Selector selector = new Selector(5000, metrics, time, "MetricGroup", channelBuilder, new LogContext());
 
         selector.connect(node, new InetSocketAddress("localhost", server.port), BUFFER_SIZE, BUFFER_SIZE);
-        while (!selector.connected().contains(node))
-            selector.poll(10000L);
-        while (!selector.isChannelReady(node))
-            selector.poll(10000L);
+        NetworkTestUtils.waitForChannelReady(selector, node);
 
         selector.send(createSend(node, request));
 
@@ -249,35 +241,6 @@ void pollSelectionKeys(Set<SelectionKey> selectionKeys, boolean isImmediatelyCon
         verifySelectorEmpty();
     }
 
-    /**
-     * Renegotiation is not supported since it is potentially unsafe and it has been removed in TLS 1.3
-     */
-    @Test
-    public void testRenegotiationFails() throws Exception {
-        String node = "0";
-        // create connections
-        InetSocketAddress addr = new InetSocketAddress("localhost", server.port);
-        selector.connect(node, addr, BUFFER_SIZE, BUFFER_SIZE);
-
-        // send echo requests and receive responses
-        while (!selector.isChannelReady(node)) {
-            selector.poll(1000L);
-        }
-        selector.send(createSend(node, node + "-" + 0));
-        selector.poll(0L);
-        server.renegotiate();
-        selector.send(createSend(node, node + "-" + 1));
-        long expiryTime = System.currentTimeMillis() + 2000;
-
-        List<String> disconnected = new ArrayList<>();
-        while (!disconnected.contains(node) && System.currentTimeMillis() < expiryTime) {
-            selector.poll(10);
-            disconnected.addAll(selector.disconnected().keySet());
-        }
-        assertTrue(disconnected.contains(node), "Renegotiation should cause disconnection");
-
-    }
-
     @Override
     @Test
     public void testMuteOnOOM() throws Exception {
@@ -399,7 +362,7 @@ static class TestSslTransportLayer extends SslTransportLayer {
             boolean muteSocket = false;
 
             public TestSslTransportLayer(String channelId, SelectionKey key, SSLEngine sslEngine,
-                                         ChannelMetadataRegistry metadataRegistry) throws IOException {
+                                         ChannelMetadataRegistry metadataRegistry) {
                 super(channelId, key, sslEngine, metadataRegistry);
                 transportLayers.put(channelId, this);
             }
diff --git a/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java b/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java
index 5b0d4172d8c3b..d78e5f44b27a2 100644
--- a/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/network/SslTransportLayerTest.java
@@ -490,9 +490,7 @@ public void testPemFiles(Args args) throws Exception {
     }
 
     /**
-     * Test with PEM key store files without key password for client key store. We don't allow this
-     * with PEM files since unprotected private key on disk is not safe. We do allow with inline
-     * PEM config since key config can be encrypted or externalized similar to other password configs.
+     * Test with PEM key store files without key password for client key store.
      */
     @ParameterizedTest
     @ArgumentsSource(SslTransportLayerArgumentsProvider.class)
@@ -502,27 +500,19 @@ public void testPemFilesWithoutClientKeyPassword(Args args) throws Exception {
         TestSslUtils.convertToPem(args.sslClientConfigs, !useInlinePem, false);
         args.sslServerConfigs.put(BrokerSecurityConfigs.SSL_CLIENT_AUTH_CONFIG, "required");
         server = createEchoServer(args, SecurityProtocol.SSL);
-        if (useInlinePem)
-            verifySslConfigs(args);
-        else
-            assertThrows(KafkaException.class, () -> createSelector(args.sslClientConfigs));
+        verifySslConfigs(args);
     }
 
     /**
      * Test with PEM key store files without key password for server key store.We don't allow this
-     * with PEM files since unprotected private key on disk is not safe. We do allow with inline
-     * PEM config since key config can be encrypted or externalized similar to other password configs.
+     * with PEM files since unprotected private key on disk is not safe.
      */
     @ParameterizedTest
     @ArgumentsSource(SslTransportLayerArgumentsProvider.class)
     public void testPemFilesWithoutServerKeyPassword(Args args) throws Exception {
         TestSslUtils.convertToPem(args.sslServerConfigs, !args.useInlinePem, false);
         TestSslUtils.convertToPem(args.sslClientConfigs, !args.useInlinePem, true);
-
-        if (args.useInlinePem)
-            verifySslConfigs(args);
-        else
-            assertThrows(KafkaException.class, () -> createEchoServer(args, SecurityProtocol.SSL));
+        verifySslConfigs(args);
     }
 
     /**
diff --git a/clients/src/test/java/org/apache/kafka/common/network/Tls12SelectorTest.java b/clients/src/test/java/org/apache/kafka/common/network/Tls12SelectorTest.java
new file mode 100644
index 0000000000000..7169b2ec51706
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/common/network/Tls12SelectorTest.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.common.network;
+
+import static java.util.Arrays.asList;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.security.GeneralSecurityException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import org.apache.kafka.common.config.SslConfigs;
+import org.apache.kafka.test.TestSslUtils;
+import org.junit.jupiter.api.Test;
+
+public class Tls12SelectorTest extends SslSelectorTest {
+
+    @Override
+    protected Map<String, Object> createSslClientConfigs(File trustStoreFile)
+        throws GeneralSecurityException, IOException {
+        Map<String, Object> configs = TestSslUtils.createSslConfig(false, false, Mode.CLIENT,
+            trustStoreFile, "client");
+        configs.put(SslConfigs.SSL_ENABLED_PROTOCOLS_CONFIG, asList("TLSv1.2"));
+        return configs;
+    }
+
+    /**
+     * Renegotiation is not supported when TLS 1.2 is used (renegotiation was removed from TLS 1.3)
+     */
+    @Test
+    public void testRenegotiationFails() throws Exception {
+        String node = "0";
+        // create connections
+        InetSocketAddress addr = new InetSocketAddress("localhost", server.port);
+        selector.connect(node, addr, BUFFER_SIZE, BUFFER_SIZE);
+        NetworkTestUtils.waitForChannelReady(selector, node);
+
+        // send echo requests and receive responses
+        selector.send(createSend(node, node + "-" + 0));
+        selector.poll(0L);
+        server.renegotiate();
+        selector.send(createSend(node, node + "-" + 1));
+        long expiryTime = System.currentTimeMillis() + 2000;
+
+        List<String> disconnected = new ArrayList<>();
+        while (!disconnected.contains(node) && System.currentTimeMillis() < expiryTime) {
+            selector.poll(10);
+            disconnected.addAll(selector.disconnected().keySet());
+        }
+        assertTrue(disconnected.contains(node), "Renegotiation should cause disconnection");
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/common/network/Tls13SelectorTest.java b/clients/src/test/java/org/apache/kafka/common/network/Tls13SelectorTest.java
new file mode 100644
index 0000000000000..db69c2fa8ea1e
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/common/network/Tls13SelectorTest.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.common.network;
+
+import static java.util.Arrays.asList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.security.GeneralSecurityException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.kafka.common.config.SslConfigs;
+import org.apache.kafka.test.TestSslUtils;
+import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledForJreRange;
+import org.junit.jupiter.api.condition.JRE;
+
+@EnabledForJreRange(min = JRE.JAVA_11) // TLS 1.3 is only supported with Java 11 and newer
+public class Tls13SelectorTest extends SslSelectorTest {
+
+    @Override
+    protected Map<String, Object> createSslClientConfigs(File trustStoreFile) throws GeneralSecurityException, IOException {
+        Map<String, Object> configs = TestSslUtils.createSslConfig(false, false, Mode.CLIENT,
+            trustStoreFile, "client");
+        configs.put(SslConfigs.SSL_ENABLED_PROTOCOLS_CONFIG, asList("TLSv1.3"));
+        return configs;
+    }
+
+    /**
+     * TLS 1.3 has a post-handshake key and IV update, which will update the sending and receiving keys
+     * for one side of the connection.
+     *
+     * Key Usage Limits will trigger an update when the algorithm limits are reached, but the default
+     * value is too large (2^37 bytes of plaintext data) for a unit test. This value can be overridden
+     * via the security property `jdk.tls.keyLimits`, but that's also difficult to achieve in a unit
+     * test.
+     *
+     * Applications can also trigger an update by calling `SSLSocket.startHandshake()` or
+     * `SSLEngine.beginHandshake()` (this would trigger `renegotiation` with TLS 1.2) and that's the
+     * approach we take here.
+     */
+    @Test
+    public void testKeyUpdate() throws Exception {
+        String node = "0";
+        // create connections
+        InetSocketAddress addr = new InetSocketAddress("localhost", server.port);
+        selector.connect(node, addr, BUFFER_SIZE, BUFFER_SIZE);
+        NetworkTestUtils.waitForChannelReady(selector, node);
+
+        // send echo requests and receive responses
+        selector.send(createSend(node, node + "-" + 0));
+        selector.poll(0L);
+        server.renegotiate();
+        selector.send(createSend(node,  node + "-" + 1));
+        List<NetworkReceive> received = new ArrayList<>();
+        TestUtils.waitForCondition(() -> {
+            try {
+                selector.poll(1000L);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+            for (NetworkReceive receive : selector.completedReceives()) {
+                if (receive.source().equals(node))
+                    received.add(receive);
+            }
+            return received.size() == 2;
+        }, "Expected two receives, got " + received.size());
+
+        assertEquals(asList("0-0", "0-1"), received.stream().map(this::asString).collect(Collectors.toList()));
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/common/protocol/ApiKeysTest.java b/clients/src/test/java/org/apache/kafka/common/protocol/ApiKeysTest.java
index 3c66b211bec4f..1aa420b36f0f4 100644
--- a/clients/src/test/java/org/apache/kafka/common/protocol/ApiKeysTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/protocol/ApiKeysTest.java
@@ -44,8 +44,8 @@ public void testForIdWithInvalidIdHigh() {
     }
 
     @Test
-    public void testAlterIsrIsClusterAction() {
-        assertTrue(ApiKeys.ALTER_ISR.clusterAction);
+    public void testAlterPartitionIsClusterAction() {
+        assertTrue(ApiKeys.ALTER_PARTITION.clusterAction);
     }
 
     /**
@@ -62,7 +62,7 @@ public void testAlterIsrIsClusterAction() {
     public void testResponseThrottleTime() {
         Set<ApiKeys> authenticationKeys = EnumSet.of(ApiKeys.SASL_HANDSHAKE, ApiKeys.SASL_AUTHENTICATE);
         // Newer protocol apis include throttle time ms even for cluster actions
-        Set<ApiKeys> clusterActionsWithThrottleTimeMs = EnumSet.of(ApiKeys.ALTER_ISR, ApiKeys.ALLOCATE_PRODUCER_IDS);
+        Set<ApiKeys> clusterActionsWithThrottleTimeMs = EnumSet.of(ApiKeys.ALTER_PARTITION, ApiKeys.ALLOCATE_PRODUCER_IDS, ApiKeys.UPDATE_FEATURES);
         for (ApiKeys apiKey: ApiKeys.zkBrokerApis()) {
             Schema responseSchema = apiKey.messageType.responseSchemas()[apiKey.latestVersion()];
             BoundField throttleTimeField = responseSchema.get("throttle_time_ms");
diff --git a/clients/src/test/java/org/apache/kafka/common/protocol/MessageUtilTest.java b/clients/src/test/java/org/apache/kafka/common/protocol/MessageUtilTest.java
index 33dcabb80a481..5195f5511570f 100755
--- a/clients/src/test/java/org/apache/kafka/common/protocol/MessageUtilTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/protocol/MessageUtilTest.java
@@ -77,4 +77,10 @@ public void testCompareRawTaggedFields() {
             Arrays.asList(new RawTaggedField(1, new byte[] {1}),
                 new RawTaggedField(2, new byte[] {}))));
     }
+
+    @Test
+    public void testConstants() {
+        assertEquals(MessageUtil.UNSIGNED_SHORT_MAX, 0xFFFF);
+        assertEquals(MessageUtil.UNSIGNED_INT_MAX, 0xFFFFFFFFL);
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/ApiVersionsResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/ApiVersionsResponseTest.java
index 2c9b1e8fad023..62571c6986a93 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/ApiVersionsResponseTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/ApiVersionsResponseTest.java
@@ -17,10 +17,18 @@
 
 package org.apache.kafka.common.requests;
 
+import java.util.Collections;
+import java.util.HashSet;
+import org.apache.kafka.common.feature.Features;
+import org.apache.kafka.common.feature.SupportedVersionRange;
 import org.apache.kafka.common.message.ApiMessageType;
+import org.apache.kafka.common.message.ApiMessageType.ListenerType;
 import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersion;
 import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersionCollection;
+import org.apache.kafka.common.message.ApiVersionsResponseData.FinalizedFeatureKey;
+import org.apache.kafka.common.message.ApiVersionsResponseData.SupportedFeatureKey;
 import org.apache.kafka.common.protocol.ApiKeys;
+import org.apache.kafka.common.record.RecordBatch;
 import org.apache.kafka.common.record.RecordVersion;
 import org.apache.kafka.common.utils.Utils;
 import org.junit.jupiter.api.Test;
@@ -102,6 +110,91 @@ public void shouldHaveCommonlyAgreedApiVersionResponseWithControllerOnForwardabl
             ApiKeys.JOIN_GROUP.latestVersion(), commonResponse);
     }
 
+    @Test
+    public void shouldCreateApiResponseOnlyWithKeysSupportedByMagicValue() {
+        ApiVersionsResponse response = ApiVersionsResponse.createApiVersionsResponse(
+            10,
+            RecordVersion.V1,
+            Features.emptySupportedFeatures(),
+            Collections.emptyMap(),
+            ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH,
+            null,
+            ListenerType.ZK_BROKER
+        );
+        verifyApiKeysForMagic(response, RecordBatch.MAGIC_VALUE_V1);
+        assertEquals(10, response.throttleTimeMs());
+        assertTrue(response.data().supportedFeatures().isEmpty());
+        assertTrue(response.data().finalizedFeatures().isEmpty());
+        assertEquals(ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH, response.data().finalizedFeaturesEpoch());
+    }
+
+    @Test
+    public void shouldReturnFeatureKeysWhenMagicIsCurrentValueAndThrottleMsIsDefaultThrottle() {
+        ApiVersionsResponse response = ApiVersionsResponse.createApiVersionsResponse(
+            10,
+            RecordVersion.V1,
+            Features.supportedFeatures(
+                Utils.mkMap(Utils.mkEntry("feature", new SupportedVersionRange((short) 1, (short) 4)))),
+            Utils.mkMap(Utils.mkEntry("feature", (short) 3)),
+            10L,
+            null,
+            ListenerType.ZK_BROKER
+        );
+
+        verifyApiKeysForMagic(response, RecordBatch.MAGIC_VALUE_V1);
+        assertEquals(10, response.throttleTimeMs());
+        assertEquals(1, response.data().supportedFeatures().size());
+        SupportedFeatureKey sKey = response.data().supportedFeatures().find("feature");
+        assertNotNull(sKey);
+        assertEquals(1, sKey.minVersion());
+        assertEquals(4, sKey.maxVersion());
+        assertEquals(1, response.data().finalizedFeatures().size());
+        FinalizedFeatureKey fKey = response.data().finalizedFeatures().find("feature");
+        assertNotNull(fKey);
+        assertEquals(3, fKey.minVersionLevel());
+        assertEquals(3, fKey.maxVersionLevel());
+        assertEquals(10, response.data().finalizedFeaturesEpoch());
+    }
+
+    @Test
+    public void shouldReturnAllKeysWhenMagicIsCurrentValueAndThrottleMsIsDefaultThrottle() {
+        ApiVersionsResponse response = ApiVersionsResponse.createApiVersionsResponse(
+            AbstractResponse.DEFAULT_THROTTLE_TIME,
+            RecordVersion.current(),
+            Features.emptySupportedFeatures(),
+            Collections.emptyMap(),
+            ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH,
+            null,
+            ListenerType.ZK_BROKER
+        );
+        assertEquals(new HashSet<>(ApiKeys.zkBrokerApis()), apiKeysInResponse(response));
+        assertEquals(AbstractResponse.DEFAULT_THROTTLE_TIME, response.throttleTimeMs());
+        assertTrue(response.data().supportedFeatures().isEmpty());
+        assertTrue(response.data().finalizedFeatures().isEmpty());
+        assertEquals(ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH, response.data().finalizedFeaturesEpoch());
+    }
+
+    @Test
+    public void testMetadataQuorumApisAreDisabled() {
+        ApiVersionsResponse response = ApiVersionsResponse.createApiVersionsResponse(
+            AbstractResponse.DEFAULT_THROTTLE_TIME,
+            RecordVersion.current(),
+            Features.emptySupportedFeatures(),
+            Collections.emptyMap(),
+            ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH,
+            null,
+            ListenerType.ZK_BROKER
+        );
+
+        // Ensure that APIs needed for the KRaft mode are not exposed through ApiVersions until we are ready for them
+        HashSet<ApiKeys> exposedApis = apiKeysInResponse(response);
+        assertFalse(exposedApis.contains(ApiKeys.ENVELOPE));
+        assertFalse(exposedApis.contains(ApiKeys.VOTE));
+        assertFalse(exposedApis.contains(ApiKeys.BEGIN_QUORUM_EPOCH));
+        assertFalse(exposedApis.contains(ApiKeys.END_QUORUM_EPOCH));
+        assertFalse(exposedApis.contains(ApiKeys.DESCRIBE_QUORUM));
+    }
+
     @Test
     public void testIntersect() {
         assertFalse(ApiVersionsResponse.intersect(null, null).isPresent());
@@ -145,4 +238,18 @@ private void verifyVersions(short forwardableAPIKey,
         assertEquals(expectedVersionsForForwardableAPI, commonResponse.find(forwardableAPIKey));
     }
 
+    private void verifyApiKeysForMagic(ApiVersionsResponse response, Byte maxMagic) {
+        for (ApiVersion version : response.data().apiKeys()) {
+            assertTrue(ApiKeys.forId(version.apiKey()).minRequiredInterBrokerMagic <= maxMagic);
+        }
+    }
+
+    private HashSet<ApiKeys> apiKeysInResponse(ApiVersionsResponse apiVersions) {
+        HashSet<ApiKeys> apiKeys = new HashSet<>();
+        for (ApiVersion version : apiVersions.data().apiKeys()) {
+            apiKeys.add(ApiKeys.forId(version.apiKey()));
+        }
+        return apiKeys;
+    }
+
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrRequestTest.java b/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrRequestTest.java
index de9914c575e31..83c33e4903fba 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrRequestTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrRequestTest.java
@@ -116,7 +116,7 @@ public void testVersionLogic() {
                     .setLeader(0)
                     .setLeaderEpoch(10)
                     .setIsr(asList(0, 1))
-                    .setZkVersion(10)
+                    .setPartitionEpoch(10)
                     .setReplicas(asList(0, 1, 2))
                     .setAddingReplicas(asList(3))
                     .setRemovingReplicas(asList(2)),
@@ -127,7 +127,7 @@ public void testVersionLogic() {
                     .setLeader(1)
                     .setLeaderEpoch(11)
                     .setIsr(asList(1, 2, 3))
-                    .setZkVersion(11)
+                    .setPartitionEpoch(11)
                     .setReplicas(asList(1, 2, 3))
                     .setAddingReplicas(emptyList())
                     .setRemovingReplicas(emptyList()),
@@ -138,7 +138,7 @@ public void testVersionLogic() {
                     .setLeader(2)
                     .setLeaderEpoch(11)
                     .setIsr(asList(2, 3, 4))
-                    .setZkVersion(11)
+                    .setPartitionEpoch(11)
                     .setReplicas(asList(2, 3, 4))
                     .setAddingReplicas(emptyList())
                     .setRemovingReplicas(emptyList())
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrResponseTest.java
index 9f46304a4de31..6b611bb6df1a5 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrResponseTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/LeaderAndIsrResponseTest.java
@@ -48,7 +48,7 @@ public void testErrorCountsFromGetErrorResponse() {
             .setLeader(1)
             .setLeaderEpoch(10)
             .setIsr(Collections.singletonList(10))
-            .setZkVersion(20)
+            .setPartitionEpoch(20)
             .setReplicas(Collections.singletonList(10))
             .setIsNew(false));
         partitionStates.add(new LeaderAndIsrPartitionState()
@@ -58,7 +58,7 @@ public void testErrorCountsFromGetErrorResponse() {
             .setLeader(1)
             .setLeaderEpoch(10)
             .setIsr(Collections.singletonList(10))
-            .setZkVersion(20)
+            .setPartitionEpoch(20)
             .setReplicas(Collections.singletonList(10))
             .setIsNew(false));
         Map<String, Uuid> topicIds = Collections.singletonMap("foo", Uuid.randomUuid());
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/MetadataRequestTest.java b/clients/src/test/java/org/apache/kafka/common/requests/MetadataRequestTest.java
index 74c217df91f86..84764c29da6e8 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/MetadataRequestTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/MetadataRequestTest.java
@@ -26,6 +26,7 @@
 import java.util.Collections;
 import java.util.List;
 
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
@@ -82,12 +83,25 @@ public void testTopicIdAndNullTopicNameRequests() {
 
         // if version is 10 or 11, the invalid topic metadata should return an error
         List<Short> invalidVersions = Arrays.asList((short) 10, (short) 11);
-        invalidVersions.forEach(version ->
-            topics.forEach(topic -> {
-                MetadataRequestData metadataRequestData = new MetadataRequestData().setTopics(Collections.singletonList(topic));
-                MetadataRequest.Builder builder = new MetadataRequest.Builder(metadataRequestData);
-                assertThrows(UnsupportedVersionException.class, () -> builder.build(version));
-            })
-        );
+        invalidVersions.forEach(version -> topics.forEach(topic -> {
+            MetadataRequestData metadataRequestData = new MetadataRequestData().setTopics(Collections.singletonList(topic));
+            MetadataRequest.Builder builder = new MetadataRequest.Builder(metadataRequestData);
+            assertThrows(UnsupportedVersionException.class, () -> builder.build(version));
+        }));
+    }
+
+    @Test
+    public void testTopicIdWithZeroUuid() {
+        List<MetadataRequestData.MetadataRequestTopic> topics = Arrays.asList(
+                new MetadataRequestData.MetadataRequestTopic().setName("topic").setTopicId(Uuid.ZERO_UUID),
+                new MetadataRequestData.MetadataRequestTopic().setName("topic").setTopicId(new Uuid(0L, 0L)),
+                new MetadataRequestData.MetadataRequestTopic().setName("topic"));
+
+        List<Short> invalidVersions = Arrays.asList((short) 10, (short) 11);
+        invalidVersions.forEach(version -> topics.forEach(topic -> {
+            MetadataRequestData metadataRequestData = new MetadataRequestData().setTopics(Collections.singletonList(topic));
+            MetadataRequest.Builder builder = new MetadataRequest.Builder(metadataRequestData);
+            assertDoesNotThrow(() -> builder.build(version));
+        }));
     }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/MetadataResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/MetadataResponseTest.java
new file mode 100644
index 0000000000000..37f7356c6969d
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/common/requests/MetadataResponseTest.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.common.requests;
+
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.Uuid;
+import org.apache.kafka.common.message.MetadataResponseData;
+import org.apache.kafka.common.protocol.ApiKeys;
+import org.apache.kafka.common.protocol.Errors;
+import org.junit.jupiter.api.Test;
+
+import static java.util.Collections.emptyList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class MetadataResponseTest {
+
+    @Test
+    void buildClusterTest() {
+        Uuid zeroUuid = new Uuid(0L, 0L);
+        Uuid randomUuid = Uuid.randomUuid();
+        MetadataResponseData.MetadataResponseTopic topicMetadata1 = new MetadataResponseData.MetadataResponseTopic()
+                .setName("topic1")
+                .setErrorCode(Errors.NONE.code())
+                .setPartitions(emptyList())
+                .setIsInternal(false);
+        MetadataResponseData.MetadataResponseTopic topicMetadata2 = new MetadataResponseData.MetadataResponseTopic()
+                .setName("topic2")
+                .setErrorCode(Errors.NONE.code())
+                .setTopicId(zeroUuid)
+                .setPartitions(emptyList())
+                .setIsInternal(false);
+        MetadataResponseData.MetadataResponseTopic topicMetadata3 = new MetadataResponseData.MetadataResponseTopic()
+                .setName("topic3")
+                .setErrorCode(Errors.NONE.code())
+                .setTopicId(randomUuid)
+                .setPartitions(emptyList())
+                .setIsInternal(false);
+
+        MetadataResponseData.MetadataResponseTopicCollection topics =
+                new MetadataResponseData.MetadataResponseTopicCollection();
+        topics.add(topicMetadata1);
+        topics.add(topicMetadata2);
+        topics.add(topicMetadata3);
+        MetadataResponse metadataResponse = new MetadataResponse(new MetadataResponseData().setTopics(topics),
+                ApiKeys.METADATA.latestVersion());
+        Cluster cluster = metadataResponse.buildCluster();
+        assertNull(cluster.topicName(Uuid.ZERO_UUID));
+        assertNull(cluster.topicName(zeroUuid));
+        assertEquals("topic3", cluster.topicName(randomUuid));
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
index 8b5ada9ef860c..b6df4c44d6392 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java
@@ -41,8 +41,8 @@
 import org.apache.kafka.common.message.AllocateProducerIdsResponseData;
 import org.apache.kafka.common.message.AlterClientQuotasResponseData;
 import org.apache.kafka.common.message.AlterConfigsResponseData;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.AlterReplicaLogDirsRequestData;
@@ -258,6 +258,7 @@
 import static org.apache.kafka.common.protocol.ApiKeys.DELETE_ACLS;
 import static org.apache.kafka.common.protocol.ApiKeys.DELETE_TOPICS;
 import static org.apache.kafka.common.protocol.ApiKeys.DESCRIBE_CONFIGS;
+import static org.apache.kafka.common.protocol.ApiKeys.DESCRIBE_DELEGATION_TOKEN;
 import static org.apache.kafka.common.protocol.ApiKeys.DESCRIBE_LOG_DIRS;
 import static org.apache.kafka.common.protocol.ApiKeys.ELECT_LEADERS;
 import static org.apache.kafka.common.protocol.ApiKeys.FETCH;
@@ -939,7 +940,7 @@ public void testErrorCountsIncludesNone() {
         assertEquals(2, createDescribeConfigsResponse(DESCRIBE_CONFIGS.latestVersion()).errorCounts().get(Errors.NONE));
         assertEquals(1, createDescribeGroupResponse().errorCounts().get(Errors.NONE));
         assertEquals(2, createDescribeLogDirsResponse().errorCounts().get(Errors.NONE));
-        assertEquals(1, createDescribeTokenResponse().errorCounts().get(Errors.NONE));
+        assertEquals(1, createDescribeTokenResponse(DESCRIBE_DELEGATION_TOKEN.latestVersion()).errorCounts().get(Errors.NONE));
         assertEquals(2, createElectLeadersResponse().errorCounts().get(Errors.NONE));
         assertEquals(1, createEndTxnResponse().errorCounts().get(Errors.NONE));
         assertEquals(1, createExpireTokenResponse().errorCounts().get(Errors.NONE));
@@ -1029,7 +1030,7 @@ private AbstractRequest getRequest(ApiKeys apikey, short version) {
             case BEGIN_QUORUM_EPOCH: return createBeginQuorumEpochRequest(version);
             case END_QUORUM_EPOCH: return createEndQuorumEpochRequest(version);
             case DESCRIBE_QUORUM: return createDescribeQuorumRequest(version);
-            case ALTER_ISR: return createAlterIsrRequest(version);
+            case ALTER_PARTITION: return createAlterPartitionRequest(version);
             case UPDATE_FEATURES: return createUpdateFeaturesRequest(version);
             case ENVELOPE: return createEnvelopeRequest(version);
             case FETCH_SNAPSHOT: return createFetchSnapshotRequest(version);
@@ -1088,7 +1089,7 @@ private AbstractResponse getResponse(ApiKeys apikey, short version) {
             case CREATE_DELEGATION_TOKEN: return createCreateTokenResponse();
             case RENEW_DELEGATION_TOKEN: return createRenewTokenResponse();
             case EXPIRE_DELEGATION_TOKEN: return createExpireTokenResponse();
-            case DESCRIBE_DELEGATION_TOKEN: return createDescribeTokenResponse();
+            case DESCRIBE_DELEGATION_TOKEN: return createDescribeTokenResponse(version);
             case DELETE_GROUPS: return createDeleteGroupsResponse();
             case ELECT_LEADERS: return createElectLeadersResponse();
             case INCREMENTAL_ALTER_CONFIGS: return createIncrementalAlterConfigsResponse();
@@ -1103,7 +1104,7 @@ private AbstractResponse getResponse(ApiKeys apikey, short version) {
             case BEGIN_QUORUM_EPOCH: return createBeginQuorumEpochResponse();
             case END_QUORUM_EPOCH: return createEndQuorumEpochResponse();
             case DESCRIBE_QUORUM: return createDescribeQuorumResponse();
-            case ALTER_ISR: return createAlterIsrResponse();
+            case ALTER_PARTITION: return createAlterPartitionResponse(version);
             case UPDATE_FEATURES: return createUpdateFeaturesResponse();
             case ENVELOPE: return createEnvelopeResponse();
             case FETCH_SNAPSHOT: return createFetchSnapshotResponse();
@@ -1305,33 +1306,49 @@ private DescribeUserScramCredentialsResponse createDescribeUserScramCredentialsR
         return new DescribeUserScramCredentialsResponse(data);
     }
 
-    private AlterIsrRequest createAlterIsrRequest(short version) {
-        AlterIsrRequestData data = new AlterIsrRequestData()
-                .setBrokerEpoch(123L)
-                .setBrokerId(1)
-                .setTopics(singletonList(new AlterIsrRequestData.TopicData()
-                        .setName("topic1")
-                        .setPartitions(singletonList(new AlterIsrRequestData.PartitionData()
-                                .setPartitionIndex(1)
-                                .setCurrentIsrVersion(2)
-                                .setLeaderEpoch(3)
-                                .setNewIsr(asList(1, 2))))));
-        return new AlterIsrRequest.Builder(data).build(version);
+    private AlterPartitionRequest createAlterPartitionRequest(short version) {
+        AlterPartitionRequestData.PartitionData partitionData = new AlterPartitionRequestData.PartitionData()
+            .setPartitionIndex(1)
+            .setPartitionEpoch(2)
+            .setLeaderEpoch(3)
+            .setNewIsr(asList(1, 2));
+
+        if (version >= 1) {
+            // Use the none default value; 1 - RECOVERING
+            partitionData.setLeaderRecoveryState((byte) 1);
+        }
+
+        AlterPartitionRequestData data = new AlterPartitionRequestData()
+            .setBrokerEpoch(123L)
+            .setBrokerId(1)
+            .setTopics(singletonList(new AlterPartitionRequestData.TopicData()
+                .setTopicName("topic1")
+                .setTopicId(Uuid.randomUuid())
+                .setPartitions(singletonList(partitionData))));
+        return new AlterPartitionRequest.Builder(data, version >= 1).build(version);
     }
 
-    private AlterIsrResponse createAlterIsrResponse() {
-        AlterIsrResponseData data = new AlterIsrResponseData()
+    private AlterPartitionResponse createAlterPartitionResponse(int version) {
+        AlterPartitionResponseData.PartitionData partitionData = new AlterPartitionResponseData.PartitionData()
+            .setPartitionEpoch(1)
+            .setIsr(asList(0, 1, 2))
+            .setErrorCode(Errors.NONE.code())
+            .setLeaderEpoch(2)
+            .setLeaderId(3);
+
+        if (version >= 1) {
+            // Use the none default value; 1 - RECOVERING
+            partitionData.setLeaderRecoveryState((byte) 1);
+        }
+
+        AlterPartitionResponseData data = new AlterPartitionResponseData()
                 .setErrorCode(Errors.NONE.code())
                 .setThrottleTimeMs(123)
-                .setTopics(singletonList(new AlterIsrResponseData.TopicData()
-                        .setName("topic1")
-                        .setPartitions(singletonList(new AlterIsrResponseData.PartitionData()
-                                .setCurrentIsrVersion(1)
-                                .setIsr(asList(0, 1, 2))
-                                .setErrorCode(Errors.NONE.code())
-                                .setLeaderEpoch(2)
-                                .setLeaderId(3)))));
-        return new AlterIsrResponse(data);
+                .setTopics(singletonList(new AlterPartitionResponseData.TopicData()
+                    .setTopicName("topic1")
+                    .setTopicId(Uuid.randomUuid())
+                    .setPartitions(singletonList(partitionData))));
+        return new AlterPartitionResponse(data);
     }
 
     private UpdateFeaturesRequest createUpdateFeaturesRequest(short version) {
@@ -2193,7 +2210,7 @@ private LeaderAndIsrRequest createLeaderAndIsrRequest(short version) {
             .setLeader(2)
             .setLeaderEpoch(1)
             .setIsr(isr)
-            .setZkVersion(2)
+            .setPartitionEpoch(2)
             .setReplicas(replicas)
             .setIsNew(false));
         partitionStates.add(new LeaderAndIsrPartitionState()
@@ -2203,7 +2220,7 @@ private LeaderAndIsrRequest createLeaderAndIsrRequest(short version) {
             .setLeader(1)
             .setLeaderEpoch(1)
             .setIsr(isr)
-            .setZkVersion(2)
+            .setPartitionEpoch(2)
             .setReplicas(replicas)
             .setIsNew(false));
         partitionStates.add(new LeaderAndIsrPartitionState()
@@ -2213,7 +2230,7 @@ private LeaderAndIsrRequest createLeaderAndIsrRequest(short version) {
             .setLeader(0)
             .setLeaderEpoch(1)
             .setIsr(isr)
-            .setZkVersion(2)
+            .setPartitionEpoch(2)
             .setReplicas(replicas)
             .setIsNew(false));
 
@@ -2964,7 +2981,7 @@ private DescribeDelegationTokenRequest createDescribeTokenRequest(short version)
         return new DescribeDelegationTokenRequest.Builder(owners).build(version);
     }
 
-    private DescribeDelegationTokenResponse createDescribeTokenResponse() {
+    private DescribeDelegationTokenResponse createDescribeTokenResponse(short version) {
         List<KafkaPrincipal> renewers = new ArrayList<>();
         renewers.add(SecurityUtils.parseKafkaPrincipal("User:user1"));
         renewers.add(SecurityUtils.parseKafkaPrincipal("User:user2"));
@@ -2980,7 +2997,7 @@ private DescribeDelegationTokenResponse createDescribeTokenResponse() {
         tokenList.add(new DelegationToken(tokenInfo1, "test".getBytes()));
         tokenList.add(new DelegationToken(tokenInfo2, "test".getBytes()));
 
-        return new DescribeDelegationTokenResponse(20, Errors.NONE, tokenList);
+        return new DescribeDelegationTokenResponse(version, 20, Errors.NONE, tokenList);
     }
 
     private ElectLeadersRequest createElectLeadersRequestNullPartitions() {
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/UpdateFeaturesRequestTest.java b/clients/src/test/java/org/apache/kafka/common/requests/UpdateFeaturesRequestTest.java
index 1b63aecd01617..cf267da5574ce 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/UpdateFeaturesRequestTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/UpdateFeaturesRequestTest.java
@@ -16,14 +16,20 @@
  */
 package org.apache.kafka.common.requests;
 
+import org.apache.kafka.clients.admin.FeatureUpdate;
 import org.apache.kafka.common.errors.UnknownServerException;
+import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.message.UpdateFeaturesRequestData;
 import org.apache.kafka.common.protocol.Errors;
 import org.junit.jupiter.api.Test;
 
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 public class UpdateFeaturesRequestTest {
 
@@ -53,4 +59,88 @@ public void testGetErrorResponse() {
         assertEquals(Collections.singletonMap(Errors.UNKNOWN_SERVER_ERROR, 1), response.errorCounts());
     }
 
+    @Test
+    public void testUpdateFeaturesV0() {
+        UpdateFeaturesRequestData.FeatureUpdateKeyCollection features =
+                new UpdateFeaturesRequestData.FeatureUpdateKeyCollection();
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("foo")
+            .setMaxVersionLevel((short) 1)
+            .setAllowDowngrade(true)
+        );
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("bar")
+            .setMaxVersionLevel((short) 3)
+        );
+
+        UpdateFeaturesRequest request = new UpdateFeaturesRequest(
+            new UpdateFeaturesRequestData().setFeatureUpdates(features),
+            UpdateFeaturesRequestData.LOWEST_SUPPORTED_VERSION
+        );
+        ByteBuffer buffer = request.serialize();
+        request = UpdateFeaturesRequest.parse(buffer, UpdateFeaturesRequestData.LOWEST_SUPPORTED_VERSION);
+
+        List<UpdateFeaturesRequest.FeatureUpdateItem> updates = new ArrayList<>(request.featureUpdates());
+        assertEquals(updates.size(), 2);
+        assertEquals(updates.get(0).upgradeType(), FeatureUpdate.UpgradeType.SAFE_DOWNGRADE);
+        assertEquals(updates.get(1).upgradeType(), FeatureUpdate.UpgradeType.UPGRADE);
+    }
+
+    @Test
+    public void testUpdateFeaturesV1() {
+        UpdateFeaturesRequestData.FeatureUpdateKeyCollection features =
+            new UpdateFeaturesRequestData.FeatureUpdateKeyCollection();
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("foo")
+            .setMaxVersionLevel((short) 1)
+            .setUpgradeType(FeatureUpdate.UpgradeType.SAFE_DOWNGRADE.code())
+        );
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("bar")
+            .setMaxVersionLevel((short) 3)
+        );
+
+        UpdateFeaturesRequest request = new UpdateFeaturesRequest(
+            new UpdateFeaturesRequestData().setFeatureUpdates(features),
+            UpdateFeaturesRequestData.HIGHEST_SUPPORTED_VERSION
+        );
+
+        ByteBuffer buffer = request.serialize();
+        request = UpdateFeaturesRequest.parse(buffer, UpdateFeaturesRequestData.HIGHEST_SUPPORTED_VERSION);
+
+        List<UpdateFeaturesRequest.FeatureUpdateItem> updates = new ArrayList<>(request.featureUpdates());
+        assertEquals(updates.size(), 2);
+        assertEquals(updates.get(0).upgradeType(), FeatureUpdate.UpgradeType.SAFE_DOWNGRADE);
+        assertEquals(updates.get(1).upgradeType(), FeatureUpdate.UpgradeType.UPGRADE);
+
+    }
+
+    @Test
+    public void testUpdateFeaturesV1OldBoolean() {
+        UpdateFeaturesRequestData.FeatureUpdateKeyCollection features =
+            new UpdateFeaturesRequestData.FeatureUpdateKeyCollection();
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("foo")
+            .setMaxVersionLevel((short) 1)
+            .setAllowDowngrade(true)
+        );
+
+        features.add(new UpdateFeaturesRequestData.FeatureUpdateKey()
+            .setFeature("bar")
+            .setMaxVersionLevel((short) 3)
+        );
+
+        UpdateFeaturesRequest request = new UpdateFeaturesRequest(
+            new UpdateFeaturesRequestData().setFeatureUpdates(features),
+            UpdateFeaturesRequestData.HIGHEST_SUPPORTED_VERSION
+        );
+        assertThrows(UnsupportedVersionException.class, request::serialize,
+            "This should fail since allowDowngrade is not supported in v1 of this RPC");
+    }
+
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/requests/UpdateMetadataRequestTest.java b/clients/src/test/java/org/apache/kafka/common/requests/UpdateMetadataRequestTest.java
index 6f9d5c2454606..2dd17f776ec95 100644
--- a/clients/src/test/java/org/apache/kafka/common/requests/UpdateMetadataRequestTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/requests/UpdateMetadataRequestTest.java
@@ -203,7 +203,7 @@ public void testVersionLogic() {
 
             long topicIdCount = deserializedRequest.data().topicStates().stream()
                     .map(UpdateMetadataRequestData.UpdateMetadataTopicState::topicId)
-                    .filter(topicId -> topicId != Uuid.ZERO_UUID).count();
+                    .filter(topicId -> !Uuid.ZERO_UUID.equals(topicId)).count();
             if (version >= 7)
                 assertEquals(2, topicIdCount);
             else
diff --git a/clients/src/test/java/org/apache/kafka/common/resource/ResourceTypeTest.java b/clients/src/test/java/org/apache/kafka/common/resource/ResourceTypeTest.java
index fcde96865e60f..d40c8aeac6aca 100644
--- a/clients/src/test/java/org/apache/kafka/common/resource/ResourceTypeTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/resource/ResourceTypeTest.java
@@ -42,7 +42,8 @@ private static class AclResourceTypeTestInfo {
         new AclResourceTypeTestInfo(ResourceType.GROUP, 3, "group", false),
         new AclResourceTypeTestInfo(ResourceType.CLUSTER, 4, "cluster", false),
         new AclResourceTypeTestInfo(ResourceType.TRANSACTIONAL_ID, 5, "transactional_id", false),
-        new AclResourceTypeTestInfo(ResourceType.DELEGATION_TOKEN, 6, "delegation_token", false)
+        new AclResourceTypeTestInfo(ResourceType.DELEGATION_TOKEN, 6, "delegation_token", false),
+        new AclResourceTypeTestInfo(ResourceType.USER, 7, "user", false)
     };
 
     @Test
diff --git a/clients/src/test/java/org/apache/kafka/common/security/SaslExtensionsTest.java b/clients/src/test/java/org/apache/kafka/common/security/SaslExtensionsTest.java
index 9acb78cf3ef50..085baf70d2a2a 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/SaslExtensionsTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/SaslExtensionsTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.common.security;
 
+import java.util.Collections;
 import org.apache.kafka.common.security.auth.SaslExtensions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -23,6 +24,7 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
@@ -50,4 +52,30 @@ public void testCannotAddValueToMapReferenceAndGetFromExtensions() {
         this.map.put("hello", "42");
         assertNull(extensions.map().get("hello"));
     }
+
+    /**
+     * Tests that even when using the same underlying values in the map, two {@link SaslExtensions}
+     * are considered unique.
+     *
+     * @see SaslExtensions class-level documentation
+     */
+    @Test
+    public void testExtensionsWithEqualValuesAreUnique() {
+        // If the maps are distinct objects but have the same underlying values, the SaslExtension
+        // objects should still be unique.
+        assertNotEquals(new SaslExtensions(Collections.singletonMap("key", "value")),
+            new SaslExtensions(Collections.singletonMap("key", "value")),
+            "SaslExtensions with unique maps should be unique");
+
+        // If the maps are the same object (with the same underlying values), the SaslExtension
+        // objects should still be unique.
+        assertNotEquals(new SaslExtensions(map),
+            new SaslExtensions(map),
+            "SaslExtensions with duplicate maps should be unique");
+
+        // If the maps are empty, the SaslExtension objects should still be unique.
+        assertNotEquals(SaslExtensions.empty(),
+            SaslExtensions.empty(),
+            "SaslExtensions returned from SaslExtensions.empty() should be unique");
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticatorTest.java b/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticatorTest.java
index af0fedd4f5ad9..50696ecf0512d 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/authenticator/SaslServerAuthenticatorTest.java
@@ -16,10 +16,12 @@
  */
 package org.apache.kafka.common.security.authenticator;
 
-import java.net.InetAddress;
 import org.apache.kafka.common.config.internals.BrokerSecurityConfigs;
 import org.apache.kafka.common.errors.IllegalSaslStateException;
 import org.apache.kafka.common.message.ApiMessageType;
+import org.apache.kafka.common.message.SaslAuthenticateRequestData;
+import org.apache.kafka.common.message.SaslHandshakeRequestData;
+import org.apache.kafka.common.network.ChannelBuilders;
 import org.apache.kafka.common.network.ChannelMetadataRegistry;
 import org.apache.kafka.common.network.ClientInformation;
 import org.apache.kafka.common.network.DefaultChannelMetadataRegistry;
@@ -27,31 +29,55 @@
 import org.apache.kafka.common.network.ListenerName;
 import org.apache.kafka.common.network.TransportLayer;
 import org.apache.kafka.common.protocol.ApiKeys;
+import org.apache.kafka.common.requests.AbstractRequest;
 import org.apache.kafka.common.requests.ApiVersionsRequest;
 import org.apache.kafka.common.requests.ApiVersionsResponse;
+import org.apache.kafka.common.requests.RequestHeader;
 import org.apache.kafka.common.requests.RequestTestUtils;
+import org.apache.kafka.common.requests.ResponseHeader;
+import org.apache.kafka.common.requests.SaslAuthenticateRequest;
+import org.apache.kafka.common.requests.SaslAuthenticateResponse;
+import org.apache.kafka.common.requests.SaslHandshakeRequest;
 import org.apache.kafka.common.security.auth.AuthenticateCallbackHandler;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
+import org.apache.kafka.common.security.auth.KafkaPrincipalBuilder;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
-import org.apache.kafka.common.requests.RequestHeader;
+import org.apache.kafka.common.security.kerberos.KerberosShortNamer;
+import org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule;
 import org.apache.kafka.common.security.plain.PlainLoginModule;
+import org.apache.kafka.common.security.ssl.SslPrincipalMapper;
 import org.apache.kafka.common.utils.AppInfoParser;
+import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
+import org.junit.jupiter.api.Test;
+import org.mockito.Answers;
+import org.mockito.ArgumentCaptor;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
+import org.mockito.stubbing.Answer;
 
 import javax.security.auth.Subject;
+import javax.security.sasl.Sasl;
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
 import java.io.IOException;
+import java.net.InetAddress;
+import java.nio.Buffer;
 import java.nio.ByteBuffer;
+import java.time.Duration;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
-
-import org.junit.jupiter.api.Test;
-import org.mockito.Answers;
+import java.util.stream.Collectors;
 
 import static org.apache.kafka.common.security.scram.internals.ScramMechanism.SCRAM_SHA_256;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -59,13 +85,15 @@
 
 public class SaslServerAuthenticatorTest {
 
+    private final String clientId = "clientId";
+    
     @Test
     public void testOversizeRequest() throws IOException {
         TransportLayer transportLayer = mock(TransportLayer.class);
         Map<String, ?> configs = Collections.singletonMap(BrokerSecurityConfigs.SASL_ENABLED_MECHANISMS_CONFIG,
                 Collections.singletonList(SCRAM_SHA_256.mechanismName()));
         SaslServerAuthenticator authenticator = setupAuthenticator(configs, transportLayer,
-            SCRAM_SHA_256.mechanismName(), new DefaultChannelMetadataRegistry());
+                SCRAM_SHA_256.mechanismName(), new DefaultChannelMetadataRegistry());
 
         when(transportLayer.read(any(ByteBuffer.class))).then(invocation -> {
             invocation.<ByteBuffer>getArgument(0).putInt(SaslServerAuthenticator.MAX_RECEIVE_SIZE + 1);
@@ -81,9 +109,9 @@ public void testUnexpectedRequestType() throws IOException {
         Map<String, ?> configs = Collections.singletonMap(BrokerSecurityConfigs.SASL_ENABLED_MECHANISMS_CONFIG,
                 Collections.singletonList(SCRAM_SHA_256.mechanismName()));
         SaslServerAuthenticator authenticator = setupAuthenticator(configs, transportLayer,
-            SCRAM_SHA_256.mechanismName(), new DefaultChannelMetadataRegistry());
+                SCRAM_SHA_256.mechanismName(), new DefaultChannelMetadataRegistry());
 
-        RequestHeader header = new RequestHeader(ApiKeys.METADATA, (short) 0, "clientId", 13243);
+        RequestHeader header = new RequestHeader(ApiKeys.METADATA, (short) 0, clientId, 13243);
         ByteBuffer headerBuffer = RequestTestUtils.serializeRequestHeader(header);
 
         when(transportLayer.read(any(ByteBuffer.class))).then(invocation -> {
@@ -108,42 +136,223 @@ public void testUnexpectedRequestType() throws IOException {
     @Test
     public void testOldestApiVersionsRequest() throws IOException {
         testApiVersionsRequest(ApiKeys.API_VERSIONS.oldestVersion(),
-            ClientInformation.UNKNOWN_NAME_OR_VERSION, ClientInformation.UNKNOWN_NAME_OR_VERSION);
+                ClientInformation.UNKNOWN_NAME_OR_VERSION, ClientInformation.UNKNOWN_NAME_OR_VERSION);
     }
 
     @Test
     public void testLatestApiVersionsRequest() throws IOException {
         testApiVersionsRequest(ApiKeys.API_VERSIONS.latestVersion(),
-            "apache-kafka-java", AppInfoParser.getVersion());
+                "apache-kafka-java", AppInfoParser.getVersion());
     }
 
-    private void testApiVersionsRequest(short version, String expectedSoftwareName,
-                                       String expectedSoftwareVersion) throws IOException {
-        TransportLayer transportLayer = mock(TransportLayer.class, Answers.RETURNS_DEEP_STUBS);
+    @Test
+    public void testSessionExpiresAtTokenExpiryDespiteNoReauthIsSet() throws IOException {
+        String mechanism = OAuthBearerLoginModule.OAUTHBEARER_MECHANISM;
+        Duration tokenExpirationDuration = Duration.ofSeconds(1);
+        SaslServer saslServer = mock(SaslServer.class);
+
+        MockTime time = new MockTime();
+        try (
+                MockedStatic<?> ignored = mockSaslServer(saslServer, mechanism, time, tokenExpirationDuration);
+                MockedStatic<?> ignored2 = mockKafkaPrincipal("[principal-type]", "[principal-name");
+                TransportLayer transportLayer = mockTransportLayer()
+        ) {
+
+            SaslServerAuthenticator authenticator = getSaslServerAuthenticatorForOAuth(mechanism, transportLayer, time, 0L);
+
+            mockRequest(saslHandshakeRequest(mechanism), transportLayer);
+            authenticator.authenticate();
+
+            when(saslServer.isComplete()).thenReturn(false).thenReturn(true);
+            mockRequest(saslAuthenticateRequest(), transportLayer);
+            authenticator.authenticate();
+
+            long atTokenExpiryNanos = time.nanoseconds() + tokenExpirationDuration.toNanos();
+            assertEquals(atTokenExpiryNanos, authenticator.serverSessionExpirationTimeNanos());
+
+            ByteBuffer secondResponseSent = getResponses(transportLayer).get(1);
+            consumeSizeAndHeader(secondResponseSent);
+            SaslAuthenticateResponse response = SaslAuthenticateResponse.parse(secondResponseSent, (short) 2);
+            assertEquals(tokenExpirationDuration.toMillis(), response.sessionLifetimeMs());
+        }
+    }
+
+    @Test
+    public void testSessionExpiresAtMaxReauthTime() throws IOException {
+        String mechanism = OAuthBearerLoginModule.OAUTHBEARER_MECHANISM;
+        SaslServer saslServer = mock(SaslServer.class);
+        MockTime time = new MockTime(0, 1, 1000);
+        long maxReauthMs = 100L;
+        Duration tokenExpiryGreaterThanMaxReauth = Duration.ofMillis(maxReauthMs).multipliedBy(10);
+
+        try (
+                MockedStatic<?> ignored = mockSaslServer(saslServer, mechanism, time, tokenExpiryGreaterThanMaxReauth);
+                MockedStatic<?> ignored2 = mockKafkaPrincipal("[principal-type]", "[principal-name");
+                TransportLayer transportLayer = mockTransportLayer()
+        ) {
+
+            SaslServerAuthenticator authenticator = getSaslServerAuthenticatorForOAuth(mechanism, transportLayer, time, maxReauthMs);
+
+            mockRequest(saslHandshakeRequest(mechanism), transportLayer);
+            authenticator.authenticate();
+
+            when(saslServer.isComplete()).thenReturn(false).thenReturn(true);
+            mockRequest(saslAuthenticateRequest(), transportLayer);
+            authenticator.authenticate();
+
+            long atMaxReauthNanos = time.nanoseconds() + Duration.ofMillis(maxReauthMs).toNanos();
+            assertEquals(atMaxReauthNanos, authenticator.serverSessionExpirationTimeNanos());
+
+            ByteBuffer secondResponseSent = getResponses(transportLayer).get(1);
+            consumeSizeAndHeader(secondResponseSent);
+            SaslAuthenticateResponse response = SaslAuthenticateResponse.parse(secondResponseSent, (short) 2);
+            assertEquals(maxReauthMs, response.sessionLifetimeMs());
+        }
+    }
+
+    @Test
+    public void testSessionExpiresAtTokenExpiry() throws IOException {
+        String mechanism = OAuthBearerLoginModule.OAUTHBEARER_MECHANISM;
+        SaslServer saslServer = mock(SaslServer.class);
+        MockTime time = new MockTime(0, 1, 1000);
+        Duration tokenExpiryShorterThanMaxReauth = Duration.ofSeconds(2);
+        long maxReauthMs = tokenExpiryShorterThanMaxReauth.multipliedBy(2).toMillis();
+
+        try (
+                MockedStatic<?> ignored = mockSaslServer(saslServer, mechanism, time, tokenExpiryShorterThanMaxReauth);
+                MockedStatic<?> ignored2 = mockKafkaPrincipal("[principal-type]", "[principal-name");
+                TransportLayer transportLayer = mockTransportLayer()
+        ) {
+
+            SaslServerAuthenticator authenticator = getSaslServerAuthenticatorForOAuth(mechanism, transportLayer, time, maxReauthMs);
+
+            mockRequest(saslHandshakeRequest(mechanism), transportLayer);
+            authenticator.authenticate();
+
+            when(saslServer.isComplete()).thenReturn(false).thenReturn(true);
+            mockRequest(saslAuthenticateRequest(), transportLayer);
+            authenticator.authenticate();
+
+            long atTokenExpiryNanos = time.nanoseconds() + tokenExpiryShorterThanMaxReauth.toNanos();
+            assertEquals(atTokenExpiryNanos, authenticator.serverSessionExpirationTimeNanos());
+
+            ByteBuffer secondResponseSent = getResponses(transportLayer).get(1);
+            consumeSizeAndHeader(secondResponseSent);
+            SaslAuthenticateResponse response = SaslAuthenticateResponse.parse(secondResponseSent, (short) 2);
+            assertEquals(tokenExpiryShorterThanMaxReauth.toMillis(), response.sessionLifetimeMs());
+        }
+    }
+
+    private SaslServerAuthenticator getSaslServerAuthenticatorForOAuth(String mechanism, TransportLayer transportLayer, Time time, Long maxReauth) {
         Map<String, ?> configs = Collections.singletonMap(BrokerSecurityConfigs.SASL_ENABLED_MECHANISMS_CONFIG,
-            Collections.singletonList(SCRAM_SHA_256.mechanismName()));
+                Collections.singletonList(mechanism));
         ChannelMetadataRegistry metadataRegistry = new DefaultChannelMetadataRegistry();
-        SaslServerAuthenticator authenticator = setupAuthenticator(configs, transportLayer,
-            SCRAM_SHA_256.mechanismName(), metadataRegistry);
 
-        RequestHeader header = new RequestHeader(ApiKeys.API_VERSIONS, version, "clientId", 0);
+        return setupAuthenticator(configs, transportLayer, mechanism, metadataRegistry, time, maxReauth);
+    }
+
+    private MockedStatic<?> mockSaslServer(SaslServer saslServer, String mechanism, Time time, Duration tokenExpirationDuration) throws SaslException {
+        when(saslServer.getMechanismName()).thenReturn(mechanism);
+        when(saslServer.evaluateResponse(any())).thenReturn(new byte[]{});
+        long millisToExpiration = tokenExpirationDuration.toMillis();
+        when(saslServer.getNegotiatedProperty(eq(SaslInternalConfigs.CREDENTIAL_LIFETIME_MS_SASL_NEGOTIATED_PROPERTY_KEY)))
+                .thenReturn(time.milliseconds() + millisToExpiration);
+        return Mockito.mockStatic(Sasl.class, (Answer<SaslServer>) invocation -> saslServer);
+    }
+
+    private MockedStatic<?> mockKafkaPrincipal(String principalType, String name) {
+        KafkaPrincipalBuilder kafkaPrincipalBuilder = mock(KafkaPrincipalBuilder.class);
+        when(kafkaPrincipalBuilder.build(any())).thenReturn(new KafkaPrincipal(principalType, name));
+        MockedStatic<ChannelBuilders> channelBuilders = Mockito.mockStatic(ChannelBuilders.class, Answers.RETURNS_MOCKS);
+        channelBuilders.when(() ->
+                ChannelBuilders.createPrincipalBuilder(anyMap(), any(KerberosShortNamer.class), any(SslPrincipalMapper.class))
+        ).thenReturn(kafkaPrincipalBuilder);
+        return channelBuilders;
+    }
+
+    private void consumeSizeAndHeader(ByteBuffer responseBuffer) {
+        responseBuffer.getInt();
+        ResponseHeader.parse(responseBuffer, (short) 1);
+    }
+
+    private List<ByteBuffer> getResponses(TransportLayer transportLayer) throws IOException {
+        ArgumentCaptor<ByteBuffer[]> buffersCaptor = ArgumentCaptor.forClass(ByteBuffer[].class);
+        verify(transportLayer, times(2)).write(buffersCaptor.capture());
+        return buffersCaptor.getAllValues().stream()
+                .map(this::concatBuffers)
+                .collect(Collectors.toList());
+    }
+
+    private ByteBuffer concatBuffers(ByteBuffer[] buffers) {
+        int combinedCapacity = 0;
+        for (ByteBuffer buffer : buffers) {
+            combinedCapacity += buffer.capacity();
+        }
+        if (combinedCapacity > 0) {
+            ByteBuffer concat = ByteBuffer.allocate(combinedCapacity);
+            for (ByteBuffer buffer : buffers) {
+                concat.put(buffer);
+            }
+            return safeFlip(concat);
+        } else {
+            return ByteBuffer.allocate(0);
+        }
+    }
+
+    private ByteBuffer safeFlip(ByteBuffer buffer) {
+        return (ByteBuffer) ((Buffer) buffer).flip();
+    }
+
+    private SaslAuthenticateRequest saslAuthenticateRequest() {
+        SaslAuthenticateRequestData authenticateRequestData = new SaslAuthenticateRequestData();
+        return new SaslAuthenticateRequest.Builder(authenticateRequestData).build(ApiKeys.SASL_AUTHENTICATE.latestVersion());
+    }
+
+    private SaslHandshakeRequest saslHandshakeRequest(String mechanism) {
+        SaslHandshakeRequestData handshakeRequestData = new SaslHandshakeRequestData();
+        handshakeRequestData.setMechanism(mechanism);
+        return new SaslHandshakeRequest.Builder(handshakeRequestData).build(ApiKeys.SASL_HANDSHAKE.latestVersion());
+    }
+
+    private TransportLayer mockTransportLayer() throws IOException {
+        TransportLayer transportLayer = mock(TransportLayer.class, Answers.RETURNS_DEEP_STUBS);
+        when(transportLayer.socketChannel().socket().getInetAddress()).thenReturn(InetAddress.getLoopbackAddress());
+        when(transportLayer.write(any(ByteBuffer[].class))).thenReturn(Long.MAX_VALUE);
+        return transportLayer;
+    }
+
+    private void mockRequest(AbstractRequest request, TransportLayer transportLayer) throws IOException {
+        mockRequest(new RequestHeader(request.apiKey(), request.apiKey().latestVersion(), clientId, 0), request, transportLayer);
+    }
+
+    private void mockRequest(RequestHeader header, AbstractRequest request, TransportLayer transportLayer) throws IOException {
         ByteBuffer headerBuffer = RequestTestUtils.serializeRequestHeader(header);
 
-        ApiVersionsRequest request = new ApiVersionsRequest.Builder().build(version);
         ByteBuffer requestBuffer = request.serialize();
         requestBuffer.rewind();
 
-        when(transportLayer.socketChannel().socket().getInetAddress()).thenReturn(InetAddress.getLoopbackAddress());
-
         when(transportLayer.read(any(ByteBuffer.class))).then(invocation -> {
             invocation.<ByteBuffer>getArgument(0).putInt(headerBuffer.remaining() + requestBuffer.remaining());
             return 4;
         }).then(invocation -> {
             invocation.<ByteBuffer>getArgument(0)
-                .put(headerBuffer.duplicate())
-                .put(requestBuffer.duplicate());
+                    .put(headerBuffer.duplicate())
+                    .put(requestBuffer.duplicate());
             return headerBuffer.remaining() + requestBuffer.remaining();
         });
+    }
+
+    private void testApiVersionsRequest(short version, String expectedSoftwareName,
+                                        String expectedSoftwareVersion) throws IOException {
+        TransportLayer transportLayer = mockTransportLayer();
+        Map<String, ?> configs = Collections.singletonMap(BrokerSecurityConfigs.SASL_ENABLED_MECHANISMS_CONFIG,
+                Collections.singletonList(SCRAM_SHA_256.mechanismName()));
+        ChannelMetadataRegistry metadataRegistry = new DefaultChannelMetadataRegistry();
+        SaslServerAuthenticator authenticator = setupAuthenticator(configs, transportLayer, SCRAM_SHA_256.mechanismName(), metadataRegistry);
+
+        RequestHeader header = new RequestHeader(ApiKeys.API_VERSIONS, version, clientId, 0);
+        ApiVersionsRequest request = new ApiVersionsRequest.Builder().build(version);
+        mockRequest(header, request, transportLayer);
 
         authenticator.authenticate();
 
@@ -155,16 +364,24 @@ private void testApiVersionsRequest(short version, String expectedSoftwareName,
 
     private SaslServerAuthenticator setupAuthenticator(Map<String, ?> configs, TransportLayer transportLayer,
                                                        String mechanism, ChannelMetadataRegistry metadataRegistry) {
+        return setupAuthenticator(configs, transportLayer, mechanism, metadataRegistry, new MockTime(), null);
+    }
+
+    private SaslServerAuthenticator setupAuthenticator(Map<String, ?> configs, TransportLayer transportLayer,
+                                                       String mechanism, ChannelMetadataRegistry metadataRegistry, Time time, Long maxReauth) {
         TestJaasConfig jaasConfig = new TestJaasConfig();
-        jaasConfig.addEntry("jaasContext", PlainLoginModule.class.getName(), new HashMap<String, Object>());
+        jaasConfig.addEntry("jaasContext", PlainLoginModule.class.getName(), new HashMap<>());
         Map<String, Subject> subjects = Collections.singletonMap(mechanism, new Subject());
         Map<String, AuthenticateCallbackHandler> callbackHandlers = Collections.singletonMap(
                 mechanism, new SaslServerCallbackHandler());
         ApiVersionsResponse apiVersionsResponse = ApiVersionsResponse.defaultApiVersionsResponse(
-            ApiMessageType.ListenerType.ZK_BROKER);
+                ApiMessageType.ListenerType.ZK_BROKER);
+        Map<String, Long> connectionsMaxReauthMsByMechanism = maxReauth != null ?
+                Collections.singletonMap(mechanism, maxReauth) : Collections.emptyMap();
+
         return new SaslServerAuthenticator(configs, callbackHandlers, "node", subjects, null,
-                new ListenerName("ssl"), SecurityProtocol.SASL_SSL, transportLayer, Collections.emptyMap(),
-                metadataRegistry, Time.SYSTEM, () -> apiVersionsResponse);
+                new ListenerName("ssl"), SecurityProtocol.SASL_SSL, transportLayer, connectionsMaxReauthMsByMechanism,
+                metadataRegistry, time, () -> apiVersionsResponse);
     }
 
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/OAuthBearerLoginModuleTest.java b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/OAuthBearerLoginModuleTest.java
index ea03ec5bfa339..0dabeab1f433f 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/OAuthBearerLoginModuleTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/OAuthBearerLoginModuleTest.java
@@ -127,8 +127,8 @@ public void login1Commit1Login2Commit2Logout1Login3Commit3Logout2() throws Login
         // Create callback handler
         OAuthBearerToken[] tokens = new OAuthBearerToken[] {mock(OAuthBearerToken.class),
             mock(OAuthBearerToken.class), mock(OAuthBearerToken.class)};
-        SaslExtensions[] extensions = new SaslExtensions[] {mock(SaslExtensions.class),
-            mock(SaslExtensions.class), mock(SaslExtensions.class)};
+        SaslExtensions[] extensions = new SaslExtensions[] {saslExtensions(),
+            saslExtensions(), saslExtensions()};
         TestCallbackHandler testTokenCallbackHandler = new TestCallbackHandler(tokens, extensions);
 
         // Create login modules
@@ -208,7 +208,6 @@ public void login1Commit1Login2Commit2Logout1Login3Commit3Logout2() throws Login
         assertSame(extensions[2], publicCredentials.iterator().next());
 
         verifyNoInteractions((Object[]) tokens);
-        verifyNoInteractions((Object[]) extensions);
     }
 
     @Test
@@ -224,8 +223,8 @@ public void login1Commit1Logout1Login2Commit2Logout2() throws LoginException {
         // Create callback handler
         OAuthBearerToken[] tokens = new OAuthBearerToken[] {mock(OAuthBearerToken.class),
             mock(OAuthBearerToken.class)};
-        SaslExtensions[] extensions = new SaslExtensions[] {mock(SaslExtensions.class),
-            mock(SaslExtensions.class)};
+        SaslExtensions[] extensions = new SaslExtensions[] {saslExtensions(),
+            saslExtensions()};
         TestCallbackHandler testTokenCallbackHandler = new TestCallbackHandler(tokens, extensions);
 
         // Create login modules
@@ -270,7 +269,6 @@ public void login1Commit1Logout1Login2Commit2Logout2() throws LoginException {
         assertEquals(0, publicCredentials.size());
 
         verifyNoInteractions((Object[]) tokens);
-        verifyNoInteractions((Object[]) extensions);
     }
 
     @Test
@@ -285,8 +283,8 @@ public void loginAbortLoginCommitLogout() throws LoginException {
         // Create callback handler
         OAuthBearerToken[] tokens = new OAuthBearerToken[] {mock(OAuthBearerToken.class),
             mock(OAuthBearerToken.class)};
-        SaslExtensions[] extensions = new SaslExtensions[] {mock(SaslExtensions.class),
-            mock(SaslExtensions.class)};
+        SaslExtensions[] extensions = new SaslExtensions[] {saslExtensions(),
+            saslExtensions()};
         TestCallbackHandler testTokenCallbackHandler = new TestCallbackHandler(tokens, extensions);
 
         // Create login module
@@ -322,7 +320,6 @@ public void loginAbortLoginCommitLogout() throws LoginException {
         assertEquals(0, publicCredentials.size());
 
         verifyNoInteractions((Object[]) tokens);
-        verifyNoInteractions((Object[]) extensions);
     }
 
     @Test
@@ -338,8 +335,8 @@ public void login1Commit1Login2Abort2Login3Commit3Logout3() throws LoginExceptio
         // Create callback handler
         OAuthBearerToken[] tokens = new OAuthBearerToken[] {mock(OAuthBearerToken.class),
             mock(OAuthBearerToken.class), mock(OAuthBearerToken.class)};
-        SaslExtensions[] extensions = new SaslExtensions[] {mock(SaslExtensions.class),
-            mock(SaslExtensions.class), mock(SaslExtensions.class)};
+        SaslExtensions[] extensions = new SaslExtensions[] {saslExtensions(), saslExtensions(),
+            saslExtensions()};
         TestCallbackHandler testTokenCallbackHandler = new TestCallbackHandler(tokens, extensions);
 
         // Create login modules
@@ -406,7 +403,6 @@ public void login1Commit1Login2Abort2Login3Commit3Logout3() throws LoginExceptio
         assertSame(extensions[2], publicCredentials.iterator().next());
 
         verifyNoInteractions((Object[]) tokens);
-        verifyNoInteractions((Object[]) extensions);
     }
 
     /**
@@ -436,4 +432,21 @@ public void commitDoesNotThrowOnUnsupportedExtensionsCallback() throws LoginExce
 
         verifyNoInteractions((Object[]) tokens);
     }
+
+    /**
+     * We don't want to use mocks for our tests as we need to make sure to test
+     * {@link SaslExtensions}' {@link SaslExtensions#equals(Object)} and
+     * {@link SaslExtensions#hashCode()} methods.
+     *
+     * <p/>
+     *
+     * We need to make distinct calls to this method (vs. caching the result and reusing it
+     * multiple times) because we need to ensure the {@link SaslExtensions} instances are unique.
+     * This properly mimics the behavior that is used during the token refresh logic.
+     *
+     * @return Unique, newly-created {@link SaslExtensions} instance
+     */
+    private SaslExtensions saslExtensions() {
+        return SaslExtensions.empty();
+    }
 }
diff --git a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/internals/expiring/ExpiringCredentialRefreshingLoginTest.java b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/internals/expiring/ExpiringCredentialRefreshingLoginTest.java
index 9a77c738d2719..85f6622f090b3 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/internals/expiring/ExpiringCredentialRefreshingLoginTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/internals/expiring/ExpiringCredentialRefreshingLoginTest.java
@@ -48,6 +48,7 @@
 import org.junit.jupiter.api.Test;
 import org.mockito.InOrder;
 import org.mockito.Mockito;
+import org.mockito.internal.util.MockUtil;
 
 public class ExpiringCredentialRefreshingLoginTest {
     private static final Configuration EMPTY_WILDCARD_CONFIGURATION;
@@ -188,8 +189,7 @@ public TestLoginContext(TestExpiringCredentialRefreshingLogin testExpiringCreden
             super("contextName", null, null, EMPTY_WILDCARD_CONFIGURATION);
             this.testExpiringCredentialRefreshingLogin = Objects.requireNonNull(testExpiringCredentialRefreshingLogin);
             // sanity check to make sure it is likely a mock
-            if (Objects.requireNonNull(mockLoginContext).getClass().equals(LoginContext.class)
-                    || mockLoginContext.getClass().equals(getClass()))
+            if (!MockUtil.isMock(mockLoginContext))
                 throw new IllegalArgumentException();
             this.mockLoginContext = mockLoginContext;
         }
@@ -233,8 +233,7 @@ private static class TestLoginContextFactory extends LoginContextFactory {
         public void configure(LoginContext mockLoginContext,
                 TestExpiringCredentialRefreshingLogin testExpiringCredentialRefreshingLogin) throws LoginException {
             // sanity check to make sure it is likely a mock
-            if (Objects.requireNonNull(mockLoginContext).getClass().equals(LoginContext.class)
-                    || mockLoginContext.getClass().equals(TestLoginContext.class))
+            if (!MockUtil.isMock(mockLoginContext))
                 throw new IllegalArgumentException();
             this.testLoginContext = new TestLoginContext(Objects.requireNonNull(testExpiringCredentialRefreshingLogin),
                     mockLoginContext);
diff --git a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/AccessTokenBuilder.java b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/AccessTokenBuilder.java
index 24a40aa5b693c..5387d40abf91c 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/AccessTokenBuilder.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/AccessTokenBuilder.java
@@ -22,6 +22,9 @@
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.io.IOException;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
 import org.jose4j.jwk.PublicJsonWebKey;
@@ -39,7 +42,7 @@ public class AccessTokenBuilder {
 
     private String subject = "jdoe";
 
-    private final String subjectClaimName = ReservedClaimNames.SUBJECT;
+    private String subjectClaimName = ReservedClaimNames.SUBJECT;
 
     private Object scope = "engineering";
 
@@ -51,6 +54,8 @@ public class AccessTokenBuilder {
 
     private PublicJsonWebKey jwk;
 
+    private final Map<String, String> customClaims = new HashMap<>();
+
     public AccessTokenBuilder() {
         this(new MockTime());
     }
@@ -87,6 +92,11 @@ public String subjectClaimName() {
         return subjectClaimName;
     }
 
+    public AccessTokenBuilder subjectClaimName(String subjectClaimName) {
+        this.subjectClaimName = subjectClaimName;
+        return this;
+    }
+
     public Object scope() {
         return scope;
     }
@@ -133,6 +143,14 @@ public AccessTokenBuilder jwk(PublicJsonWebKey jwk) {
         return this;
     }
 
+    public AccessTokenBuilder addCustomClaim(String name, String value) {
+        String validatedName = ClaimValidationUtils.validateClaimNameOverride("claim name", name);
+        String validatedValue = ClaimValidationUtils.validateClaimNameOverride(validatedName, value);
+
+        customClaims.put(validatedName, validatedValue);
+        return this;
+    }
+
     @SuppressWarnings("unchecked")
     public String build() throws JoseException, IOException {
         ObjectNode node = objectMapper.createObjectNode();
@@ -162,6 +180,10 @@ public String build() throws JoseException, IOException {
         if (expirationSeconds != null)
             node.put(ReservedClaimNames.EXPIRATION_TIME, expirationSeconds);
 
+        for (Map.Entry<String, String> claim : customClaims.entrySet()) {
+            node.put(claim.getKey(), claim.getValue());
+        }
+
         String json = objectMapper.writeValueAsString(node);
 
         JsonWebSignature jws = new JsonWebSignature();
diff --git a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidatorTest.java b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidatorTest.java
index a48198879e3b8..f24bd590acf69 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidatorTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/oauthbearer/secured/ValidatorAccessTokenValidatorTest.java
@@ -59,6 +59,25 @@ public void testInvalidEncryptionAlgorithm() throws Exception {
             "fake is an unknown, unsupported or unavailable alg algorithm");
     }
 
+    @Test
+    public void testMissingSubShouldBeValid() throws Exception {
+        String subClaimName = "client_id";
+        String subject = "otherSub";
+        PublicJsonWebKey jwk = createRsaJwk();
+        AccessTokenBuilder tokenBuilder = new AccessTokenBuilder()
+            .jwk(jwk)
+            .alg(AlgorithmIdentifiers.RSA_USING_SHA256)
+            .addCustomClaim(subClaimName, subject)
+            .subjectClaimName(subClaimName)
+            .subject(null);
+        AccessTokenValidator validator = createAccessTokenValidator(tokenBuilder);
+
+        // Validation should succeed (e.g. signature verification) even if sub claim is missing
+        OAuthBearerToken token = validator.validate(tokenBuilder.build());
+
+        assertEquals(subject, token.principalName());
+    }
+
     private void testEncryptionAlgorithm(PublicJsonWebKey jwk, String alg) throws Exception {
         AccessTokenBuilder builder = new AccessTokenBuilder().jwk(jwk).alg(alg);
         AccessTokenValidator validator = createAccessTokenValidator(builder);
diff --git a/clients/src/test/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactoryTest.java b/clients/src/test/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactoryTest.java
index 0e494cc529da4..fc3726ac59a4b 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactoryTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/ssl/DefaultSslEngineFactoryTest.java
@@ -18,7 +18,6 @@
 
 import org.apache.kafka.common.config.SslConfigs;
 import org.apache.kafka.common.config.types.Password;
-import org.apache.kafka.common.errors.InvalidConfigurationException;
 import org.apache.kafka.test.TestUtils;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -33,7 +32,6 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertThrows;
 
 public class DefaultSslEngineFactoryTest {
 
@@ -291,7 +289,14 @@ public void testPemKeyStoreFileNoKeyPassword() throws Exception {
         configs.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG,
                 pemFilePath(pemAsConfigValue(KEY, CERTCHAIN).value()));
         configs.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, DefaultSslEngineFactory.PEM_TYPE);
-        assertThrows(InvalidConfigurationException.class, () -> factory.configure(configs));
+        configs.put(SslConfigs.SSL_KEY_PASSWORD_CONFIG, null);
+        factory.configure(configs);
+
+        KeyStore keyStore = factory.keystore();
+        List<String> aliases = Collections.list(keyStore.aliases());
+        assertEquals(Collections.singletonList("kafka"), aliases);
+        assertNotNull(keyStore.getCertificate("kafka"), "Certificate not loaded");
+        assertNotNull(keyStore.getKey("kafka", null), "Private key not loaded");
     }
 
     @Test
diff --git a/clients/src/test/java/org/apache/kafka/common/security/ssl/SslFactoryTest.java b/clients/src/test/java/org/apache/kafka/common/security/ssl/SslFactoryTest.java
index cfb37b3944ed4..b827b24f8410c 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/ssl/SslFactoryTest.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/ssl/SslFactoryTest.java
@@ -43,6 +43,7 @@
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.test.TestSslUtils;
 import org.apache.kafka.common.network.Mode;
+import org.apache.kafka.test.TestUtils;
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -66,7 +67,7 @@ public SslFactoryTest(String tlsProtocol) {
 
     @Test
     public void testSslFactoryConfiguration() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -122,7 +123,7 @@ public void testSslFactoryWithIncorrectProviderClassConfiguration() {
 
     @Test
     public void testSslFactoryWithoutPasswordConfiguration() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -138,7 +139,7 @@ public void testSslFactoryWithoutPasswordConfiguration() throws Exception {
 
     @Test
     public void testClientMode() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> clientSslConfig = sslConfigsBuilder(Mode.CLIENT)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -152,7 +153,7 @@ public void testClientMode() throws Exception {
 
     @Test
     public void staleSslEngineFactoryShouldBeClosed() throws IOException, GeneralSecurityException {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> clientSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -164,7 +165,7 @@ public void staleSslEngineFactoryShouldBeClosed() throws IOException, GeneralSec
         assertNotNull(sslEngineFactory);
         assertFalse(sslEngineFactory.closed);
 
-        trustStoreFile = File.createTempFile("truststore", ".jks");
+        trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         clientSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -178,7 +179,7 @@ public void staleSslEngineFactoryShouldBeClosed() throws IOException, GeneralSec
 
     @Test
     public void testReconfiguration() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> sslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -193,7 +194,7 @@ public void testReconfiguration() throws Exception {
         assertSame(sslEngineFactory, sslFactory.sslEngineFactory(), "SslEngineFactory recreated unnecessarily");
 
         // Verify that the SslEngineFactory is recreated on reconfigure() if config is changed
-        trustStoreFile = File.createTempFile("truststore", ".jks");
+        trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         sslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -230,7 +231,7 @@ public void testReconfiguration() throws Exception {
 
     @Test
     public void testReconfigurationWithoutTruststore() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> sslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -258,7 +259,7 @@ public void testReconfigurationWithoutTruststore() throws Exception {
 
     @Test
     public void testReconfigurationWithoutKeystore() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> sslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -273,7 +274,7 @@ public void testReconfigurationWithoutKeystore() throws Exception {
                 "SSL context recreated unnecessarily");
         assertFalse(sslFactory.createSslEngine("localhost", 0).getUseClientMode());
 
-        File newTrustStoreFile = File.createTempFile("truststore", ".jks");
+        File newTrustStoreFile = TestUtils.tempFile("truststore", ".jks");
         sslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(newTrustStoreFile)
                 .build();
@@ -338,7 +339,7 @@ public void testPemReconfiguration() throws Exception {
 
     @Test
     public void testKeyStoreTrustStoreValidation() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .build();
@@ -349,8 +350,8 @@ public void testKeyStoreTrustStoreValidation() throws Exception {
 
     @Test
     public void testUntrustedKeyStoreValidationFails() throws Exception {
-        File trustStoreFile1 = File.createTempFile("truststore1", ".jks");
-        File trustStoreFile2 = File.createTempFile("truststore2", ".jks");
+        File trustStoreFile1 = TestUtils.tempFile("truststore1", ".jks");
+        File trustStoreFile2 = TestUtils.tempFile("truststore2", ".jks");
         Map<String, Object> sslConfig1 = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile1)
                 .build();
@@ -419,12 +420,12 @@ public void testPemCertificateEntriesValidation() throws Exception {
     }
 
     private void verifyCertificateEntriesValidation(boolean usePem, String tlsProtocol) throws Exception {
-        File trustStoreFile = usePem ? null : File.createTempFile("truststore", ".jks");
+        File trustStoreFile = usePem ? null : TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .usePem(usePem)
                 .build();
-        File newTrustStoreFile = usePem ? null : File.createTempFile("truststore", ".jks");
+        File newTrustStoreFile = usePem ? null : TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> newCnConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(newTrustStoreFile)
                 .cn("Another CN")
@@ -447,7 +448,7 @@ private void verifyCertificateEntriesValidation(boolean usePem, String tlsProtoc
      */
     @Test
     public void testClientSpecifiedSslEngineFactoryUsed() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> clientSslConfig = sslConfigsBuilder(Mode.CLIENT)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -461,7 +462,7 @@ public void testClientSpecifiedSslEngineFactoryUsed() throws Exception {
 
     @Test
     public void testEngineFactoryClosed() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> clientSslConfig = sslConfigsBuilder(Mode.CLIENT)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -480,7 +481,7 @@ public void testEngineFactoryClosed() throws Exception {
      */
     @Test
     public void testServerSpecifiedSslEngineFactoryUsed() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -497,7 +498,7 @@ public void testServerSpecifiedSslEngineFactoryUsed() throws Exception {
      */
     @Test
     public void testInvalidSslEngineFactory() throws Exception {
-        File trustStoreFile = File.createTempFile("truststore", ".jks");
+        File trustStoreFile = TestUtils.tempFile("truststore", ".jks");
         Map<String, Object> clientSslConfig = sslConfigsBuilder(Mode.CLIENT)
                 .createNewTrustStore(trustStoreFile)
                 .useClientCert(false)
@@ -510,7 +511,7 @@ public void testInvalidSslEngineFactory() throws Exception {
     @Test
     public void testUsedConfigs() throws IOException, GeneralSecurityException {
         Map<String, Object> serverSslConfig = sslConfigsBuilder(Mode.SERVER)
-                .createNewTrustStore(File.createTempFile("truststore", ".jks"))
+                .createNewTrustStore(TestUtils.tempFile("truststore", ".jks"))
                 .useClientCert(false)
                 .build();
         serverSslConfig.put(SslConfigs.SSL_ENGINE_FACTORY_CLASS_CONFIG, TestSslUtils.TestSslEngineFactory.class);
diff --git a/clients/src/test/java/org/apache/kafka/common/security/ssl/mock/TestKeyManagerFactory.java b/clients/src/test/java/org/apache/kafka/common/security/ssl/mock/TestKeyManagerFactory.java
index dc686c246b52a..7c9c0dc094c61 100644
--- a/clients/src/test/java/org/apache/kafka/common/security/ssl/mock/TestKeyManagerFactory.java
+++ b/clients/src/test/java/org/apache/kafka/common/security/ssl/mock/TestKeyManagerFactory.java
@@ -35,6 +35,7 @@
 import org.apache.kafka.common.config.types.Password;
 import org.apache.kafka.test.TestSslUtils;
 import org.apache.kafka.test.TestSslUtils.CertificateBuilder;
+import org.apache.kafka.test.TestUtils;
 
 public class TestKeyManagerFactory extends KeyManagerFactorySpi {
     public static final String ALGORITHM = "TestAlgorithm";
@@ -70,7 +71,7 @@ protected TestKeyManager() {
                 this.certificate = certBuilder.generate("CN=" + CN + ", O=A server", this.keyPair);
                 Map<String, X509Certificate> certificates = new HashMap<>();
                 certificates.put(ALIAS, certificate);
-                File trustStoreFile = File.createTempFile("testTrustStore", ".jks");
+                File trustStoreFile = TestUtils.tempFile("testTrustStore", ".jks");
                 mockTrustStoreFile = trustStoreFile.getPath();
                 TestSslUtils.createTrustStore(mockTrustStoreFile, new Password(TestSslUtils.TRUST_STORE_PASSWORD), certificates);
             } catch (IOException | GeneralSecurityException e) {
diff --git a/clients/src/test/java/org/apache/kafka/server/policy/AlterConfigPolicyTest.java b/clients/src/test/java/org/apache/kafka/server/policy/AlterConfigPolicyTest.java
new file mode 100644
index 0000000000000..13971effe9ec1
--- /dev/null
+++ b/clients/src/test/java/org/apache/kafka/server/policy/AlterConfigPolicyTest.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.policy;
+
+import org.apache.kafka.common.config.ConfigResource;
+import org.apache.kafka.common.config.ConfigResource.Type;
+import org.apache.kafka.server.policy.AlterConfigPolicy.RequestMetadata;
+
+import org.junit.jupiter.api.Test;
+import java.util.Collections;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+public class AlterConfigPolicyTest {
+
+    @Test
+    public void testRequestMetadataEquals() {
+        RequestMetadata requestMetadata = new RequestMetadata(
+            new ConfigResource(Type.BROKER, "0"),
+            Collections.singletonMap("foo", "bar")
+        );
+
+        assertEquals(requestMetadata, requestMetadata);
+
+        assertNotEquals(requestMetadata, null);
+        assertNotEquals(requestMetadata, new Object());
+        assertNotEquals(requestMetadata, new RequestMetadata(
+            new ConfigResource(Type.BROKER, "1"),
+            Collections.singletonMap("foo", "bar")
+        ));
+        assertNotEquals(requestMetadata, new RequestMetadata(
+            new ConfigResource(Type.BROKER, "0"),
+            Collections.emptyMap()
+        ));
+    }
+}
diff --git a/clients/src/test/java/org/apache/kafka/test/MockProducerInterceptor.java b/clients/src/test/java/org/apache/kafka/test/MockProducerInterceptor.java
index 133ff567d47fa..eedc3bdaecdf1 100644
--- a/clients/src/test/java/org/apache/kafka/test/MockProducerInterceptor.java
+++ b/clients/src/test/java/org/apache/kafka/test/MockProducerInterceptor.java
@@ -35,6 +35,7 @@ public class MockProducerInterceptor implements ClusterResourceListener, Produce
     public static final AtomicInteger ON_SUCCESS_COUNT = new AtomicInteger(0);
     public static final AtomicInteger ON_ERROR_COUNT = new AtomicInteger(0);
     public static final AtomicInteger ON_ERROR_WITH_METADATA_COUNT = new AtomicInteger(0);
+    public static final AtomicInteger ON_ACKNOWLEDGEMENT_COUNT = new AtomicInteger(0);
     public static final AtomicReference<ClusterResource> CLUSTER_META = new AtomicReference<>();
     public static final ClusterResource NO_CLUSTER_ID = new ClusterResource("no_cluster_id");
     public static final AtomicReference<ClusterResource> CLUSTER_ID_BEFORE_ON_ACKNOWLEDGEMENT = new AtomicReference<>(NO_CLUSTER_ID);
@@ -69,6 +70,7 @@ public ProducerRecord<String, String> onSend(ProducerRecord<String, String> reco
 
     @Override
     public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
+        ON_ACKNOWLEDGEMENT_COUNT.incrementAndGet();
         // This will ensure that we get the cluster metadata when onAcknowledgement is called for the first time
         // as subsequent compareAndSet operations will fail.
         CLUSTER_ID_BEFORE_ON_ACKNOWLEDGEMENT.compareAndSet(NO_CLUSTER_ID, CLUSTER_META.get());
diff --git a/clients/src/test/java/org/apache/kafka/test/TestSslUtils.java b/clients/src/test/java/org/apache/kafka/test/TestSslUtils.java
index 39fb6e8bdb551..d02abc2940494 100644
--- a/clients/src/test/java/org/apache/kafka/test/TestSslUtils.java
+++ b/clients/src/test/java/org/apache/kafka/test/TestSslUtils.java
@@ -216,7 +216,7 @@ public static void convertToPem(Map<String, Object> sslProps, boolean writeToFil
         }
         if (trustCerts != null) {
             if (tsPath == null) {
-                tsPath = File.createTempFile("truststore", ".pem").getPath();
+                tsPath = TestUtils.tempFile("truststore", ".pem").getPath();
                 sslProps.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, tsPath);
             }
             sslProps.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, PEM_TYPE);
@@ -504,13 +504,13 @@ private Map<String, Object> buildJks() throws IOException, GeneralSecurityExcept
             File keyStoreFile = null;
 
             if (mode == Mode.CLIENT && useClientCert) {
-                keyStoreFile = File.createTempFile("clientKS", ".jks");
+                keyStoreFile = TestUtils.tempFile("clientKS", ".jks");
                 KeyPair cKP = generateKeyPair(algorithm);
                 X509Certificate cCert = certBuilder.generate("CN=" + cn + ", O=A client", cKP);
                 createKeyStore(keyStoreFile.getPath(), keyStorePassword, keyPassword, "client", cKP.getPrivate(), cCert);
                 certs.put(certAlias, cCert);
             } else if (mode == Mode.SERVER) {
-                keyStoreFile = File.createTempFile("serverKS", ".jks");
+                keyStoreFile = TestUtils.tempFile("serverKS", ".jks");
                 KeyPair sKP = generateKeyPair(algorithm);
                 X509Certificate sCert = certBuilder.generate("CN=" + cn + ", O=A server", sKP);
                 createKeyStore(keyStoreFile.getPath(), keyStorePassword, keyPassword, "server", sKP.getPrivate(), sCert);
diff --git a/clients/src/test/java/org/apache/kafka/test/TestUtils.java b/clients/src/test/java/org/apache/kafka/test/TestUtils.java
index 3c819befa5fa8..81c77ea9c0e3d 100644
--- a/clients/src/test/java/org/apache/kafka/test/TestUtils.java
+++ b/clients/src/test/java/org/apache/kafka/test/TestUtils.java
@@ -34,10 +34,10 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.lang.reflect.Field;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
@@ -142,26 +142,40 @@ public static String randomString(final int len) {
     }
 
     /**
-     * Create an empty file in the default temporary-file directory, using `kafka` as the prefix and `tmp` as the
-     * suffix to generate its name.
+     * Create an empty file in the default temporary-file directory, using the given prefix and suffix
+     * to generate its name.
+     * @throws IOException
      */
-    public static File tempFile() throws IOException {
-        final File file = File.createTempFile("kafka", ".tmp");
+    public static File tempFile(final String prefix, final String suffix) throws IOException {
+        final File file = Files.createTempFile(prefix, suffix).toFile();
         file.deleteOnExit();
 
+        Exit.addShutdownHook("delete-temp-file-shutdown-hook", () -> {
+            try {
+                Utils.delete(file);
+            } catch (IOException e) {
+                log.error("Error deleting {}", file.getAbsolutePath(), e);
+            }
+        });
+
         return file;
     }
 
+    /**
+     * Create an empty file in the default temporary-file directory, using `kafka` as the prefix and `tmp` as the
+     * suffix to generate its name.
+     */
+    public static File tempFile() throws IOException {
+        return tempFile("kafka", ".tmp");
+    }
+
     /**
      * Create a file with the given contents in the default temporary-file directory,
      * using `kafka` as the prefix and `tmp` as the suffix to generate its name.
      */
     public static File tempFile(final String contents) throws IOException {
         final File file = tempFile();
-        final FileWriter writer = new FileWriter(file);
-        writer.write(contents);
-        writer.close();
-
+        Files.write(file.toPath(), contents.getBytes(StandardCharsets.UTF_8));
         return file;
     }
 
diff --git a/clients/src/test/resources/common/message/SimpleExampleMessage.json b/clients/src/test/resources/common/message/SimpleExampleMessage.json
index 342a9b994a844..9b9c049593afc 100644
--- a/clients/src/test/resources/common/message/SimpleExampleMessage.json
+++ b/clients/src/test/resources/common/message/SimpleExampleMessage.json
@@ -50,9 +50,10 @@
       "fields": [
         { "name": "structId", "type": "string", "versions": "2+", "about": "String field in struct"}
     ]},
-    { "name":  "myCommonStruct", "type": "TestCommonStruct", "versions": "0+"},
-    { "name":  "myOtherCommonStruct", "type": "TestCommonStruct", "versions": "0+"},
-    { "name":  "myUint16", "type": "uint16", "versions": "1+", "default":  "33000" }
+    { "name": "myCommonStruct", "type": "TestCommonStruct", "versions": "0+"},
+    { "name": "myOtherCommonStruct", "type": "TestCommonStruct", "versions": "0+"},
+    { "name": "myUint16", "type": "uint16", "versions": "1+", "default": "33000" },
+    { "name": "myUint32", "type": "uint32", "versions": "1+", "default": "1234567" }
   ],
   "commonStructs": [
     { "name": "TestCommonStruct", "versions": "0+", "fields": [
diff --git a/config/kraft/README.md b/config/kraft/README.md
index 8ba87c484480b..d280f87958be1 100644
--- a/config/kraft/README.md
+++ b/config/kraft/README.md
@@ -114,11 +114,12 @@ We don't support any kind of upgrade right now, either to or from KRaft mode.  T
 
 Finally, the following Kafka features have not yet been fully implemented:
 
-* Support for certain security features: configuring a KRaft-based Authorizer, setting up SCRAM, delegation tokens, and so forth
-  (although note that you can use authorizers such as `kafka.security.authorizer.AclAuthorizer` with KRaft clusters, even
-  if they are ZooKeeper-based: simply define `authorizer.class.name` and configure the authorizer as you normally would).
+* Configuring SCRAM users via the administrative API
+* Supporting JBOD configurations with multiple storage directories
+* Modifying certain dynamic configurations on the standalone KRaft controller
 * Support for some configurations, like enabling unclean leader election by default or dynamically changing broker endpoints
-* Support for KIP-112 "JBOD" modes
+* Delegation tokens
+* Upgrade from ZooKeeper mode
 
 We've tried to make it clear when a feature is not supported in the preview release, but you may encounter some rough edges. We will cover these feature gaps incrementally in the `trunk` branch.
 
diff --git a/config/server.properties b/config/server.properties
index f5172c297ec66..21ba1c7d9c61b 100644
--- a/config/server.properties
+++ b/config/server.properties
@@ -109,7 +109,7 @@ log.retention.hours=168
 #log.retention.bytes=1073741824
 
 # The maximum size of a log segment file. When this size is reached a new log segment will be created.
-log.segment.bytes=1073741824
+#log.segment.bytes=1073741824
 
 # The interval at which log segments are checked to see if they can be deleted according
 # to the retention policies
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/ConnectorTransactionBoundaries.java b/connect/api/src/main/java/org/apache/kafka/connect/source/ConnectorTransactionBoundaries.java
new file mode 100644
index 0000000000000..73746ba0993f3
--- /dev/null
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/ConnectorTransactionBoundaries.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.source;
+
+/**
+ * An enum to represent the level of support for connector-defined transaction boundaries.
+ */
+public enum ConnectorTransactionBoundaries {
+    /**
+     * Signals that a connector can define its own transaction boundaries.
+     */
+    SUPPORTED,
+    /**
+     * Signals that a connector cannot define its own transaction boundaries.
+     */
+    UNSUPPORTED
+}
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/ExactlyOnceSupport.java b/connect/api/src/main/java/org/apache/kafka/connect/source/ExactlyOnceSupport.java
new file mode 100644
index 0000000000000..3980410e4b538
--- /dev/null
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/ExactlyOnceSupport.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.source;
+
+/**
+ * An enum to represent the level of support for exactly-once delivery from a source connector.
+ */
+public enum ExactlyOnceSupport {
+    /**
+     * Signals that a connector supports exactly-once delivery.
+     */
+    SUPPORTED,
+    /**
+     * Signals that a connector does not support exactly-once delivery.
+     */
+    UNSUPPORTED;
+}
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceConnector.java b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceConnector.java
index 6e9694024d334..3bd012f9fbce0 100644
--- a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceConnector.java
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceConnector.java
@@ -18,6 +18,8 @@
 
 import org.apache.kafka.connect.connector.Connector;
 
+import java.util.Map;
+
 /**
  * SourceConnectors implement the connector interface to pull data from another system and send
  * it to Kafka.
@@ -28,4 +30,45 @@ public abstract class SourceConnector extends Connector {
     protected SourceConnectorContext context() {
         return (SourceConnectorContext) context;
     }
+
+    /**
+     * Signals whether the connector supports exactly-once delivery guarantees with a proposed configuration.
+     * Connector authors can assume that worker-level exactly-once support is enabled when this method is invoked.
+     *
+     * <p>For backwards compatibility, the default implementation will return {@code null}, but connector authors are
+     * strongly encouraged to override this method to return a non-null value such as
+     * {@link ExactlyOnceSupport#SUPPORTED SUPPORTED} or {@link ExactlyOnceSupport#UNSUPPORTED UNSUPPORTED}.
+     *
+     * <p>Similar to {@link #validate(Map) validate}, this method may be called by the runtime before the
+     * {@link #start(Map) start} method is invoked when the connector will be run with exactly-once support.
+     *
+     * @param connectorConfig the configuration that will be used for the connector.
+     * @return {@link ExactlyOnceSupport#SUPPORTED} if the connector can provide exactly-once support with the given
+     * configuration, and {@link ExactlyOnceSupport#UNSUPPORTED} if it cannot. If this method is overridden by a
+     * connector, should not be {@code null}, but if {@code null}, it will be assumed that the connector cannot provide
+     * exactly-once guarantees.
+     * @since 3.3
+     */
+    public ExactlyOnceSupport exactlyOnceSupport(Map<String, String> connectorConfig) {
+        return null;
+    }
+
+    /**
+     * Signals whether the connector implementation is capable of defining the transaction boundaries for a
+     * connector with the given configuration. This method is called before {@link #start(Map)}, only when the
+     * runtime supports exactly-once and the connector configuration includes {@code transaction.boundary=connector}.
+     *
+     * <p>This method need not be implemented if the connector implementation does not support defining
+     * transaction boundaries.
+     *
+     * @param connectorConfig the configuration that will be used for the connector
+     * @return {@link ConnectorTransactionBoundaries#SUPPORTED} if the connector will define its own transaction boundaries,
+     * or {@link ConnectorTransactionBoundaries#UNSUPPORTED} otherwise; may never be {@code null}. The default implementation
+     * returns {@link ConnectorTransactionBoundaries#UNSUPPORTED}.
+     * @since 3.3
+     * @see TransactionContext
+     */
+    public ConnectorTransactionBoundaries canDefineTransactionBoundaries(Map<String, String> connectorConfig) {
+        return ConnectorTransactionBoundaries.UNSUPPORTED;
+    }
 }
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTask.java b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTask.java
index f5209e1ccab64..559f02340ca17 100644
--- a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTask.java
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTask.java
@@ -16,17 +16,65 @@
  */
 package org.apache.kafka.connect.source;
 
-import org.apache.kafka.connect.connector.Task;
 import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.connect.connector.Task;
 
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
+import java.util.Objects;
 
 /**
  * SourceTask is a Task that pulls records from another system for storage in Kafka.
  */
 public abstract class SourceTask implements Task {
 
+    /**
+     * The configuration key that determines how source tasks will define transaction boundaries
+     * when exactly-once support is enabled.
+     */
+    public static final String TRANSACTION_BOUNDARY_CONFIG = "transaction.boundary";
+
+    /**
+     * Represents the permitted values for the {@link #TRANSACTION_BOUNDARY_CONFIG} property.
+     */
+    public enum TransactionBoundary {
+        /**
+         * A new transaction will be started and committed for every batch of records returned by {@link #poll()}.
+         */
+        POLL,
+        /**
+         * Transactions will be started and committed on a user-defined time interval.
+         */
+        INTERVAL,
+        /**
+         * Transactions will be defined by the connector itself, via a {@link TransactionContext}.
+         */
+        CONNECTOR;
+
+        /**
+         * The default transaction boundary style that will be used for source connectors when no style is explicitly
+         * configured.
+         */
+        public static final TransactionBoundary DEFAULT = POLL;
+
+        /**
+         * Parse a {@link TransactionBoundary} from the given string.
+         * @param property the string to parse; should not be null
+         * @return the {@link TransactionBoundary} whose name matches the given string
+         * @throws IllegalArgumentException if there is no transaction boundary type with the given name
+         */
+        public static TransactionBoundary fromProperty(String property) {
+            Objects.requireNonNull(property, "Value for transaction boundary property may not be null");
+            return TransactionBoundary.valueOf(property.toUpperCase(Locale.ROOT).trim());
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
     protected SourceTaskContext context;
 
     /**
@@ -44,16 +92,13 @@ public void initialize(SourceTaskContext context) {
     public abstract void start(Map<String, String> props);
 
     /**
-     * <p>
      * Poll this source task for new records. If no data is currently available, this method
      * should block but return control to the caller regularly (by returning {@code null}) in
      * order for the task to transition to the {@code PAUSED} state if requested to do so.
-     * </p>
      * <p>
      * The task will be {@link #stop() stopped} on a separate thread, and when that happens
      * this method is expected to unblock, quickly finish up any remaining processing, and
      * return.
-     * </p>
      *
      * @return a list of source records
      */
@@ -63,12 +108,10 @@ public void initialize(SourceTaskContext context) {
      * <p>
      * Commit the offsets, up to the offsets that have been returned by {@link #poll()}. This
      * method should block until the commit is complete.
-     * </p>
      * <p>
      * SourceTasks are not required to implement this functionality; Kafka Connect will record offsets
      * automatically. This hook is provided for systems that also need to store offsets internally
      * in their own system.
-     * </p>
      */
     public void commit() throws InterruptedException {
         // This space intentionally left blank.
@@ -91,17 +134,14 @@ public void commit() throws InterruptedException {
      * <p>
      * Commit an individual {@link SourceRecord} when the callback from the producer client is received. This method is
      * also called when a record is filtered by a transformation, and thus will never be ACK'd by a broker.
-     * </p>
      * <p>
      * This is an alias for {@link #commitRecord(SourceRecord, RecordMetadata)} for backwards compatibility. The default
      * implementation of {@link #commitRecord(SourceRecord, RecordMetadata)} just calls this method. It is not necessary
      * to override both methods.
-     * </p>
      * <p>
      * SourceTasks are not required to implement this functionality; Kafka Connect will record offsets
      * automatically. This hook is provided for systems that also need to store offsets internally
      * in their own system.
-     * </p>
      *
      * @param record {@link SourceRecord} that was successfully sent via the producer or filtered by a transformation
      * @throws InterruptedException
@@ -115,19 +155,16 @@ public void commitRecord(SourceRecord record) throws InterruptedException {
     /**
      * <p>
      * Commit an individual {@link SourceRecord} when the callback from the producer client is received. This method is
-     * also called when a record is filtered by a transformation or when {@link ConnectorConfig} "errors.tolerance" is set to "all"
+     * also called when a record is filtered by a transformation or when "errors.tolerance" is set to "all"
      * and thus will never be ACK'd by a broker.
      * In both cases {@code metadata} will be null.
-     * </p>
      * <p>
      * SourceTasks are not required to implement this functionality; Kafka Connect will record offsets
      * automatically. This hook is provided for systems that also need to store offsets internally
      * in their own system.
-     * </p>
      * <p>
      * The default implementation just calls {@link #commitRecord(SourceRecord)}, which is a nop by default. It is
      * not necessary to implement both methods.
-     * </p>
      *
      * @param record {@link SourceRecord} that was successfully sent via the producer, filtered by a transformation, or dropped on producer exception
      * @param metadata {@link RecordMetadata} record metadata returned from the broker, or null if the record was filtered or if producer exceptions are ignored
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTaskContext.java b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTaskContext.java
index ddb0a78718351..7745b197c2d4c 100644
--- a/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTaskContext.java
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/SourceTaskContext.java
@@ -38,4 +38,29 @@ public interface SourceTaskContext {
      * Get the OffsetStorageReader for this SourceTask.
      */
     OffsetStorageReader offsetStorageReader();
+
+    /**
+     * Get a {@link TransactionContext} that can be used to define producer transaction boundaries
+     * when exactly-once support is enabled for the connector.
+     *
+     * <p>This method was added in Apache Kafka 3.2. Source tasks that use this method but want to
+     * maintain backward compatibility so they can also be deployed to older Connect runtimes
+     * should guard the call to this method with a try-catch block, since calling this method will result in a
+     * {@link NoSuchMethodException} or {@link NoClassDefFoundError} when the source connector is deployed to
+     * Connect runtimes older than Kafka 3.2. For example:
+     * <pre>
+     *     TransactionContext transactionContext;
+     *     try {
+     *         transactionContext = context.transactionContext();
+     *     } catch (NoSuchMethodError | NoClassDefFoundError e) {
+     *         transactionContext = null;
+     *     }
+     * </pre>
+     *
+     * @return the transaction context, or null if the connector was not configured to specify transaction boundaries
+     * @since 3.3
+     */
+    default TransactionContext transactionContext() {
+        return null;
+    }
 }
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/source/TransactionContext.java b/connect/api/src/main/java/org/apache/kafka/connect/source/TransactionContext.java
new file mode 100644
index 0000000000000..f90d75baf4748
--- /dev/null
+++ b/connect/api/src/main/java/org/apache/kafka/connect/source/TransactionContext.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.source;
+
+/**
+ * Provided to source tasks to allow them to define their own producer transaction boundaries when
+ * exactly-once support is enabled.
+ */
+public interface TransactionContext {
+
+    /**
+     * Request a transaction commit after the next batch of records from {@link SourceTask#poll()}
+     * is processed.
+     */
+    void commitTransaction();
+
+    /**
+     * Request a transaction commit after a source record is processed. The source record will be the
+     * last record in the committed transaction.
+     * @param record the record to commit the transaction after; may not be null.
+     */
+    void commitTransaction(SourceRecord record);
+
+    /**
+     * Requests a transaction abort after the next batch of records from {@link SourceTask#poll()}. All of
+     * the records in that transaction will be discarded and will not appear in a committed transaction.
+     * However, offsets for that transaction will still be committed so than the records in that transaction
+     * are not reprocessed. If the data should instead be reprocessed, the task should not invoke this method
+     * and should instead throw an exception.
+     */
+    void abortTransaction();
+
+    /**
+     * Requests a transaction abort after a source record is processed. The source record will be the
+     * last record in the aborted transaction. All of the records in that transaction will be discarded
+     * and will not appear in a committed transaction. However, offsets for that transaction will still
+     * be committed so that the records in that transaction are not reprocessed. If the data should be
+     * reprocessed, the task should not invoke this method and should instead throw an exception.
+     * @param record the record to abort the transaction after; may not be null.
+     */
+    void abortTransaction(SourceRecord record);
+}
diff --git a/connect/api/src/main/java/org/apache/kafka/connect/storage/StringConverter.java b/connect/api/src/main/java/org/apache/kafka/connect/storage/StringConverter.java
index 534cdddfa1331..69eda3459bbc8 100644
--- a/connect/api/src/main/java/org/apache/kafka/connect/storage/StringConverter.java
+++ b/connect/api/src/main/java/org/apache/kafka/connect/storage/StringConverter.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.common.errors.SerializationException;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.DataException;
@@ -104,6 +105,7 @@ public SchemaAndValue toConnectHeader(String topic, String headerKey, byte[] val
 
     @Override
     public void close() {
-        // do nothing
+        Utils.closeQuietly(this.serializer, "string converter serializer");
+        Utils.closeQuietly(this.deserializer, "string converter deserializer");
     }
 }
diff --git a/connect/basic-auth-extension/src/main/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilter.java b/connect/basic-auth-extension/src/main/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilter.java
index 0299cbba0b546..ff12a384b7628 100644
--- a/connect/basic-auth-extension/src/main/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilter.java
+++ b/connect/basic-auth-extension/src/main/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilter.java
@@ -18,7 +18,11 @@
 package org.apache.kafka.connect.rest.basic.auth.extension;
 
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import java.util.function.Predicate;
 import java.util.regex.Pattern;
 import javax.security.auth.login.Configuration;
 import javax.ws.rs.HttpMethod;
@@ -45,7 +49,10 @@
 public class JaasBasicAuthFilter implements ContainerRequestFilter {
 
     private static final Logger log = LoggerFactory.getLogger(JaasBasicAuthFilter.class);
-    private static final Pattern TASK_REQUEST_PATTERN = Pattern.compile("/?connectors/([^/]+)/tasks/?");
+    private static final Set<RequestMatcher> INTERNAL_REQUEST_MATCHERS = new HashSet<>(Arrays.asList(
+            new RequestMatcher(HttpMethod.POST, "/?connectors/([^/]+)/tasks/?"),
+            new RequestMatcher(HttpMethod.PUT, "/?connectors/[^/]+/fence/?")
+    ));
     private static final String CONNECT_LOGIN_MODULE = "KafkaConnect";
 
     static final String AUTHORIZATION = "Authorization";
@@ -53,13 +60,29 @@ public class JaasBasicAuthFilter implements ContainerRequestFilter {
     // Package-private for testing
     final Configuration configuration;
 
+    private static class RequestMatcher implements Predicate<ContainerRequestContext> {
+        private final String method;
+        private final Pattern path;
+
+        public RequestMatcher(String method, String path) {
+            this.method = method;
+            this.path = Pattern.compile(path);
+        }
+
+        @Override
+        public boolean test(ContainerRequestContext requestContext) {
+            return requestContext.getMethod().equals(method)
+                    && path.matcher(requestContext.getUriInfo().getPath()).matches();
+        }
+    }
+
     public JaasBasicAuthFilter(Configuration configuration) {
         this.configuration = configuration;
     }
 
     @Override
     public void filter(ContainerRequestContext requestContext) throws IOException {
-        if (isInternalTaskConfigRequest(requestContext)) {
+        if (isInternalRequest(requestContext)) {
             log.trace("Skipping authentication for internal request");
             return;
         }
@@ -82,12 +105,10 @@ public void filter(ContainerRequestContext requestContext) throws IOException {
         }
     }
 
-    private static boolean isInternalTaskConfigRequest(ContainerRequestContext requestContext) {
-        return requestContext.getMethod().equals(HttpMethod.POST)
-            && TASK_REQUEST_PATTERN.matcher(requestContext.getUriInfo().getPath()).matches();
+    private boolean isInternalRequest(ContainerRequestContext requestContext) {
+        return INTERNAL_REQUEST_MATCHERS.stream().anyMatch(m -> m.test(requestContext));
     }
 
-
     public static class BasicAuthCallBackHandler implements CallbackHandler {
 
         private static final String BASIC = "basic";
diff --git a/connect/basic-auth-extension/src/test/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilterTest.java b/connect/basic-auth-extension/src/test/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilterTest.java
index 561095f68218a..2513c308a7e01 100644
--- a/connect/basic-auth-extension/src/test/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilterTest.java
+++ b/connect/basic-auth-extension/src/test/java/org/apache/kafka/connect/rest/basic/auth/extension/JaasBasicAuthFilterTest.java
@@ -42,8 +42,10 @@
 
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
 import static org.mockito.Mockito.when;
 
 public class JaasBasicAuthFilterTest {
@@ -58,7 +60,7 @@ public void testSuccess() throws IOException {
         ContainerRequestContext requestContext = setMock("Basic", "user", "password");
         jaasBasicAuthFilter.filter(requestContext);
 
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -69,7 +71,7 @@ public void testEmptyCredentialsFile() throws IOException {
         ContainerRequestContext requestContext = setMock("Basic", "user", "password");
         jaasBasicAuthFilter.filter(requestContext);
 
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -81,7 +83,7 @@ public void testBadCredential() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -93,7 +95,7 @@ public void testBadPassword() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -105,7 +107,7 @@ public void testUnknownBearer() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -117,7 +119,7 @@ public void testUnknownLoginModule() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -128,7 +130,7 @@ public void testUnknownCredentialsFile() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
@@ -139,17 +141,26 @@ public void testNoFileOption() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(requestContext).abortWith(any(Response.class));
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getHeaderString(JaasBasicAuthFilter.AUTHORIZATION);
     }
 
     @Test
-    public void testPostWithoutAppropriateCredential() throws IOException {
+    public void testInternalTaskConfigEndpointSkipped() throws IOException {
+        testInternalEndpointSkipped(HttpMethod.POST, "connectors/connName/tasks");
+    }
+
+    @Test
+    public void testInternalZombieFencingEndpointSkipped() throws IOException {
+        testInternalEndpointSkipped(HttpMethod.PUT, "connectors/connName/fence");
+    }
+
+    private void testInternalEndpointSkipped(String method, String endpoint) throws IOException {
         UriInfo uriInfo = mock(UriInfo.class);
-        when(uriInfo.getPath()).thenReturn("connectors/connName/tasks");
+        when(uriInfo.getPath()).thenReturn(endpoint);
 
         ContainerRequestContext requestContext = mock(ContainerRequestContext.class);
-        when(requestContext.getMethod()).thenReturn(HttpMethod.POST);
+        when(requestContext.getMethod()).thenReturn(method);
         when(requestContext.getUriInfo()).thenReturn(uriInfo);
 
         File credentialFile = setupPropertyLoginFile(true);
@@ -158,8 +169,9 @@ public void testPostWithoutAppropriateCredential() throws IOException {
         jaasBasicAuthFilter.filter(requestContext);
 
         verify(uriInfo).getPath();
-        verify(requestContext).getMethod();
+        verify(requestContext, atLeastOnce()).getMethod();
         verify(requestContext).getUriInfo();
+        verifyNoMoreInteractions(requestContext);
     }
 
     @Test
diff --git a/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceConnectorTest.java b/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceConnectorTest.java
index 3550d5c8ab120..8e4661d13d154 100644
--- a/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceConnectorTest.java
+++ b/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceConnectorTest.java
@@ -19,20 +19,19 @@
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.ConfigValue;
 import org.apache.kafka.connect.connector.ConnectorContext;
-import org.easymock.EasyMock;
-import org.easymock.EasyMockSupport;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.mock;
 
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-public class FileStreamSourceConnectorTest extends EasyMockSupport {
+public class FileStreamSourceConnectorTest {
 
     private static final String SINGLE_TOPIC = "test";
     private static final String MULTIPLE_TOPICS = "test1,test2";
@@ -45,7 +44,7 @@ public class FileStreamSourceConnectorTest extends EasyMockSupport {
     @BeforeEach
     public void setup() {
         connector = new FileStreamSourceConnector();
-        ctx = createMock(ConnectorContext.class);
+        ctx = mock(ConnectorContext.class);
         connector.initialize(ctx);
 
         sourceProperties = new HashMap<>();
@@ -55,18 +54,14 @@ public void setup() {
 
     @Test
     public void testConnectorConfigValidation() {
-        replayAll();
         List<ConfigValue> configValues = connector.config().validate(sourceProperties);
         for (ConfigValue val : configValues) {
             assertEquals(0, val.errorMessages().size(), "Config property errors: " + val.errorMessages());
         }
-        verifyAll();
     }
 
     @Test
     public void testSourceTasks() {
-        replayAll();
-
         connector.start(sourceProperties);
         List<Map<String, String>> taskConfigs = connector.taskConfigs(1);
         assertEquals(1, taskConfigs.size());
@@ -82,21 +77,15 @@ public void testSourceTasks() {
                 taskConfigs.get(0).get(FileStreamSourceConnector.FILE_CONFIG));
         assertEquals(SINGLE_TOPIC,
                 taskConfigs.get(0).get(FileStreamSourceConnector.TOPIC_CONFIG));
-
-        verifyAll();
     }
 
     @Test
     public void testSourceTasksStdin() {
-        EasyMock.replay(ctx);
-
         sourceProperties.remove(FileStreamSourceConnector.FILE_CONFIG);
         connector.start(sourceProperties);
         List<Map<String, String>> taskConfigs = connector.taskConfigs(1);
         assertEquals(1, taskConfigs.size());
         assertNull(taskConfigs.get(0).get(FileStreamSourceConnector.FILE_CONFIG));
-
-        EasyMock.verify(ctx);
     }
 
     @Test
@@ -107,12 +96,8 @@ public void testMultipleSourcesInvalid() {
 
     @Test
     public void testTaskClass() {
-        EasyMock.replay(ctx);
-
         connector.start(sourceProperties);
         assertEquals(FileStreamSourceTask.class, connector.taskClass());
-
-        EasyMock.verify(ctx);
     }
 
     @Test
diff --git a/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceTaskTest.java b/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceTaskTest.java
index d02463d75a04a..c77f8b0acf3aa 100644
--- a/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceTaskTest.java
+++ b/connect/file/src/test/java/org/apache/kafka/connect/file/FileStreamSourceTaskTest.java
@@ -19,8 +19,6 @@
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTaskContext;
 import org.apache.kafka.connect.storage.OffsetStorageReader;
-import org.easymock.EasyMock;
-import org.easymock.EasyMockSupport;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -38,8 +36,12 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
-public class FileStreamSourceTaskTest extends EasyMockSupport {
+public class FileStreamSourceTaskTest {
 
     private static final String TOPIC = "test";
 
@@ -49,8 +51,6 @@ public class FileStreamSourceTaskTest extends EasyMockSupport {
     private SourceTaskContext context;
     private FileStreamSourceTask task;
 
-    private boolean verifyMocks = false;
-
     @BeforeEach
     public void setup() throws IOException {
         tempFile = File.createTempFile("file-stream-source-task-test", null);
@@ -59,28 +59,19 @@ public void setup() throws IOException {
         config.put(FileStreamSourceConnector.TOPIC_CONFIG, TOPIC);
         config.put(FileStreamSourceConnector.TASK_BATCH_SIZE_CONFIG, String.valueOf(FileStreamSourceConnector.DEFAULT_TASK_BATCH_SIZE));
         task = new FileStreamSourceTask(2);
-        offsetStorageReader = createMock(OffsetStorageReader.class);
-        context = createMock(SourceTaskContext.class);
+        offsetStorageReader = mock(OffsetStorageReader.class);
+        context = mock(SourceTaskContext.class);
         task.initialize(context);
     }
 
     @AfterEach
     public void teardown() {
         tempFile.delete();
-
-        if (verifyMocks)
-            verifyAll();
-    }
-
-    private void replay() {
-        replayAll();
-        verifyMocks = true;
     }
 
     @Test
     public void testNormalLifecycle() throws InterruptedException, IOException {
         expectOffsetLookupReturnNone();
-        replay();
 
         task.start(config);
 
@@ -128,12 +119,13 @@ public void testNormalLifecycle() throws InterruptedException, IOException {
 
         os.close();
         task.stop();
+
+        verifyAll();
     }
 
     @Test
     public void testBatchSize() throws IOException, InterruptedException {
         expectOffsetLookupReturnNone();
-        replay();
 
         config.put(FileStreamSourceConnector.TASK_BATCH_SIZE_CONFIG, "5000");
         task.start(config);
@@ -154,13 +146,13 @@ public void testBatchSize() throws IOException, InterruptedException {
 
         os.close();
         task.stop();
+        verifyAll();
     }
 
     @Test
     public void testBufferResize() throws IOException, InterruptedException {
         int batchSize = 1000;
         expectOffsetLookupReturnNone();
-        replay();
 
         config.put(FileStreamSourceConnector.TASK_BATCH_SIZE_CONFIG, Integer.toString(batchSize));
         task.start(config);
@@ -181,6 +173,8 @@ public void testBufferResize() throws IOException, InterruptedException {
         writeAndAssertBufferSize(batchSize, os, "9       \n".getBytes(), 2048);
         os.close();
         task.stop();
+
+        verifyAll();
     }
 
     private void writeAndAssertBufferSize(int batchSize, OutputStream os, byte[] bytes, int expectBufferSize)
@@ -203,9 +197,7 @@ private void writeTimesAndFlush(OutputStream os, int times, byte[] line) throws
     }
 
     @Test
-    public void testMissingFile() throws InterruptedException {
-        replay();
-
+    public void testUsingSystemInputSourceOnMissingFile() throws InterruptedException {
         String data = "line\n";
         System.setIn(new ByteArrayInputStream(data.getBytes()));
 
@@ -220,17 +212,22 @@ public void testMissingFile() throws InterruptedException {
         task.stop();
     }
 
+    @Test
     public void testInvalidFile() throws InterruptedException {
         config.put(FileStreamSourceConnector.FILE_CONFIG, "bogusfilename");
         task.start(config);
         // Currently the task retries indefinitely if the file isn't found, but shouldn't return any data.
-        for (int i = 0; i < 100; i++)
+        for (int i = 0; i < 3; i++)
             assertNull(task.poll());
     }
 
-
     private void expectOffsetLookupReturnNone() {
-        EasyMock.expect(context.offsetStorageReader()).andReturn(offsetStorageReader);
-        EasyMock.expect(offsetStorageReader.offset(EasyMock.<Map<String, String>>anyObject())).andReturn(null);
+        when(context.offsetStorageReader()).thenReturn(offsetStorageReader);
+        when(offsetStorageReader.offset(anyMap())).thenReturn(null);
+    }
+
+    private void verifyAll() {
+        verify(context).offsetStorageReader();
+        verify(offsetStorageReader).offset(anyMap());
     }
 }
diff --git a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java
index 10fde8f20a5b0..6a17ae277b554 100644
--- a/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java
+++ b/connect/json/src/main/java/org/apache/kafka/connect/json/JsonConverter.java
@@ -26,6 +26,7 @@
 import org.apache.kafka.common.cache.SynchronizedCache;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.errors.SerializationException;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.data.SchemaBuilder;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.Struct;
@@ -282,7 +283,8 @@ public void configure(Map<String, ?> configs, boolean isKey) {
 
     @Override
     public void close() {
-        // do nothing
+        Utils.closeQuietly(this.serializer, "JSON converter serializer");
+        Utils.closeQuietly(this.deserializer, "JSON converter deserializer");
     }
 
     @Override
diff --git a/connect/mirror-client/src/main/java/org/apache/kafka/connect/mirror/MirrorClientConfig.java b/connect/mirror-client/src/main/java/org/apache/kafka/connect/mirror/MirrorClientConfig.java
index 4305366f6fa27..9f79ec5f7a2a9 100644
--- a/connect/mirror-client/src/main/java/org/apache/kafka/connect/mirror/MirrorClientConfig.java
+++ b/connect/mirror-client/src/main/java/org/apache/kafka/connect/mirror/MirrorClientConfig.java
@@ -21,10 +21,14 @@
 import org.apache.kafka.common.config.ConfigDef.Type;
 import org.apache.kafka.common.config.ConfigDef.Importance;
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.common.utils.Utils;
 
 import java.util.Map;
 import java.util.HashMap;
 
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
+
 /** Configuration required for MirrorClient to talk to a given target cluster.
  *  <p>
  *  Generally, these properties come from an mm2.properties configuration file
@@ -99,6 +103,7 @@ private Map<String, Object> clientConfig(String prefix) {
         .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
             Type.STRING,
             CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+            in(Utils.enumOptions(SecurityProtocol.class)),
             Importance.MEDIUM,
             CommonClientConfigs.SECURITY_PROTOCOL_DOC)
         .withClientSslSupport()
@@ -125,6 +130,7 @@ private Map<String, Object> clientConfig(String prefix) {
         .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                 Type.STRING,
                 CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                in(Utils.enumOptions(SecurityProtocol.class)),
                 Importance.MEDIUM,
                 CommonClientConfigs.SECURITY_PROTOCOL_DOC)
         .withClientSslSupport()
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorCheckpointTask.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorCheckpointTask.java
index 47631998fbbec..3e6247334bb81 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorCheckpointTask.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorCheckpointTask.java
@@ -17,9 +17,11 @@
 package org.apache.kafka.connect.mirror;
 
 import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.admin.AlterConsumerGroupOffsetsResult;
 import org.apache.kafka.clients.admin.ConsumerGroupDescription;
 import org.apache.kafka.common.ConsumerGroupState;
 import org.apache.kafka.common.KafkaFuture;
+import org.apache.kafka.common.errors.UnknownMemberIdException;
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.data.Schema;
@@ -37,11 +39,14 @@
 import java.util.Map;
 import java.util.List;
 import java.util.ArrayList;
+import java.util.Optional;
+import java.util.OptionalLong;
 import java.util.Set;
 import java.util.Collections;
 import java.util.stream.Collectors;
 import java.util.concurrent.ExecutionException;
 import java.time.Duration;
+import java.util.stream.Stream;
 
 /** Emits checkpoints for upstream consumer groups. */
 public class MirrorCheckpointTask extends SourceTask {
@@ -105,7 +110,7 @@ public void start(Map<String, String> props) {
     }
 
     @Override
-    public void commit() throws InterruptedException {
+    public void commit() {
         // nop
     }
 
@@ -169,6 +174,7 @@ private List<Checkpoint> checkpointsForGroup(String group) throws ExecutionExcep
         return listConsumerGroupOffsets(group).entrySet().stream()
             .filter(x -> shouldCheckpointTopic(x.getKey().topic()))
             .map(x -> checkpoint(group, x.getKey(), x.getValue()))
+            .flatMap(o -> o.map(Stream::of).orElseGet(Stream::empty)) // do not emit checkpoints for partitions that don't have offset-syncs
             .filter(x -> x.downstreamOffset() >= 0)  // ignore offsets we cannot translate accurately
             .collect(Collectors.toList());
     }
@@ -182,12 +188,16 @@ private Map<TopicPartition, OffsetAndMetadata> listConsumerGroupOffsets(String g
         return sourceAdminClient.listConsumerGroupOffsets(group).partitionsToOffsetAndMetadata().get();
     }
 
-    Checkpoint checkpoint(String group, TopicPartition topicPartition,
-            OffsetAndMetadata offsetAndMetadata) {
+    Optional<Checkpoint> checkpoint(String group, TopicPartition topicPartition,
+                                    OffsetAndMetadata offsetAndMetadata) {
         long upstreamOffset = offsetAndMetadata.offset();
-        long downstreamOffset = offsetSyncStore.translateDownstream(topicPartition, upstreamOffset);
-        return new Checkpoint(group, renameTopicPartition(topicPartition),
-            upstreamOffset, downstreamOffset, offsetAndMetadata.metadata());
+        OptionalLong downstreamOffset = offsetSyncStore.translateDownstream(topicPartition, upstreamOffset);
+        if (downstreamOffset.isPresent()) {
+            return Optional.of(new Checkpoint(group, renameTopicPartition(topicPartition),
+                    upstreamOffset, downstreamOffset.getAsLong(), offsetAndMetadata.metadata()));
+        } else {
+            return Optional.empty();
+        }
     }
 
     SourceRecord checkpointRecord(Checkpoint checkpoint, long timestamp) {
@@ -232,11 +242,10 @@ private void refreshIdleConsumerGroupOffset() {
                 ConsumerGroupState consumerGroupState = consumerGroupDesc.state();
                 // sync offset to the target cluster only if the state of current consumer group is:
                 // (1) idle: because the consumer at target is not actively consuming the mirrored topic
-                // (2) dead: the new consumer that is recently created at source and never exist at target
-                if (consumerGroupState.equals(ConsumerGroupState.EMPTY)) {
+                // (2) dead: the new consumer that is recently created at source and never existed at target
+                if (consumerGroupState == ConsumerGroupState.EMPTY) {
                     idleConsumerGroupsOffset.put(group, targetAdminClient.listConsumerGroupOffsets(group)
-                        .partitionsToOffsetAndMetadata().get().entrySet().stream().collect(
-                            Collectors.toMap(Entry::getKey, Entry::getValue)));
+                        .partitionsToOffsetAndMetadata().get());
                 }
                 // new consumer upstream has state "DEAD" and will be identified during the offset sync-up
             } catch (InterruptedException | ExecutionException e) {
@@ -299,9 +308,18 @@ Map<String, Map<TopicPartition, OffsetAndMetadata>> syncGroupOffset() {
 
     void syncGroupOffset(String consumerGroupId, Map<TopicPartition, OffsetAndMetadata> offsetToSync) {
         if (targetAdminClient != null) {
-            targetAdminClient.alterConsumerGroupOffsets(consumerGroupId, offsetToSync);
-            log.trace("sync-ed the offset for consumer group: {} with {} number of offset entries",
-                      consumerGroupId, offsetToSync.size());
+            AlterConsumerGroupOffsetsResult result = targetAdminClient.alterConsumerGroupOffsets(consumerGroupId, offsetToSync);
+            result.all().whenComplete((v, throwable) -> {
+                if (throwable != null) {
+                    if (throwable.getCause() instanceof UnknownMemberIdException) {
+                        log.warn("Unable to sync offsets for consumer group {}. This is likely caused by consumers currently using this group in the target cluster.", consumerGroupId);
+                    } else {
+                        log.error("Unable to sync offsets for consumer group {}.", consumerGroupId, throwable);
+                    }
+                } else {
+                    log.trace("Sync-ed {} offsets for consumer group {}.", offsetToSync.size(), consumerGroupId);
+                }
+            });
         }
     }
 
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorConnectorConfig.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorConnectorConfig.java
index a94fb3dc66e82..d59f4bc7664eb 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorConnectorConfig.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorConnectorConfig.java
@@ -25,10 +25,13 @@
 import org.apache.kafka.common.metrics.JmxReporter;
 import org.apache.kafka.common.metrics.MetricsContext;
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.ConfigUtils;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.runtime.ConnectorConfig;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.AUTO_OFFSET_RESET_CONFIG;
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
 
 import java.util.Map;
 import java.util.HashMap;
@@ -714,6 +717,7 @@ Duration syncGroupOffsetsInterval() {
                     CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                     ConfigDef.Type.STRING,
                     CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                    in(Utils.enumOptions(SecurityProtocol.class)),
                     ConfigDef.Importance.MEDIUM,
                     CommonClientConfigs.SECURITY_PROTOCOL_DOC)
             .withClientSslSupport()
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorMakerConfig.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorMakerConfig.java
index dc963a0382473..072b5c802d96f 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorMakerConfig.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/MirrorMakerConfig.java
@@ -17,6 +17,8 @@
 package org.apache.kafka.connect.mirror;
 
 import java.util.Map.Entry;
+
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.ConfigDef;
@@ -38,6 +40,8 @@
 import java.util.Collections;
 import java.util.stream.Collectors;
 
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
+
 /** Top-level config describing replication flows between multiple Kafka clusters.
  *
  *  Supports cluster-level properties of the form cluster.x.y.z, and replication-level
@@ -261,6 +265,7 @@ Map<String, String> transform(Map<String, String> props) {
             .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                 Type.STRING,
                 CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                in(Utils.enumOptions(SecurityProtocol.class)),
                 Importance.MEDIUM,
                 CommonClientConfigs.SECURITY_PROTOCOL_DOC)
             .withClientSslSupport()
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSync.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSync.java
index 68e6441f18fc7..e1ecb1e1dbad0 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSync.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSync.java
@@ -39,9 +39,9 @@ public class OffsetSync {
             new Field(TOPIC_KEY, Type.STRING),
             new Field(PARTITION_KEY, Type.INT32));
 
-    private TopicPartition topicPartition;
-    private long upstreamOffset;
-    private long downstreamOffset;
+    private final TopicPartition topicPartition;
+    private final long upstreamOffset;
+    private final long downstreamOffset;
 
     public OffsetSync(TopicPartition topicPartition, long upstreamOffset, long downstreamOffset) {
         this.topicPartition = topicPartition;
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSyncStore.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSyncStore.java
index 600dda46f3166..f9b6617c13d3b 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSyncStore.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/OffsetSyncStore.java
@@ -27,12 +27,14 @@
 import java.util.HashMap;
 import java.util.Collections;
 import java.time.Duration;
+import java.util.Optional;
+import java.util.OptionalLong;
 
 /** Used internally by MirrorMaker. Stores offset syncs and performs offset translation. */
 class OffsetSyncStore implements AutoCloseable {
-    private KafkaConsumer<byte[], byte[]> consumer;
-    private Map<TopicPartition, OffsetSync> offsetSyncs = new HashMap<>();
-    private TopicPartition offsetSyncTopicPartition;
+    private final KafkaConsumer<byte[], byte[]> consumer;
+    private final Map<TopicPartition, OffsetSync> offsetSyncs = new HashMap<>();
+    private final TopicPartition offsetSyncTopicPartition;
 
     OffsetSyncStore(MirrorConnectorConfig config) {
         consumer = new KafkaConsumer<>(config.offsetSyncsTopicConsumerConfig(),
@@ -47,14 +49,18 @@ class OffsetSyncStore implements AutoCloseable {
         this.offsetSyncTopicPartition = offsetSyncTopicPartition;
     }
 
-    long translateDownstream(TopicPartition sourceTopicPartition, long upstreamOffset) {
-        OffsetSync offsetSync = latestOffsetSync(sourceTopicPartition);
-        if (offsetSync.upstreamOffset() > upstreamOffset) {
-            // Offset is too far in the past to translate accurately
-            return -1;
+    OptionalLong translateDownstream(TopicPartition sourceTopicPartition, long upstreamOffset) {
+        Optional<OffsetSync> offsetSync = latestOffsetSync(sourceTopicPartition);
+        if (offsetSync.isPresent()) {
+            if (offsetSync.get().upstreamOffset() > upstreamOffset) {
+                // Offset is too far in the past to translate accurately
+                return OptionalLong.of(-1L);
+            }
+            long upstreamStep = upstreamOffset - offsetSync.get().upstreamOffset();
+            return OptionalLong.of(offsetSync.get().downstreamOffset() + upstreamStep);
+        } else {
+            return OptionalLong.empty();
         }
-        long upstreamStep = upstreamOffset - offsetSync.upstreamOffset();
-        return offsetSync.downstreamOffset() + upstreamStep;
     }
 
     // poll and handle records
@@ -77,8 +83,7 @@ protected void handleRecord(ConsumerRecord<byte[], byte[]> record) {
         offsetSyncs.put(sourceTopicPartition, offsetSync);
     }
 
-    private OffsetSync latestOffsetSync(TopicPartition topicPartition) {
-        return offsetSyncs.computeIfAbsent(topicPartition, x -> new OffsetSync(topicPartition,
-            -1, -1));
+    private Optional<OffsetSync> latestOffsetSync(TopicPartition topicPartition) {
+        return Optional.ofNullable(offsetSyncs.get(topicPartition));
     }
 }
diff --git a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/Scheduler.java b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/Scheduler.java
index 20f2ca7e2c5cc..0644d6a6c6c05 100644
--- a/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/Scheduler.java
+++ b/connect/mirror/src/main/java/org/apache/kafka/connect/mirror/Scheduler.java
@@ -27,7 +27,7 @@
 import org.slf4j.LoggerFactory;
 
 class Scheduler implements AutoCloseable {
-    private static Logger log = LoggerFactory.getLogger(Scheduler.class);
+    private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class);
 
     private final String name;
     private final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor();
@@ -62,11 +62,11 @@ void execute(Task task, String description) {
         try {
             executor.submit(() -> executeThread(task, description)).get(timeout.toMillis(), TimeUnit.MILLISECONDS);
         } catch (InterruptedException e) {
-            log.warn("{} was interrupted running task: {}", name, description);
+            LOG.warn("{} was interrupted running task: {}", name, description);
         } catch (TimeoutException e) {
-            log.error("{} timed out running task: {}", name, description);
+            LOG.error("{} timed out running task: {}", name, description);
         } catch (Throwable e) {
-            log.error("{} caught exception in task: {}", name, description, e);
+            LOG.error("{} caught exception in task: {}", name, description, e);
         }
     } 
 
@@ -76,10 +76,10 @@ public void close() {
         try {
             boolean terminated = executor.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS);
             if (!terminated) {
-                log.error("{} timed out during shutdown of internal scheduler.", name);
+                LOG.error("{} timed out during shutdown of internal scheduler.", name);
             }
         } catch (InterruptedException e) {
-            log.warn("{} was interrupted during shutdown of internal scheduler.", name);
+            LOG.warn("{} was interrupted during shutdown of internal scheduler.", name);
         }
     }
 
@@ -92,21 +92,21 @@ private void run(Task task, String description) {
             long start = System.currentTimeMillis();
             task.run();
             long elapsed = System.currentTimeMillis() - start;
-            log.info("{} took {} ms", description, elapsed);
+            LOG.info("{} took {} ms", description, elapsed);
             if (elapsed > timeout.toMillis()) {
-                log.warn("{} took too long ({} ms) running task: {}", name, elapsed, description);
+                LOG.warn("{} took too long ({} ms) running task: {}", name, elapsed, description);
             }
         } catch (InterruptedException e) {
-            log.warn("{} was interrupted running task: {}", name, description);
+            LOG.warn("{} was interrupted running task: {}", name, description);
         } catch (Throwable e) {
-            log.error("{} caught exception in scheduled task: {}", name, description, e);
+            LOG.error("{} caught exception in scheduled task: {}", name, description, e);
         }
     }
 
     private void executeThread(Task task, String description) {
         Thread.currentThread().setName(name + "-" + description);
         if (closed) {
-            log.info("{} skipping task due to shutdown: {}", name, description);
+            LOG.info("{} skipping task due to shutdown: {}", name, description);
             return;
         }
         run(task, description);
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorCheckpointTaskTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorCheckpointTaskTest.java
index 7ef878ab2e8d3..54fe678e73afe 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorCheckpointTaskTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorCheckpointTaskTest.java
@@ -21,6 +21,8 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Collections;
+import java.util.Optional;
+
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.clients.consumer.OffsetAndMetadata;
 import org.apache.kafka.connect.source.SourceRecord;
@@ -28,6 +30,8 @@
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class MirrorCheckpointTaskTest {
 
@@ -53,8 +57,10 @@ public void testCheckpoint() {
             new DefaultReplicationPolicy(), offsetSyncStore, Collections.emptyMap(), Collections.emptyMap());
         offsetSyncStore.sync(new TopicPartition("topic1", 2), 3L, 4L);
         offsetSyncStore.sync(new TopicPartition("target2.topic5", 6), 7L, 8L);
-        Checkpoint checkpoint1 = mirrorCheckpointTask.checkpoint("group9", new TopicPartition("topic1", 2),
+        Optional<Checkpoint> optionalCheckpoint1 = mirrorCheckpointTask.checkpoint("group9", new TopicPartition("topic1", 2),
             new OffsetAndMetadata(10, null));
+        assertTrue(optionalCheckpoint1.isPresent());
+        Checkpoint checkpoint1 = optionalCheckpoint1.get();
         SourceRecord sourceRecord1 = mirrorCheckpointTask.checkpointRecord(checkpoint1, 123L);
         assertEquals(new TopicPartition("source1.topic1", 2), checkpoint1.topicPartition(),
                 "checkpoint group9 source1.topic1 failed");
@@ -68,8 +74,10 @@ public void testCheckpoint() {
                 "checkpoint group9 downstreamOffset failed");
         assertEquals(123L, sourceRecord1.timestamp().longValue(),
                 "checkpoint group9 timestamp failed");
-        Checkpoint checkpoint2 = mirrorCheckpointTask.checkpoint("group11", new TopicPartition("target2.topic5", 6),
+        Optional<Checkpoint> optionalCheckpoint2 = mirrorCheckpointTask.checkpoint("group11", new TopicPartition("target2.topic5", 6),
             new OffsetAndMetadata(12, null));
+        assertTrue(optionalCheckpoint2.isPresent());
+        Checkpoint checkpoint2 = optionalCheckpoint2.get();
         SourceRecord sourceRecord2 = mirrorCheckpointTask.checkpointRecord(checkpoint2, 234L);
         assertEquals(new TopicPartition("topic5", 6), checkpoint2.topicPartition(),
                 "checkpoint group11 topic5 failed");
@@ -138,4 +146,19 @@ public void testSyncOffset() {
         assertEquals(51, output.get(consumer2).get(t2p0).offset(),
                 "Consumer 2 " + topic2 + " failed");
     }
+
+    @Test
+    public void testNoCheckpointForTopicWithoutOffsetSyncs() {
+        OffsetSyncStoreTest.FakeOffsetSyncStore offsetSyncStore = new OffsetSyncStoreTest.FakeOffsetSyncStore();
+        MirrorCheckpointTask mirrorCheckpointTask = new MirrorCheckpointTask("source1", "target2",
+                new DefaultReplicationPolicy(), offsetSyncStore, Collections.emptyMap(), Collections.emptyMap());
+        offsetSyncStore.sync(new TopicPartition("topic1", 0), 3L, 4L);
+
+        Optional<Checkpoint> checkpoint1 = mirrorCheckpointTask.checkpoint("group9", new TopicPartition("topic1", 1),
+                new OffsetAndMetadata(10, null));
+        Optional<Checkpoint> checkpoint2 = mirrorCheckpointTask.checkpoint("group9", new TopicPartition("topic1", 0),
+                new OffsetAndMetadata(10, null));
+        assertFalse(checkpoint1.isPresent());
+        assertTrue(checkpoint2.isPresent());
+    }
 }
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorConfigTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorConfigTest.java
index c7f629edd9525..ab8e33768c840 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorConfigTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorConnectorConfigTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.connect.mirror;
 
+import org.apache.kafka.clients.CommonClientConfigs;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigException;
@@ -327,4 +328,11 @@ public void testAdminConfigsForOffsetSyncsTopic() {
         assertEquals(config.targetAdminConfig(), config.offsetSyncsTopicAdminConfig());
     }
 
+    @Test
+    public void testInvalidSecurityProtocol() {
+        ConfigException ce = assertThrows(ConfigException.class,
+                () -> new MirrorConnectorConfig(makeProps(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc")));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
+
 }
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorMakerConfigTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorMakerConfigTest.java
index 41bcacb218e1c..3f70ba8eb4f98 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorMakerConfigTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/MirrorMakerConfigTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.connect.mirror;
 
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.types.Password;
 import org.apache.kafka.common.config.provider.ConfigProvider;
 import org.apache.kafka.common.config.ConfigData;
@@ -32,6 +34,8 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class MirrorMakerConfigTest {
 
@@ -49,7 +53,7 @@ public void testClusterConfigProperties() {
             "clusters", "a, b",
             "a.bootstrap.servers", "servers-one",
             "b.bootstrap.servers", "servers-two",
-            "security.protocol", "SASL",
+            "security.protocol", "SSL",
             "replication.factor", "4"));
         Map<String, String> connectorProps = mirrorConfig.connectorBaseConfig(new SourceAndTarget("a", "b"),
             MirrorSourceConnector.class);
@@ -57,7 +61,7 @@ public void testClusterConfigProperties() {
             "source.cluster.bootstrap.servers is set");
         assertEquals("servers-two", connectorProps.get("target.cluster.bootstrap.servers"),
             "target.cluster.bootstrap.servers is set");
-        assertEquals("SASL", connectorProps.get("security.protocol"),
+        assertEquals("SSL", connectorProps.get("security.protocol"),
             "top-level security.protocol is passed through to connector config");
     }
 
@@ -82,7 +86,7 @@ public void testClientConfigProperties() {
             "ssl.key.password", "${fake:secret:password}",  // resolves to "secret2"
             "security.protocol", "SSL", 
             "a.security.protocol", "PLAINTEXT", 
-            "a.producer.security.protocol", "SASL", 
+            "a.producer.security.protocol", "SSL",
             "a.bootstrap.servers", "one:9092, two:9092",
             "metrics.reporter", FakeMetricsReporter.class.getName(),
             "a.metrics.reporter", FakeMetricsReporter.class.getName(),
@@ -99,7 +103,7 @@ public void testClientConfigProperties() {
             "client configs include boostrap.servers");
         assertEquals("PLAINTEXT", aClientConfig.adminConfig().get("security.protocol"),
             "client configs include security.protocol");
-        assertEquals("SASL", aClientConfig.producerConfig().get("security.protocol"),
+        assertEquals("SSL", aClientConfig.producerConfig().get("security.protocol"),
             "producer configs include security.protocol");
         assertFalse(aClientConfig.adminConfig().containsKey("xxx"),
             "unknown properties aren't included in client configs");
@@ -330,6 +334,24 @@ public void testClusterPairsWithGloballyDisabledHeartbeatsCentralLocal() {
             "clusterPairs count should match (x->y.enabled=true or x->y.emit.heartbeats.enabled=true) count");
     }
 
+    @Test
+    public void testInvalidSecurityProtocol() {
+        ConfigException ce = assertThrows(ConfigException.class,
+                () -> new MirrorMakerConfig(makeProps(
+                        "clusters", "a, b, c",
+                        "a->b.emit.heartbeats.enabled", "false",
+                        "a->c.emit.heartbeats.enabled", "false",
+                        CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc")));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
+
+    @Test
+    public void testClientInvalidSecurityProtocol() {
+        ConfigException ce = assertThrows(ConfigException.class,
+                () -> new MirrorClientConfig(makeProps(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc")));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
+
     public static class FakeConfigProvider implements ConfigProvider {
 
         Map<String, String> secrets = Collections.singletonMap("password", "secret2");
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/OffsetSyncStoreTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/OffsetSyncStoreTest.java
index 9307c60886567..9224a088081ac 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/OffsetSyncStoreTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/OffsetSyncStoreTest.java
@@ -47,26 +47,26 @@ public void testOffsetTranslation() {
         FakeOffsetSyncStore store = new FakeOffsetSyncStore();
 
         store.sync(tp, 100, 200);
-        assertEquals(store.translateDownstream(tp, 150), 250,
+        assertEquals(250L, store.translateDownstream(tp, 150).getAsLong(),
                 "Failure in translating downstream offset 250");
 
         // Translate exact offsets
         store.sync(tp, 150, 251);
-        assertEquals(store.translateDownstream(tp, 150), 251,
+        assertEquals(251L, store.translateDownstream(tp, 150).getAsLong(),
                 "Failure in translating exact downstream offset 251");
 
         // Use old offset (5) prior to any sync -> can't translate
-        assertEquals(-1, store.translateDownstream(tp, 5),
+        assertEquals(-1, store.translateDownstream(tp, 5).getAsLong(),
                 "Expected old offset to not translate");
 
         // Downstream offsets reset
         store.sync(tp, 200, 10);
-        assertEquals(store.translateDownstream(tp, 200), 10,
+        assertEquals(10L, store.translateDownstream(tp, 200).getAsLong(),
                 "Failure in resetting translation of downstream offset");
 
         // Upstream offsets reset
         store.sync(tp, 20, 20);
-        assertEquals(store.translateDownstream(tp, 20), 20,
+        assertEquals(20L, store.translateDownstream(tp, 20).getAsLong(),
                 "Failure in resetting translation of upstream offset");
     }
 }
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/IdentityReplicationIntegrationTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/IdentityReplicationIntegrationTest.java
index 9e60e4880dc5e..56ae3f8ebf967 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/IdentityReplicationIntegrationTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/IdentityReplicationIntegrationTest.java
@@ -266,7 +266,7 @@ public void testOneWayReplicationWithAutoOffsetSync() throws InterruptedExceptio
      * Returns expected topic name on target cluster.
      */
     @Override
-    String backupClusterTopicName(String topic) {
+    String remoteTopicName(String topic, String clusterAlias) {
         return topic;
     }
 }
diff --git a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java
index 8f692ca911612..dfafdcbd8c673 100644
--- a/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java
+++ b/connect/mirror/src/test/java/org/apache/kafka/connect/mirror/integration/MirrorConnectorsIntegrationBaseTest.java
@@ -41,7 +41,6 @@
 import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster;
 import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster;
 import org.apache.kafka.connect.util.clusters.UngracefulShutdownException;
-import static org.apache.kafka.test.TestUtils.waitForCondition;
 
 import java.time.Duration;
 import java.util.ArrayList;
@@ -55,11 +54,13 @@
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
 
 import org.junit.jupiter.api.Tag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.kafka.test.TestUtils.waitForCondition;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -80,7 +81,7 @@
  * between clusters during this failover and failback.
  */
 @Tag("integration")
-public abstract class MirrorConnectorsIntegrationBaseTest {
+public class MirrorConnectorsIntegrationBaseTest {
     private static final Logger log = LoggerFactory.getLogger(MirrorConnectorsIntegrationBaseTest.class);
     
     protected static final int NUM_RECORDS_PER_PARTITION = 10;
@@ -93,11 +94,13 @@ public abstract class MirrorConnectorsIntegrationBaseTest {
     private static final int TOPIC_SYNC_DURATION_MS = 60_000;
     private static final int REQUEST_TIMEOUT_DURATION_MS = 60_000;
     private static final int NUM_WORKERS = 3;
-    protected static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500);
+    protected static final Duration CONSUMER_POLL_TIMEOUT_MS = Duration.ofMillis(500L);
     protected static final String PRIMARY_CLUSTER_ALIAS = "primary";
     protected static final String BACKUP_CLUSTER_ALIAS = "backup";
-    protected static final List<Class<? extends Connector>> CONNECTOR_LIST =
-            Arrays.asList(MirrorSourceConnector.class, MirrorCheckpointConnector.class, MirrorHeartbeatConnector.class);
+    protected static final List<Class<? extends Connector>> CONNECTOR_LIST = Arrays.asList(
+            MirrorSourceConnector.class,
+            MirrorCheckpointConnector.class,
+            MirrorHeartbeatConnector.class);
 
     private volatile boolean shuttingDown;
     protected Map<String, String> mm2Props = new HashMap<>();
@@ -243,10 +246,7 @@ public void testReplication() throws Exception {
         produceMessages(primary, "test-topic-1");
         produceMessages(backup, "test-topic-1");
         String consumerGroupName = "consumer-group-testReplication";
-        Map<String, Object> consumerProps = new HashMap<String, Object>() {{
-                put("group.id", consumerGroupName);
-                put("auto.offset.reset", "latest");
-            }};
+        Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName);
         // warm up consumers before starting the connectors so we don't need to wait for discovery
         warmUpConsumer(consumerProps);
         
@@ -319,9 +319,6 @@ public void testReplication() throws Exception {
         waitForCondition(() -> primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS,
             Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("backup.test-topic-1", 0)), CHECKPOINT_DURATION_MS, "Offsets not translated downstream to primary cluster.");
 
-        waitForCondition(() -> primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS,
-            Duration.ofMillis(CHECKPOINT_DURATION_MS)).containsKey(new TopicPartition("test-topic-1", 0)), CHECKPOINT_DURATION_MS, "Offsets not translated upstream to primary cluster.");
-
         Map<TopicPartition, OffsetAndMetadata> primaryOffsets = primaryClient.remoteConsumerOffsets(consumerGroupName, BACKUP_CLUSTER_ALIAS,
                 Duration.ofMillis(CHECKPOINT_DURATION_MS));
  
@@ -329,17 +326,14 @@ public void testReplication() throws Exception {
         backupClient.close();
         
         // Failback consumer group to primary cluster
-        try (Consumer<byte[], byte[]> backupConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) {
-            backupConsumer.assign(primaryOffsets.keySet());
-            primaryOffsets.forEach(backupConsumer::seek);
-            backupConsumer.poll(CONSUMER_POLL_TIMEOUT_MS);
-            backupConsumer.commitAsync();
-        
-            assertTrue(backupConsumer.position(new TopicPartition("test-topic-1", 0)) > 0, "Consumer failedback to zero upstream offset.");
-            assertTrue(backupConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0, "Consumer failedback to zero downstream offset.");
-            assertTrue(backupConsumer.position(
-                new TopicPartition("test-topic-1", 0)) <= NUM_RECORDS_PRODUCED, "Consumer failedback beyond expected upstream offset.");
-            assertTrue(backupConsumer.position(
+        try (Consumer<byte[], byte[]> primaryConsumer = primary.kafka().createConsumer(Collections.singletonMap("group.id", consumerGroupName))) {
+            primaryConsumer.assign(primaryOffsets.keySet());
+            primaryOffsets.forEach(primaryConsumer::seek);
+            primaryConsumer.poll(CONSUMER_POLL_TIMEOUT_MS);
+            primaryConsumer.commitAsync();
+
+            assertTrue(primaryConsumer.position(new TopicPartition("backup.test-topic-1", 0)) > 0, "Consumer failedback to zero downstream offset.");
+            assertTrue(primaryConsumer.position(
                 new TopicPartition("backup.test-topic-1", 0)) <= NUM_RECORDS_PRODUCED, "Consumer failedback beyond expected downstream offset.");
         }
       
@@ -526,6 +520,64 @@ public void testOffsetSyncsTopicsOnTarget() throws Exception {
         assertFalse(primaryTopics.contains("mm2-offset-syncs." + BACKUP_CLUSTER_ALIAS + ".internal"));
     }
 
+    @Test
+    public void testNoCheckpointsIfNoRecordsAreMirrored() throws InterruptedException {
+        String consumerGroupName = "consumer-group-no-checkpoints";
+        Map<String, Object> consumerProps = Collections.singletonMap("group.id", consumerGroupName);
+
+        // ensure there are some records in the topic on the source cluster
+        produceMessages(primary, "test-topic-1");
+
+        // warm up consumers before starting the connectors, so we don't need to wait for discovery
+        warmUpConsumer(consumerProps);
+
+        // one way replication from primary to backup
+        mm2Props.put(BACKUP_CLUSTER_ALIAS + "->" + PRIMARY_CLUSTER_ALIAS + ".enabled", "false");
+        mm2Config = new MirrorMakerConfig(mm2Props);
+        waitUntilMirrorMakerIsRunning(backup, CONNECTOR_LIST, mm2Config, PRIMARY_CLUSTER_ALIAS, BACKUP_CLUSTER_ALIAS);
+
+        // make sure the topics  are created in the backup cluster
+        waitForTopicCreated(backup, remoteTopicName("test-topic-1", PRIMARY_CLUSTER_ALIAS));
+        waitForTopicCreated(backup, remoteTopicName("test-topic-no-checkpoints", PRIMARY_CLUSTER_ALIAS));
+
+        // commit some offsets for both topics in the source cluster
+        TopicPartition tp1 = new TopicPartition("test-topic-1", 0);
+        TopicPartition tp2 = new TopicPartition("test-topic-no-checkpoints", 0);
+        try (Consumer<byte[], byte[]> consumer = primary.kafka().createConsumer(consumerProps)) {
+            Collection<TopicPartition> tps = Arrays.asList(tp1, tp2);
+            Map<TopicPartition, Long> endOffsets = consumer.endOffsets(tps);
+            Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = endOffsets.entrySet().stream()
+                            .collect(Collectors.toMap(
+                                    Map.Entry::getKey,
+                                    e -> new OffsetAndMetadata(e.getValue())
+                            ));
+            consumer.commitSync(offsetsToCommit);
+        }
+
+        // Only test-topic-1 should have translated offsets because we've not yet mirrored any records for test-topic-no-checkpoints
+        MirrorClient backupClient = new MirrorClient(mm2Config.clientConfig(BACKUP_CLUSTER_ALIAS));
+        waitForCondition(() -> {
+            Map<TopicPartition, OffsetAndMetadata> translatedOffsets = backupClient.remoteConsumerOffsets(
+                    consumerGroupName, PRIMARY_CLUSTER_ALIAS, Duration.ofSeconds(30L));
+            return translatedOffsets.containsKey(remoteTopicPartition(tp1, PRIMARY_CLUSTER_ALIAS)) &&
+                   !translatedOffsets.containsKey(remoteTopicPartition(tp2, PRIMARY_CLUSTER_ALIAS));
+        }, OFFSET_SYNC_DURATION_MS, "Checkpoints were not emitted correctly to backup cluster");
+
+        // Send some records to test-topic-no-checkpoints in the source cluster
+        produceMessages(primary, "test-topic-no-checkpoints");
+
+        waitForCondition(() -> {
+            Map<TopicPartition, OffsetAndMetadata> translatedOffsets = backupClient.remoteConsumerOffsets(
+                    consumerGroupName, PRIMARY_CLUSTER_ALIAS, Duration.ofSeconds(30L));
+            return translatedOffsets.containsKey(remoteTopicPartition(tp1, PRIMARY_CLUSTER_ALIAS)) &&
+                   translatedOffsets.containsKey(remoteTopicPartition(tp2, PRIMARY_CLUSTER_ALIAS));
+        }, OFFSET_SYNC_DURATION_MS, "Checkpoints were not emitted correctly to backup cluster");
+    }
+
+    private TopicPartition remoteTopicPartition(TopicPartition tp, String alias) {
+        return new TopicPartition(remoteTopicName(tp.topic(), alias), tp.partition());
+    }
+
     /*
      * Run tests for Exclude Filter for copying topic configurations
      */
@@ -536,7 +588,7 @@ void createAndTestNewTopicWithConfigFilter() throws Exception {
         topicConfig.put("retention.bytes", "1000"); // should be included, default value is -1
 
         final String topic = "test-topic-with-config";
-        final String backupTopic = backupClusterTopicName(topic);
+        final String backupTopic = remoteTopicName(topic, PRIMARY_CLUSTER_ALIAS);
 
         primary.kafka().createTopic(topic, NUM_PARTITIONS, 1, topicConfig);
         waitForTopicCreated(backup, backupTopic);
@@ -560,8 +612,8 @@ void createAndTestNewTopicWithConfigFilter() throws Exception {
     /*
      * Returns expected topic name on target cluster.
      */
-    String backupClusterTopicName(String topic) {
-        return PRIMARY_CLUSTER_ALIAS + "." + topic;
+    String remoteTopicName(String topic, String clusterAlias) {
+        return clusterAlias + "." + topic;
     }
 
     /*
@@ -721,6 +773,7 @@ private void createTopics() {
         primary.kafka().createTopic("test-topic-1", NUM_PARTITIONS, 1, topicConfig, adminClientConfig);
         primary.kafka().createTopic("backup.test-topic-1", 1, 1, emptyMap, adminClientConfig);
         primary.kafka().createTopic("heartbeats", 1, 1, emptyMap, adminClientConfig);
+        primary.kafka().createTopic("test-topic-no-checkpoints", 1, 1, emptyMap, adminClientConfig);
         backup.kafka().createTopic("test-topic-1", NUM_PARTITIONS, 1, emptyMap, adminClientConfig);
         backup.kafka().createTopic("primary.test-topic-1", 1, 1, emptyMap, adminClientConfig);
         backup.kafka().createTopic("heartbeats", 1, 1, emptyMap, adminClientConfig);
@@ -729,7 +782,7 @@ private void createTopics() {
     /*
      * Generate some consumer activity on both clusters to ensure the checkpoint connector always starts promptly
      */
-    protected void warmUpConsumer(Map<String, Object> consumerProps) throws InterruptedException {
+    protected void warmUpConsumer(Map<String, Object> consumerProps) {
         Consumer<byte[], byte[]> dummyConsumer = primary.kafka().createConsumerAndSubscribeTo(consumerProps, "test-topic-1");
         dummyConsumer.poll(CONSUMER_POLL_TIMEOUT_MS);
         dummyConsumer.commitSync();
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/converters/NumberConverter.java b/connect/runtime/src/main/java/org/apache/kafka/connect/converters/NumberConverter.java
index 4605b96f5bd4e..c76486eb8b81e 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/converters/NumberConverter.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/converters/NumberConverter.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.common.errors.SerializationException;
 import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.Serializer;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.DataException;
@@ -122,5 +123,7 @@ public SchemaAndValue toConnectHeader(String topic, String headerKey, byte[] val
 
     @Override
     public void close() {
+        Utils.closeQuietly(this.serializer, "number converter serializer");
+        Utils.closeQuietly(this.deserializer, "number converter deserializer");
     }
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java
index 2fe75a955b06c..90adde67a6fbd 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractHerder.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.connect.runtime;
 
+import org.apache.kafka.clients.admin.AdminClientConfig;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.producer.ProducerConfig;
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.Config;
@@ -28,7 +30,6 @@
 import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.connector.policy.ConnectorClientConfigRequest;
 import org.apache.kafka.connect.errors.NotFoundException;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.runtime.isolation.PluginType;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
 import org.apache.kafka.connect.runtime.rest.entities.ActiveTopicsInfo;
@@ -40,7 +41,9 @@
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorType;
 import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
+import org.apache.kafka.connect.sink.SinkConnector;
 import org.apache.kafka.connect.source.SourceConnector;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
@@ -349,9 +352,11 @@ public ConnectorStateInfo.TaskState taskStatus(ConnectorTaskId id) {
                 status.workerId(), status.trace());
     }
 
-    protected Map<String, ConfigValue> validateBasicConnectorConfig(Connector connector,
-                                                                    ConfigDef configDef,
-                                                                    Map<String, String> config) {
+    protected Map<String, ConfigValue> validateSinkConnectorConfig(SinkConnector connector, ConfigDef configDef, Map<String, String> config) {
+        return configDef.validateAll(config);
+    }
+
+    protected Map<String, ConfigValue> validateSourceConnectorConfig(SourceConnector connector, ConfigDef configDef, Map<String, String> config) {
         return configDef.validateAll(config);
     }
 
@@ -417,7 +422,23 @@ public Optional<RestartPlan> buildRestartPlan(RestartRequest request) {
                 conf == null ? ConnectorType.UNKNOWN : connectorTypeForClass(conf.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG))
         );
         return Optional.of(new RestartPlan(request, stateInfo));
+    }
+
+    protected boolean connectorUsesConsumer(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        return connectorType == org.apache.kafka.connect.health.ConnectorType.SINK;
+    }
 
+    protected boolean connectorUsesAdmin(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        if (connectorType == org.apache.kafka.connect.health.ConnectorType.SOURCE) {
+            return SourceConnectorConfig.usesTopicCreation(connProps);
+        } else {
+            return SinkConnectorConfig.hasDlqTopicConfig(connProps);
+        }
+    }
+
+    protected boolean connectorUsesProducer(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        return connectorType == org.apache.kafka.connect.health.ConnectorType.SOURCE
+            || SinkConnectorConfig.hasDlqTopicConfig(connProps);
     }
 
     ConfigInfos validateConnectorConfig(Map<String, String> connectorProps, boolean doLog) {
@@ -431,22 +452,20 @@ ConfigInfos validateConnectorConfig(Map<String, String> connectorProps, boolean
         Connector connector = getConnector(connType);
         org.apache.kafka.connect.health.ConnectorType connectorType;
         ClassLoader savedLoader = plugins().compareAndSwapLoaders(connector);
+        ConfigDef enrichedConfigDef;
+        Map<String, ConfigValue> validatedConnectorConfig;
         try {
-            ConfigDef baseConfigDef;
             if (connector instanceof SourceConnector) {
-                baseConfigDef = SourceConnectorConfig.configDef();
                 connectorType = org.apache.kafka.connect.health.ConnectorType.SOURCE;
+                enrichedConfigDef = ConnectorConfig.enrich(plugins(), SourceConnectorConfig.configDef(), connectorProps, false);
+                validatedConnectorConfig = validateSourceConnectorConfig((SourceConnector) connector, enrichedConfigDef, connectorProps);
             } else {
-                baseConfigDef = SinkConnectorConfig.configDef();
                 SinkConnectorConfig.validate(connectorProps);
                 connectorType = org.apache.kafka.connect.health.ConnectorType.SINK;
+                enrichedConfigDef = ConnectorConfig.enrich(plugins(), SinkConnectorConfig.configDef(), connectorProps, false);
+                validatedConnectorConfig = validateSinkConnectorConfig((SinkConnector) connector, enrichedConfigDef, connectorProps);
             }
-            ConfigDef enrichedConfigDef = ConnectorConfig.enrich(plugins(), baseConfigDef, connectorProps, false);
-            Map<String, ConfigValue> validatedConnectorConfig = validateBasicConnectorConfig(
-                    connector,
-                    enrichedConfigDef,
-                    connectorProps
-            );
+
             connectorProps.entrySet().stream()
                 .filter(e -> e.getValue() == null)
                 .map(Map.Entry::getKey)
@@ -454,6 +473,7 @@ ConfigInfos validateConnectorConfig(Map<String, String> connectorProps, boolean
                     validatedConnectorConfig.computeIfAbsent(prop, ConfigValue::new)
                         .addErrorMessage("Null value can not be supplied as the configuration value.")
             );
+
             List<ConfigValue> configValues = new ArrayList<>(validatedConnectorConfig.values());
             Map<String, ConfigKey> configKeys = new LinkedHashMap<>(enrichedConfigDef.configKeys());
             Set<String> allGroups = new LinkedHashSet<>(enrichedConfigDef.groups());
@@ -487,40 +507,41 @@ ConfigInfos validateConnectorConfig(Map<String, String> connectorProps, boolean
             ConfigInfos producerConfigInfos = null;
             ConfigInfos consumerConfigInfos = null;
             ConfigInfos adminConfigInfos = null;
-            if (connectorType.equals(org.apache.kafka.connect.health.ConnectorType.SOURCE)) {
-                producerConfigInfos = validateClientOverrides(connName,
-                                                              ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX,
-                                                              connectorConfig,
-                                                              ProducerConfig.configDef(),
-                                                              connector.getClass(),
-                                                              connectorType,
-                                                              ConnectorClientConfigRequest.ClientType.PRODUCER,
-                                                              connectorClientConfigOverridePolicy);
-                return mergeConfigInfos(connType, configInfos, producerConfigInfos);
-            } else {
-                consumerConfigInfos = validateClientOverrides(connName,
-                                                              ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX,
-                                                              connectorConfig,
-                                                              ProducerConfig.configDef(),
-                                                              connector.getClass(),
-                                                              connectorType,
-                                                              ConnectorClientConfigRequest.ClientType.CONSUMER,
-                                                              connectorClientConfigOverridePolicy);
-                // check if topic for dead letter queue exists
-                String topic = connectorProps.get(SinkConnectorConfig.DLQ_TOPIC_NAME_CONFIG);
-                if (topic != null && !topic.isEmpty()) {
-                    adminConfigInfos = validateClientOverrides(connName,
-                                                               ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX,
-                                                               connectorConfig,
-                                                               ProducerConfig.configDef(),
-                                                               connector.getClass(),
-                                                               connectorType,
-                                                               ConnectorClientConfigRequest.ClientType.ADMIN,
-                                                               connectorClientConfigOverridePolicy);
-                }
 
+            if (connectorUsesProducer(connectorType, connectorProps)) {
+                producerConfigInfos = validateClientOverrides(
+                    connName,
+                    ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX,
+                    connectorConfig,
+                    ProducerConfig.configDef(),
+                    connector.getClass(),
+                    connectorType,
+                    ConnectorClientConfigRequest.ClientType.PRODUCER,
+                    connectorClientConfigOverridePolicy);
             }
-            return mergeConfigInfos(connType, configInfos, consumerConfigInfos, adminConfigInfos);
+            if (connectorUsesAdmin(connectorType, connectorProps)) {
+                adminConfigInfos = validateClientOverrides(
+                    connName,
+                    ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX,
+                    connectorConfig,
+                    AdminClientConfig.configDef(),
+                    connector.getClass(),
+                    connectorType,
+                    ConnectorClientConfigRequest.ClientType.ADMIN,
+                    connectorClientConfigOverridePolicy);
+            }
+            if (connectorUsesConsumer(connectorType, connectorProps)) {
+                consumerConfigInfos = validateClientOverrides(
+                    connName,
+                    ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX,
+                    connectorConfig,
+                    ConsumerConfig.configDef(),
+                    connector.getClass(),
+                    connectorType,
+                    ConnectorClientConfigRequest.ClientType.CONSUMER,
+                    connectorClientConfigOverridePolicy);
+            }
+            return mergeConfigInfos(connType, configInfos, producerConfigInfos, consumerConfigInfos, adminConfigInfos);
         } finally {
             Plugins.compareAndSwapLoaders(savedLoader);
         }
@@ -665,7 +686,7 @@ protected Connector getConnector(String connType) {
         return tempConnectors.computeIfAbsent(connType, k -> plugins().newConnector(k));
     }
 
-    /*
+    /**
      * Retrieves ConnectorType for the corresponding connector class
      * @param connClass class of the connector
      */
@@ -673,6 +694,15 @@ public ConnectorType connectorTypeForClass(String connClass) {
         return ConnectorType.from(getConnector(connClass).getClass());
     }
 
+    /**
+     * Retrieves ConnectorType for the class specified in the connector config
+     * @param connConfig the connector config; may not be null
+     * @return the {@link ConnectorType} of the connector
+     */
+    public ConnectorType connectorTypeForConfig(Map<String, String> connConfig) {
+        return connectorTypeForClass(connConfig.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG));
+    }
+
     /**
      * Checks a given {@link ConfigInfos} for validation error messages and adds an exception
      * to the given {@link Callback} if any were found.
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTask.java
new file mode 100644
index 0000000000000..693ef510f1a4d
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTask.java
@@ -0,0 +1,657 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.Producer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.metrics.stats.Avg;
+import org.apache.kafka.common.metrics.stats.CumulativeSum;
+import org.apache.kafka.common.metrics.stats.Max;
+import org.apache.kafka.common.metrics.stats.Rate;
+import org.apache.kafka.common.metrics.stats.Value;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.errors.RetriableException;
+import org.apache.kafka.connect.header.Header;
+import org.apache.kafka.connect.header.Headers;
+import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
+import org.apache.kafka.connect.runtime.errors.Stage;
+import org.apache.kafka.connect.runtime.errors.ToleranceType;
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.source.SourceTaskContext;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
+import org.apache.kafka.connect.storage.Converter;
+import org.apache.kafka.connect.storage.HeaderConverter;
+import org.apache.kafka.connect.storage.OffsetStorageWriter;
+import org.apache.kafka.connect.storage.StatusBackingStore;
+import org.apache.kafka.connect.util.ConnectUtils;
+import org.apache.kafka.connect.util.ConnectorTaskId;
+import org.apache.kafka.connect.util.TopicAdmin;
+import org.apache.kafka.connect.util.TopicCreation;
+import org.apache.kafka.connect.util.TopicCreationGroup;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executor;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_TRACKING_ENABLE_CONFIG;
+
+/**
+ * WorkerTask that contains shared logic for running source tasks with either standard or exactly-once delivery guarantees.
+ */
+public abstract class AbstractWorkerSourceTask extends WorkerTask {
+    private static final Logger log = LoggerFactory.getLogger(AbstractWorkerSourceTask.class);
+
+    private static final long SEND_FAILED_BACKOFF_MS = 100;
+
+    /**
+     * Hook to define custom startup behavior before the calling {@link SourceTask#initialize(SourceTaskContext)}
+     * and {@link SourceTask#start(Map)}.
+     */
+    protected abstract void prepareToInitializeTask();
+
+    /**
+     * Hook to define custom initialization behavior when preparing to begin the poll-convert-send loop for the first time,
+     * or when re-entering the loop after being paused.
+     */
+    protected abstract void prepareToEnterSendLoop();
+
+    /**
+     * Hook to define custom periodic behavior to be performed at the top of every iteration of the poll-convert-send loop.
+     */
+    protected abstract void beginSendIteration();
+
+    /**
+     * Hook to define custom periodic checks for health, metrics, etc. Called whenever {@link SourceTask#poll()} is about to be invoked.
+     */
+    protected abstract void prepareToPollTask();
+
+    /**
+     * Invoked when a record provided by the task has been filtered out by a transform or the converter,
+     * or will be discarded due to failures during transformation or conversion.
+     * @param record the pre-transform record that has been dropped; never null.
+     */
+    protected abstract void recordDropped(SourceRecord record);
+
+    /**
+     * Invoked when a record is about to be dispatched to the producer. May be invoked multiple times for the same
+     * record if retriable errors are encountered.
+     * @param sourceRecord the pre-transform {@link SourceRecord} provided by the source task; never null.
+     * @param producerRecord the {@link ProducerRecord} produced by transforming and converting the
+     * {@code sourceRecord}; never null;
+     * @return a {@link SubmittedRecords.SubmittedRecord} to be {@link SubmittedRecords.SubmittedRecord#ack() acknowledged}
+     * if the corresponding producer record is ack'd by Kafka or {@link SubmittedRecords.SubmittedRecord#drop() dropped}
+     * if synchronously rejected by the producer. Can also be {@link Optional#empty()} if it is not necessary to track the acknowledgment
+     * of individual producer records
+     */
+    protected abstract Optional<SubmittedRecords.SubmittedRecord> prepareToSendRecord(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord
+    );
+
+    /**
+     * Invoked when a record has been transformed, converted, and dispatched to the producer successfully via
+     * {@link Producer#send}. Does not guarantee that the record has been sent to Kafka or ack'd by the required number
+     * of brokers, but does guarantee that it will never be re-processed.
+     * @param record the pre-transform {@link SourceRecord} that was successfully dispatched to the producer; never null.
+     */
+    protected abstract void recordDispatched(SourceRecord record);
+
+    /**
+     * Invoked when an entire batch of records returned from {@link SourceTask#poll} has been transformed, converted,
+     * and either discarded due to transform/conversion errors, filtered by a transform, or dispatched to the producer
+     * successfully via {@link Producer#send}. Does not guarantee that the records have been sent to Kafka or ack'd by the
+     * required number of brokers, but does guarantee that none of the records in the batch will ever be re-processed during
+     * the lifetime of this task. At most one record batch is polled from the task in between calls to this method.
+     */
+    protected abstract void batchDispatched();
+
+    /**
+     * Invoked when a record has been sent and ack'd by the Kafka cluster. Note that this method may be invoked
+     *  concurrently and should therefore be made thread-safe.
+     * @param sourceRecord  the pre-transform {@link SourceRecord} that was successfully sent to Kafka; never null.
+     * @param producerRecord the {@link ProducerRecord} produced by transforming and converting the
+     * {@code sourceRecord}; never null;
+     * @param recordMetadata the {@link RecordMetadata} for the corresponding producer record; never null.
+     */
+    protected abstract void recordSent(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            RecordMetadata recordMetadata
+    );
+
+    /**
+     * Invoked when a record given to {@link Producer#send(ProducerRecord, Callback)} has failed with a non-retriable error.
+     * @param synchronous whether the error occurred during the invocation of {@link Producer#send(ProducerRecord, Callback)}.
+     *                    If {@code false}, indicates that the error was reported asynchronously by the producer by a {@link Callback}
+     * @param producerRecord the {@link ProducerRecord} that the producer failed to send; never null
+     * @param preTransformRecord the pre-transform {@link SourceRecord} that the producer record was derived from; never null
+     * @param e the exception that was either thrown from {@link Producer#send(ProducerRecord, Callback)}, or reported by the producer
+     *          via {@link Callback} after the call to {@link Producer#send(ProducerRecord, Callback)} completed
+     */
+    protected abstract void producerSendFailed(
+            boolean synchronous,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            SourceRecord preTransformRecord,
+            Exception e
+    );
+
+    /**
+     * Invoked when no more records will be polled from the task or dispatched to the producer. Should attempt to
+     * commit the offsets for any outstanding records when possible.
+     * @param failed whether the task is undergoing a healthy or an unhealthy shutdown
+     */
+    protected abstract void finalOffsetCommit(boolean failed);
+
+
+    protected final WorkerConfig workerConfig;
+    protected final WorkerSourceTaskContext sourceTaskContext;
+    protected final ConnectorOffsetBackingStore offsetStore;
+    protected final OffsetStorageWriter offsetWriter;
+    protected final Producer<byte[], byte[]> producer;
+
+    private final SourceTask task;
+    private final Converter keyConverter;
+    private final Converter valueConverter;
+    private final HeaderConverter headerConverter;
+    private final TransformationChain<SourceRecord> transformationChain;
+    private final TopicAdmin admin;
+    private final CloseableOffsetStorageReader offsetReader;
+    private final SourceTaskMetricsGroup sourceTaskMetricsGroup;
+    private final CountDownLatch stopRequestedLatch;
+    private final boolean topicTrackingEnabled;
+    private final TopicCreation topicCreation;
+    private final Executor closeExecutor;
+
+    // Visible for testing
+    List<SourceRecord> toSend;
+    protected Map<String, String> taskConfig;
+    protected boolean started = false;
+    private volatile boolean producerClosed = false;
+
+    protected AbstractWorkerSourceTask(ConnectorTaskId id,
+                                       SourceTask task,
+                                       TaskStatus.Listener statusListener,
+                                       TargetState initialState,
+                                       Converter keyConverter,
+                                       Converter valueConverter,
+                                       HeaderConverter headerConverter,
+                                       TransformationChain<SourceRecord> transformationChain,
+                                       WorkerSourceTaskContext sourceTaskContext,
+                                       Producer<byte[], byte[]> producer,
+                                       TopicAdmin admin,
+                                       Map<String, TopicCreationGroup> topicGroups,
+                                       CloseableOffsetStorageReader offsetReader,
+                                       OffsetStorageWriter offsetWriter,
+                                       ConnectorOffsetBackingStore offsetStore,
+                                       WorkerConfig workerConfig,
+                                       ConnectMetrics connectMetrics,
+                                       ClassLoader loader,
+                                       Time time,
+                                       RetryWithToleranceOperator retryWithToleranceOperator,
+                                       StatusBackingStore statusBackingStore,
+                                       Executor closeExecutor) {
+
+        super(id, statusListener, initialState, loader, connectMetrics,
+                retryWithToleranceOperator, time, statusBackingStore);
+
+        this.workerConfig = workerConfig;
+        this.task = task;
+        this.keyConverter = keyConverter;
+        this.valueConverter = valueConverter;
+        this.headerConverter = headerConverter;
+        this.transformationChain = transformationChain;
+        this.producer = producer;
+        this.admin = admin;
+        this.offsetReader = offsetReader;
+        this.offsetWriter = offsetWriter;
+        this.offsetStore = Objects.requireNonNull(offsetStore, "offset store cannot be null for source tasks");
+        this.closeExecutor = closeExecutor;
+        this.sourceTaskContext = sourceTaskContext;
+
+        this.stopRequestedLatch = new CountDownLatch(1);
+        this.sourceTaskMetricsGroup = new SourceTaskMetricsGroup(id, connectMetrics);
+        this.topicTrackingEnabled = workerConfig.getBoolean(TOPIC_TRACKING_ENABLE_CONFIG);
+        this.topicCreation = TopicCreation.newTopicCreation(workerConfig, topicGroups);
+    }
+
+    @Override
+    public void initialize(TaskConfig taskConfig) {
+        try {
+            this.taskConfig = taskConfig.originalsStrings();
+        } catch (Throwable t) {
+            log.error("{} Task failed initialization and will not be started.", this, t);
+            onFailure(t);
+        }
+    }
+
+    @Override
+    protected void initializeAndStart() {
+        prepareToInitializeTask();
+        offsetStore.start();
+        // If we try to start the task at all by invoking initialize, then count this as
+        // "started" and expect a subsequent call to the task's stop() method
+        // to properly clean up any resources allocated by its initialize() or
+        // start() methods. If the task throws an exception during stop(),
+        // the worst thing that happens is another exception gets logged for an already-
+        // failed task
+        started = true;
+        task.initialize(sourceTaskContext);
+        task.start(taskConfig);
+        log.info("{} Source task finished initialization and start", this);
+    }
+
+    @Override
+    public void cancel() {
+        super.cancel();
+        // Preemptively close the offset reader in case the task is blocked on an offset read.
+        offsetReader.close();
+        // We proactively close the producer here as the main work thread for the task may
+        // be blocked indefinitely in a call to Producer::send if automatic topic creation is
+        // not enabled on either the connector or the Kafka cluster. Closing the producer should
+        // unblock it in that case and allow shutdown to proceed normally.
+        // With a duration of 0, the producer's own shutdown logic should be fairly quick,
+        // but closing user-pluggable classes like interceptors may lag indefinitely. So, we
+        // call close on a separate thread in order to avoid blocking the herder's tick thread.
+        closeExecutor.execute(() -> closeProducer(Duration.ZERO));
+    }
+
+    @Override
+    public void stop() {
+        super.stop();
+        stopRequestedLatch.countDown();
+    }
+
+    @Override
+    public void removeMetrics() {
+        Utils.closeQuietly(sourceTaskMetricsGroup, "source task metrics tracker");
+        super.removeMetrics();
+    }
+
+    @Override
+    protected void close() {
+        if (started) {
+            Utils.closeQuietly(task::stop, "source task");
+        }
+
+        closeProducer(Duration.ofSeconds(30));
+
+        if (admin != null) {
+            Utils.closeQuietly(() -> admin.close(Duration.ofSeconds(30)), "source task admin");
+        }
+        Utils.closeQuietly(transformationChain, "transformation chain");
+        Utils.closeQuietly(retryWithToleranceOperator, "retry operator");
+        Utils.closeQuietly(offsetReader, "offset reader");
+        Utils.closeQuietly(offsetStore::stop, "offset backing store");
+        Utils.closeQuietly(headerConverter, "header converter");
+    }
+
+    private void closeProducer(Duration duration) {
+        if (producer != null) {
+            producerClosed = true;
+            Utils.closeQuietly(() -> producer.close(duration), "source task producer");
+        }
+    }
+
+    @Override
+    public void execute() {
+        try {
+            prepareToEnterSendLoop();
+            while (!isStopping()) {
+                beginSendIteration();
+
+                if (shouldPause()) {
+                    onPause();
+                    if (awaitUnpause()) {
+                        onResume();
+                        prepareToEnterSendLoop();
+                    }
+                    continue;
+                }
+
+                if (toSend == null) {
+                    prepareToPollTask();
+
+                    log.trace("{} Nothing to send to Kafka. Polling source for additional records", this);
+                    long start = time.milliseconds();
+                    toSend = poll();
+                    if (toSend != null) {
+                        recordPollReturned(toSend.size(), time.milliseconds() - start);
+                    }
+                }
+                if (toSend == null)
+                    continue;
+                log.trace("{} About to send {} records to Kafka", this, toSend.size());
+                if (sendRecords()) {
+                    batchDispatched();
+                } else {
+                    stopRequestedLatch.await(SEND_FAILED_BACKOFF_MS, TimeUnit.MILLISECONDS);
+                }
+            }
+        } catch (InterruptedException e) {
+            // Ignore and allow to exit.
+        } catch (RuntimeException e) {
+            try {
+                finalOffsetCommit(true);
+            } catch (Exception offsetException) {
+                log.error("Failed to commit offsets for already-failing task", offsetException);
+            }
+            throw e;
+        }
+        finalOffsetCommit(false);
+    }
+
+    /**
+     * Try to send a batch of records. If a send fails and is retriable, this saves the remainder of the batch so it can
+     * be retried after backing off. If a send fails and is not retriable, this will throw a ConnectException.
+     * @return true if all messages were sent, false if some need to be retried
+     */
+    // Visible for testing
+    boolean sendRecords() {
+        int processed = 0;
+        recordBatch(toSend.size());
+        final SourceRecordWriteCounter counter =
+                toSend.size() > 0 ? new SourceRecordWriteCounter(toSend.size(), sourceTaskMetricsGroup) : null;
+        for (final SourceRecord preTransformRecord : toSend) {
+            retryWithToleranceOperator.sourceRecord(preTransformRecord);
+            final SourceRecord record = transformationChain.apply(preTransformRecord);
+            final ProducerRecord<byte[], byte[]> producerRecord = convertTransformedRecord(record);
+            if (producerRecord == null || retryWithToleranceOperator.failed()) {
+                counter.skipRecord();
+                recordDropped(preTransformRecord);
+                continue;
+            }
+
+            log.trace("{} Appending record to the topic {} with key {}, value {}", this, record.topic(), record.key(), record.value());
+            Optional<SubmittedRecords.SubmittedRecord> submittedRecord = prepareToSendRecord(preTransformRecord, producerRecord);
+            try {
+                final String topic = producerRecord.topic();
+                maybeCreateTopic(topic);
+                producer.send(
+                    producerRecord,
+                    (recordMetadata, e) -> {
+                        if (e != null) {
+                            if (producerClosed) {
+                                log.trace("{} failed to send record to {}; this is expected as the producer has already been closed", AbstractWorkerSourceTask.this, topic, e);
+                            } else {
+                                log.error("{} failed to send record to {}: ", AbstractWorkerSourceTask.this, topic, e);
+                            }
+                            log.trace("{} Failed record: {}", AbstractWorkerSourceTask.this, preTransformRecord);
+                            producerSendFailed(false, producerRecord, preTransformRecord, e);
+                            if (retryWithToleranceOperator.getErrorToleranceType() == ToleranceType.ALL) {
+                                counter.skipRecord();
+                                submittedRecord.ifPresent(SubmittedRecords.SubmittedRecord::ack);
+                            }
+                        } else {
+                            counter.completeRecord();
+                            log.trace("{} Wrote record successfully: topic {} partition {} offset {}",
+                                    AbstractWorkerSourceTask.this,
+                                    recordMetadata.topic(), recordMetadata.partition(),
+                                    recordMetadata.offset());
+                            recordSent(preTransformRecord, producerRecord, recordMetadata);
+                            submittedRecord.ifPresent(SubmittedRecords.SubmittedRecord::ack);
+                            if (topicTrackingEnabled) {
+                                recordActiveTopic(producerRecord.topic());
+                            }
+                        }
+                    });
+                // Note that this will cause retries to take place within a transaction
+            } catch (RetriableException | org.apache.kafka.common.errors.RetriableException e) {
+                log.warn("{} Failed to send record to topic '{}' and partition '{}'. Backing off before retrying: ",
+                        this, producerRecord.topic(), producerRecord.partition(), e);
+                toSend = toSend.subList(processed, toSend.size());
+                submittedRecord.ifPresent(SubmittedRecords.SubmittedRecord::drop);
+                counter.retryRemaining();
+                return false;
+            } catch (ConnectException e) {
+                log.warn("{} Failed to send record to topic '{}' and partition '{}' due to an unrecoverable exception: ",
+                        this, producerRecord.topic(), producerRecord.partition(), e);
+                log.trace("{} Failed to send {} with unrecoverable exception: ", this, producerRecord, e);
+                throw e;
+            } catch (KafkaException e) {
+                producerSendFailed(true, producerRecord, preTransformRecord, e);
+            }
+            processed++;
+            recordDispatched(preTransformRecord);
+        }
+        toSend = null;
+        return true;
+    }
+
+    protected List<SourceRecord> poll() throws InterruptedException {
+        try {
+            return task.poll();
+        } catch (RetriableException | org.apache.kafka.common.errors.RetriableException e) {
+            log.warn("{} failed to poll records from SourceTask. Will retry operation.", this, e);
+            // Do nothing. Let the framework poll whenever it's ready.
+            return null;
+        }
+    }
+
+    /**
+     * Convert the source record into a producer record.
+     *
+     * @param record the transformed record
+     * @return the producer record which can sent over to Kafka. A null is returned if the input is null or
+     * if an error was encountered during any of the converter stages.
+     */
+    protected ProducerRecord<byte[], byte[]> convertTransformedRecord(SourceRecord record) {
+        if (record == null) {
+            return null;
+        }
+
+        RecordHeaders headers = retryWithToleranceOperator.execute(() -> convertHeaderFor(record), Stage.HEADER_CONVERTER, headerConverter.getClass());
+
+        byte[] key = retryWithToleranceOperator.execute(() -> keyConverter.fromConnectData(record.topic(), headers, record.keySchema(), record.key()),
+                Stage.KEY_CONVERTER, keyConverter.getClass());
+
+        byte[] value = retryWithToleranceOperator.execute(() -> valueConverter.fromConnectData(record.topic(), headers, record.valueSchema(), record.value()),
+                Stage.VALUE_CONVERTER, valueConverter.getClass());
+
+        if (retryWithToleranceOperator.failed()) {
+            return null;
+        }
+
+        return new ProducerRecord<>(record.topic(), record.kafkaPartition(),
+                ConnectUtils.checkAndConvertTimestamp(record.timestamp()), key, value, headers);
+    }
+
+    // Due to transformations that may change the destination topic of a record (such as
+    // RegexRouter) topic creation can not be batched for multiple topics
+    private void maybeCreateTopic(String topic) {
+        if (!topicCreation.isTopicCreationRequired(topic)) {
+            log.trace("Topic creation by the connector is disabled or the topic {} was previously created." +
+                    "If auto.create.topics.enable is enabled on the broker, " +
+                    "the topic will be created with default settings", topic);
+            return;
+        }
+        log.info("The task will send records to topic '{}' for the first time. Checking "
+                + "whether topic exists", topic);
+        Map<String, TopicDescription> existing = admin.describeTopics(topic);
+        if (!existing.isEmpty()) {
+            log.info("Topic '{}' already exists.", topic);
+            topicCreation.addTopic(topic);
+            return;
+        }
+
+        log.info("Creating topic '{}'", topic);
+        TopicCreationGroup topicGroup = topicCreation.findFirstGroup(topic);
+        log.debug("Topic '{}' matched topic creation group: {}", topic, topicGroup);
+        NewTopic newTopic = topicGroup.newTopic(topic);
+
+        TopicAdmin.TopicCreationResponse response = admin.createOrFindTopics(newTopic);
+        if (response.isCreated(newTopic.name())) {
+            topicCreation.addTopic(topic);
+            log.info("Created topic '{}' using creation group {}", newTopic, topicGroup);
+        } else if (response.isExisting(newTopic.name())) {
+            topicCreation.addTopic(topic);
+            log.info("Found existing topic '{}'", newTopic);
+        } else {
+            // The topic still does not exist and could not be created, so treat it as a task failure
+            log.warn("Request to create new topic '{}' failed", topic);
+            throw new ConnectException("Task failed to create new topic " + newTopic + ". Ensure "
+                    + "that the task is authorized to create topics or that the topic exists and "
+                    + "restart the task");
+        }
+    }
+
+    protected RecordHeaders convertHeaderFor(SourceRecord record) {
+        Headers headers = record.headers();
+        RecordHeaders result = new RecordHeaders();
+        if (headers != null) {
+            String topic = record.topic();
+            for (Header header : headers) {
+                String key = header.key();
+                byte[] rawHeader = headerConverter.fromConnectHeader(topic, key, header.schema(), header.value());
+                result.add(key, rawHeader);
+            }
+        }
+        return result;
+    }
+
+    protected void commitTaskRecord(SourceRecord record, RecordMetadata metadata) {
+        try {
+            task.commitRecord(record, metadata);
+        } catch (Throwable t) {
+            log.error("{} Exception thrown while calling task.commitRecord()", this, t);
+        }
+    }
+
+    protected void commitSourceTask() {
+        try {
+            this.task.commit();
+        } catch (Throwable t) {
+            log.error("{} Exception thrown while calling task.commit()", this, t);
+        }
+    }
+
+    protected void recordPollReturned(int numRecordsInBatch, long duration) {
+        sourceTaskMetricsGroup.recordPoll(numRecordsInBatch, duration);
+    }
+
+    SourceTaskMetricsGroup sourceTaskMetricsGroup() {
+        return sourceTaskMetricsGroup;
+    }
+
+    static class SourceRecordWriteCounter {
+        private final SourceTaskMetricsGroup metricsGroup;
+        private final int batchSize;
+        private boolean completed = false;
+        private int counter;
+        public SourceRecordWriteCounter(int batchSize, SourceTaskMetricsGroup metricsGroup) {
+            assert batchSize > 0;
+            assert metricsGroup != null;
+            this.batchSize = batchSize;
+            counter = batchSize;
+            this.metricsGroup = metricsGroup;
+        }
+        public void skipRecord() {
+            if (counter > 0 && --counter == 0) {
+                finishedAllWrites();
+            }
+        }
+        public void completeRecord() {
+            if (counter > 0 && --counter == 0) {
+                finishedAllWrites();
+            }
+        }
+        public void retryRemaining() {
+            finishedAllWrites();
+        }
+        private void finishedAllWrites() {
+            if (!completed) {
+                metricsGroup.recordWrite(batchSize - counter);
+                completed = true;
+            }
+        }
+    }
+
+    static class SourceTaskMetricsGroup implements AutoCloseable {
+        private final ConnectMetrics.MetricGroup metricGroup;
+        private final Sensor sourceRecordPoll;
+        private final Sensor sourceRecordWrite;
+        private final Sensor sourceRecordActiveCount;
+        private final Sensor pollTime;
+        private int activeRecordCount;
+
+        public SourceTaskMetricsGroup(ConnectorTaskId id, ConnectMetrics connectMetrics) {
+            ConnectMetricsRegistry registry = connectMetrics.registry();
+            metricGroup = connectMetrics.group(registry.sourceTaskGroupName(),
+                    registry.connectorTagName(), id.connector(),
+                    registry.taskTagName(), Integer.toString(id.task()));
+            // remove any previously created metrics in this group to prevent collisions.
+            metricGroup.close();
+
+            sourceRecordPoll = metricGroup.sensor("source-record-poll");
+            sourceRecordPoll.add(metricGroup.metricName(registry.sourceRecordPollRate), new Rate());
+            sourceRecordPoll.add(metricGroup.metricName(registry.sourceRecordPollTotal), new CumulativeSum());
+
+            sourceRecordWrite = metricGroup.sensor("source-record-write");
+            sourceRecordWrite.add(metricGroup.metricName(registry.sourceRecordWriteRate), new Rate());
+            sourceRecordWrite.add(metricGroup.metricName(registry.sourceRecordWriteTotal), new CumulativeSum());
+
+            pollTime = metricGroup.sensor("poll-batch-time");
+            pollTime.add(metricGroup.metricName(registry.sourceRecordPollBatchTimeMax), new Max());
+            pollTime.add(metricGroup.metricName(registry.sourceRecordPollBatchTimeAvg), new Avg());
+
+            sourceRecordActiveCount = metricGroup.sensor("source-record-active-count");
+            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCount), new Value());
+            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCountMax), new Max());
+            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCountAvg), new Avg());
+        }
+
+        @Override
+        public void close() {
+            metricGroup.close();
+        }
+
+        void recordPoll(int batchSize, long duration) {
+            sourceRecordPoll.record(batchSize);
+            pollTime.record(duration);
+            activeRecordCount += batchSize;
+            sourceRecordActiveCount.record(activeRecordCount);
+        }
+
+        void recordWrite(int recordCount) {
+            sourceRecordWrite.record(recordCount);
+            activeRecordCount -= recordCount;
+            activeRecordCount = Math.max(0, activeRecordCount);
+            sourceRecordActiveCount.record(activeRecordCount);
+        }
+
+        protected ConnectMetrics.MetricGroup metricGroup() {
+            return metricGroup;
+        }
+    }
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Connect.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Connect.java
index 80eef0369abc0..e5ab246c0bd18 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Connect.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Connect.java
@@ -21,7 +21,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.net.URI;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
@@ -89,12 +88,8 @@ public boolean isRunning() {
     }
 
     // Visible for testing
-    public URI restUrl() {
-        return rest.serverUrl();
-    }
-
-    public URI adminUrl() {
-        return rest.adminUrl();
+    public RestServer rest() {
+        return rest;
     }
 
     private class ShutdownHook extends Thread {
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetrics.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetrics.java
index 7dad6aec0af1b..ed81be657a02e 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetrics.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetrics.java
@@ -319,9 +319,7 @@ Map<String, String> tags() {
          */
         public <T> void addValueMetric(MetricNameTemplate nameTemplate, final LiteralSupplier<T> supplier) {
             MetricName metricName = metricName(nameTemplate);
-            if (metrics().metric(metricName) == null) {
-                metrics().addMetric(metricName, (Gauge<T>) (config, now) -> supplier.metricValue(now));
-            }
+            metrics().addMetricIfAbsent(metricName, null, (Gauge<T>) (config, now) -> supplier.metricValue(now));
         }
 
         /**
@@ -333,9 +331,7 @@ public <T> void addValueMetric(MetricNameTemplate nameTemplate, final LiteralSup
          */
         public <T> void addImmutableValueMetric(MetricNameTemplate nameTemplate, final T value) {
             MetricName metricName = metricName(nameTemplate);
-            if (metrics().metric(metricName) == null) {
-                metrics().addMetric(metricName, (Gauge<T>) (config, now) -> value);
-            }
+            metrics().addMetricIfAbsent(metricName, null, (Gauge<T>) (config, now) -> value);
         }
 
         /**
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetricsRegistry.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetricsRegistry.java
index f301439da8356..d8579d44fc655 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetricsRegistry.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ConnectMetricsRegistry.java
@@ -129,7 +129,7 @@ public ConnectMetricsRegistry(Set<String> tags) {
 
         connectorStatus = createTemplate("status", CONNECTOR_GROUP_NAME,
                                          "The status of the connector. One of 'unassigned', 'running', 'paused', 'failed', or " +
-                                         "'destroyed'.",
+                                         "'restarting'.",
                                          connectorTags);
         connectorType = createTemplate("connector-type", CONNECTOR_GROUP_NAME, "The type of the connector. One of 'source' or 'sink'.",
                                        connectorTags);
@@ -144,7 +144,7 @@ public ConnectMetricsRegistry(Set<String> tags) {
 
         taskStatus = createTemplate("status", TASK_GROUP_NAME,
                                     "The status of the connector task. One of 'unassigned', 'running', 'paused', 'failed', or " +
-                                    "'destroyed'.",
+                                    "'restarting'.",
                                     workerTaskTags);
         taskRunningRatio = createTemplate("running-ratio", TASK_GROUP_NAME,
                                           "The fraction of time this task has spent in the running state.", workerTaskTags);
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTask.java
new file mode 100644
index 0000000000000..931917b9e15ce
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTask.java
@@ -0,0 +1,529 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.clients.producer.Producer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.errors.InvalidProducerEpochException;
+import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.metrics.stats.Avg;
+import org.apache.kafka.common.metrics.stats.Max;
+import org.apache.kafka.common.metrics.stats.Min;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.source.SourceTask.TransactionBoundary;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ClusterConfigState;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
+import org.apache.kafka.connect.storage.Converter;
+import org.apache.kafka.connect.storage.HeaderConverter;
+import org.apache.kafka.connect.storage.OffsetStorageWriter;
+import org.apache.kafka.connect.storage.StatusBackingStore;
+import org.apache.kafka.connect.util.ConnectorTaskId;
+import org.apache.kafka.connect.util.LoggingContext;
+import org.apache.kafka.connect.util.TopicAdmin;
+import org.apache.kafka.connect.util.TopicCreationGroup;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.Executor;
+import java.util.concurrent.atomic.AtomicReference;
+
+
+/**
+ * WorkerTask that uses a SourceTask to ingest data into Kafka, with support for exactly-once delivery guarantees.
+ */
+class ExactlyOnceWorkerSourceTask extends AbstractWorkerSourceTask {
+    private static final Logger log = LoggerFactory.getLogger(ExactlyOnceWorkerSourceTask.class);
+
+    private boolean transactionOpen;
+    private final LinkedHashMap<SourceRecord, RecordMetadata> commitableRecords;
+
+    private final TransactionBoundaryManager transactionBoundaryManager;
+    private final TransactionMetricsGroup transactionMetrics;
+
+    private final Runnable preProducerCheck;
+    private final Runnable postProducerCheck;
+
+    public ExactlyOnceWorkerSourceTask(ConnectorTaskId id,
+                                       SourceTask task,
+                                       TaskStatus.Listener statusListener,
+                                       TargetState initialState,
+                                       Converter keyConverter,
+                                       Converter valueConverter,
+                                       HeaderConverter headerConverter,
+                                       TransformationChain<SourceRecord> transformationChain,
+                                       Producer<byte[], byte[]> producer,
+                                       TopicAdmin admin,
+                                       Map<String, TopicCreationGroup> topicGroups,
+                                       CloseableOffsetStorageReader offsetReader,
+                                       OffsetStorageWriter offsetWriter,
+                                       ConnectorOffsetBackingStore offsetStore,
+                                       WorkerConfig workerConfig,
+                                       ClusterConfigState configState,
+                                       ConnectMetrics connectMetrics,
+                                       ClassLoader loader,
+                                       Time time,
+                                       RetryWithToleranceOperator retryWithToleranceOperator,
+                                       StatusBackingStore statusBackingStore,
+                                       SourceConnectorConfig sourceConfig,
+                                       Executor closeExecutor,
+                                       Runnable preProducerCheck,
+                                       Runnable postProducerCheck) {
+        super(id, task, statusListener, initialState, keyConverter, valueConverter, headerConverter, transformationChain,
+                new WorkerSourceTaskContext(offsetReader, id, configState, buildTransactionContext(sourceConfig)),
+                producer, admin, topicGroups, offsetReader, offsetWriter, offsetStore, workerConfig, connectMetrics,
+                loader, time, retryWithToleranceOperator, statusBackingStore, closeExecutor);
+
+        this.transactionOpen = false;
+        this.commitableRecords = new LinkedHashMap<>();
+
+        this.preProducerCheck = preProducerCheck;
+        this.postProducerCheck = postProducerCheck;
+
+        this.transactionBoundaryManager = buildTransactionManager(workerConfig, sourceConfig, sourceTaskContext.transactionContext());
+        this.transactionMetrics = new TransactionMetricsGroup(id, connectMetrics);
+    }
+
+    private static WorkerTransactionContext buildTransactionContext(SourceConnectorConfig sourceConfig) {
+        return TransactionBoundary.CONNECTOR.equals(sourceConfig.transactionBoundary())
+                ? new WorkerTransactionContext()
+                : null;
+    }
+
+    @Override
+    protected void prepareToInitializeTask() {
+        preProducerCheck.run();
+
+        // Try not to initialize the transactional producer (which may accidentally fence out other, later task generations) if we've already
+        // been shut down at this point
+        if (isStopping())
+            return;
+        producer.initTransactions();
+
+        postProducerCheck.run();
+    }
+
+    @Override
+    protected void prepareToEnterSendLoop() {
+        transactionBoundaryManager.initialize();
+    }
+
+    @Override
+    protected void beginSendIteration() {
+        // No-op
+    }
+
+    @Override
+    protected void prepareToPollTask() {
+        // No-op
+    }
+
+    @Override
+    protected void recordDropped(SourceRecord record) {
+        synchronized (commitableRecords) {
+            commitableRecords.put(record, null);
+        }
+        transactionBoundaryManager.maybeCommitTransactionForRecord(record);
+    }
+
+    @Override
+    protected Optional<SubmittedRecords.SubmittedRecord> prepareToSendRecord(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord
+    ) {
+        if (offsetStore.primaryOffsetsTopic().equals(producerRecord.topic())) {
+            // This is to prevent deadlock that occurs when:
+            //     1. A task provides a record whose topic is the task's offsets topic
+            //     2. That record is dispatched to the task's producer in a transaction that remains open
+            //        at least until the worker polls the task again
+            //     3. In the subsequent call to SourceTask::poll, the task requests offsets from the worker
+            //        (which requires a read to the end of the offsets topic, and will block until any open
+            //        transactions on the topic are either committed or aborted)
+            throw new ConnectException("Source tasks may not produce to their own offsets topics when exactly-once support is enabled");
+        }
+        maybeBeginTransaction();
+        return Optional.empty();
+    }
+
+    @Override
+    protected void recordDispatched(SourceRecord record) {
+        // Offsets are converted & serialized in the OffsetWriter
+        // Important: we only save offsets for the record after it has been accepted by the producer; this way,
+        // we commit those offsets if and only if the record is sent successfully.
+        offsetWriter.offset(record.sourcePartition(), record.sourceOffset());
+        transactionMetrics.addRecord();
+        transactionBoundaryManager.maybeCommitTransactionForRecord(record);
+    }
+
+    @Override
+    protected void batchDispatched() {
+        transactionBoundaryManager.maybeCommitTransactionForBatch();
+    }
+
+    @Override
+    protected void recordSent(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            RecordMetadata recordMetadata
+    ) {
+        synchronized (commitableRecords) {
+            commitableRecords.put(sourceRecord, recordMetadata);
+        }
+    }
+
+    @Override
+    protected void producerSendFailed(
+            boolean synchronous,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            SourceRecord preTransformRecord,
+            Exception e
+    ) {
+        if (synchronous) {
+            throw maybeWrapProducerSendException(
+                    "Unrecoverable exception trying to send",
+                    e
+            );
+        } else {
+            // No-op; all asynchronously-reported producer exceptions should be bubbled up again by Producer::commitTransaction
+        }
+    }
+
+    @Override
+    protected void finalOffsetCommit(boolean failed) {
+        if (failed) {
+            log.debug("Skipping final offset commit as task has failed");
+            return;
+        } else if (isCancelled()) {
+            log.debug("Skipping final offset commit as task has been cancelled");
+            return;
+        }
+
+        // It should be safe to commit here even if we were in the middle of retrying on RetriableExceptions in the
+        // send loop since we only track source offsets for records that have been successfully dispatched to the
+        // producer.
+        // Any records that we were retrying on (and any records after them in the batch) won't be included in the
+        // transaction and their offsets won't be committed, but (unless the user has requested connector-defined
+        // transaction boundaries), it's better to commit some data than none.
+        transactionBoundaryManager.maybeCommitFinalTransaction();
+    }
+
+    @Override
+    public void removeMetrics() {
+        Utils.closeQuietly(transactionMetrics, "source task transaction metrics tracker");
+    }
+
+    @Override
+    protected void onPause() {
+        super.onPause();
+        // Commit the transaction now so that we don't end up with a hanging transaction, or worse, get fenced out
+        // and fail the task once unpaused
+        transactionBoundaryManager.maybeCommitFinalTransaction();
+    }
+
+    private void maybeBeginTransaction() {
+        if (!transactionOpen) {
+            producer.beginTransaction();
+            transactionOpen = true;
+        }
+    }
+
+    private void commitTransaction() {
+        log.debug("{} Committing offsets", this);
+
+        long started = time.milliseconds();
+
+        // We might have just aborted a transaction, in which case we'll have to begin a new one
+        // in order to commit offsets
+        maybeBeginTransaction();
+
+        AtomicReference<Throwable> flushError = new AtomicReference<>();
+        if (offsetWriter.beginFlush()) {
+            // Now we can actually write the offsets to the internal topic.
+            // No need to track the flush future here since it's guaranteed to complete by the time
+            // Producer::commitTransaction completes
+            // We do have to track failures for that callback though, since they may originate from outside
+            // the producer (i.e., the offset writer or the backing offset store), and would not cause
+            // Producer::commitTransaction to fail
+            offsetWriter.doFlush((error, result) -> {
+                if (error != null) {
+                    log.error("{} Failed to flush offsets to storage: ", ExactlyOnceWorkerSourceTask.this, error);
+                    flushError.compareAndSet(null, error);
+                } else {
+                    log.trace("{} Finished flushing offsets to storage", ExactlyOnceWorkerSourceTask.this);
+                }
+            });
+        }
+
+        // Commit the transaction
+        // Blocks until all outstanding records have been sent and ack'd
+        try {
+            producer.commitTransaction();
+        } catch (Throwable t) {
+            log.error("{} Failed to commit producer transaction", ExactlyOnceWorkerSourceTask.this, t);
+            flushError.compareAndSet(null, t);
+        }
+
+        transactionOpen = false;
+
+        Throwable error = flushError.get();
+        if (error != null) {
+            recordCommitFailure(time.milliseconds() - started, null);
+            offsetWriter.cancelFlush();
+            throw maybeWrapProducerSendException(
+                    "Failed to flush offsets and/or records for task " + id,
+                    error
+            );
+        }
+
+        transactionMetrics.commitTransaction();
+
+        long durationMillis = time.milliseconds() - started;
+        recordCommitSuccess(durationMillis);
+        log.debug("{} Finished commitOffsets successfully in {} ms", this, durationMillis);
+
+        // Synchronize in order to guarantee that writes on other threads are picked up by this one
+        synchronized (commitableRecords) {
+            commitableRecords.forEach(this::commitTaskRecord);
+            commitableRecords.clear();
+        }
+        commitSourceTask();
+    }
+
+    private RuntimeException maybeWrapProducerSendException(String message, Throwable error) {
+        if (isPossibleTransactionTimeoutError(error)) {
+            return wrapTransactionTimeoutError(error);
+        } else {
+            return new ConnectException(message, error);
+        }
+    }
+
+    private static boolean isPossibleTransactionTimeoutError(Throwable error) {
+        return error instanceof InvalidProducerEpochException
+            || error.getCause() instanceof InvalidProducerEpochException;
+    }
+
+    private ConnectException wrapTransactionTimeoutError(Throwable error) {
+        return new ConnectException(
+            "The task " + id + " was unable to finish writing records to Kafka before its producer transaction expired. "
+                + "It may be necessary to reconfigure this connector in order for it to run healthily with exactly-once support. "
+                + "Options for this include: tune the connector's producer configuration for higher throughput, "
+                + "increase the transaction timeout for the connector's producers, "
+                + "decrease the offset commit interval (if using interval-based transaction boundaries), "
+                + "or use the 'poll' transaction boundary (if the connector is not already configured to use it).",
+            error
+        );
+    }
+
+    @Override
+    public String toString() {
+        return "ExactlyOnceWorkerSourceTask{" +
+            "id=" + id +
+            '}';
+    }
+
+    private abstract class TransactionBoundaryManager {
+        protected boolean shouldCommitTransactionForRecord(SourceRecord record) {
+            return false;
+        }
+
+        protected boolean shouldCommitTransactionForBatch(long currentTimeMs) {
+            return false;
+        }
+
+        protected boolean shouldCommitFinalTransaction() {
+            return false;
+        }
+
+        /**
+         * Hook to signal that a new transaction cycle has been started. May be invoked
+         * multiple times if the task is paused and then resumed. It can be assumed that
+         * a new transaction is created at least every time an existing transaction is
+         * committed; this is just a hook to notify that a new transaction may have been
+         * created outside of that flow as well.
+         */
+        protected void initialize() {
+        }
+
+        public void maybeCommitTransactionForRecord(SourceRecord record) {
+            maybeCommitTransaction(shouldCommitTransactionForRecord(record));
+        }
+
+        public void maybeCommitTransactionForBatch() {
+            maybeCommitTransaction(shouldCommitTransactionForBatch(time.milliseconds()));
+        }
+
+        public void maybeCommitFinalTransaction() {
+            maybeCommitTransaction(shouldCommitFinalTransaction());
+        }
+
+        private void maybeCommitTransaction(boolean shouldCommit) {
+            if (shouldCommit && (transactionOpen || offsetWriter.willFlush())) {
+                try (LoggingContext loggingContext = LoggingContext.forOffsets(id)) {
+                    commitTransaction();
+                }
+            }
+        }
+    }
+
+    private TransactionBoundaryManager buildTransactionManager(
+            WorkerConfig workerConfig,
+            SourceConnectorConfig sourceConfig,
+            WorkerTransactionContext transactionContext) {
+        TransactionBoundary boundary = sourceConfig.transactionBoundary();
+        switch (boundary) {
+            case POLL:
+                return new TransactionBoundaryManager() {
+                    @Override
+                    protected boolean shouldCommitTransactionForBatch(long currentTimeMs) {
+                        return true;
+                    }
+
+                    @Override
+                    protected boolean shouldCommitFinalTransaction() {
+                        return true;
+                    }
+                };
+
+            case INTERVAL:
+                long transactionBoundaryInterval = Optional.ofNullable(sourceConfig.transactionBoundaryInterval())
+                        .orElse(workerConfig.offsetCommitInterval());
+                return new TransactionBoundaryManager() {
+                    private final long commitInterval = transactionBoundaryInterval;
+                    private long lastCommit;
+
+                    @Override
+                    public void initialize() {
+                        this.lastCommit = time.milliseconds();
+                    }
+
+                    @Override
+                    protected boolean shouldCommitTransactionForBatch(long currentTimeMs) {
+                        if (time.milliseconds() >= lastCommit + commitInterval) {
+                            lastCommit = time.milliseconds();
+                            return true;
+                        } else {
+                            return false;
+                        }
+                    }
+
+                    @Override
+                    protected  boolean shouldCommitFinalTransaction() {
+                        return true;
+                    }
+                };
+
+            case CONNECTOR:
+                Objects.requireNonNull(transactionContext, "Transaction context must be provided when using connector-defined transaction boundaries");
+                return new TransactionBoundaryManager() {
+                    @Override
+                    protected boolean shouldCommitFinalTransaction() {
+                        return shouldCommitTransactionForBatch(time.milliseconds());
+                    }
+
+                    @Override
+                    protected boolean shouldCommitTransactionForBatch(long currentTimeMs) {
+                        if (transactionContext.shouldAbortBatch()) {
+                            log.info("Aborting transaction for batch as requested by connector");
+                            abortTransaction();
+                            // We abort the transaction, which causes all the records up to this point to be dropped, but we still want to
+                            // commit offsets so that the task doesn't see the same records all over again
+                            return true;
+                        }
+                        return transactionContext.shouldCommitBatch();
+                    }
+
+                    @Override
+                    protected boolean shouldCommitTransactionForRecord(SourceRecord record) {
+                        if (transactionContext.shouldAbortOn(record)) {
+                            log.info("Aborting transaction for record on topic {} as requested by connector", record.topic());
+                            log.trace("Last record in aborted transaction: {}", record);
+                            abortTransaction();
+                            // We abort the transaction, which causes all the records up to this point to be dropped, but we still want to
+                            // commit offsets so that the task doesn't see the same records all over again
+                            return true;
+                        }
+                        return transactionContext.shouldCommitOn(record);
+                    }
+
+                    private void abortTransaction() {
+                        producer.abortTransaction();
+                        transactionMetrics.abortTransaction();
+                        transactionOpen = false;
+                    }
+                };
+            default:
+                throw new IllegalArgumentException("Unrecognized transaction boundary: " + boundary);
+        }
+    }
+
+    TransactionMetricsGroup transactionMetricsGroup() {
+        return transactionMetrics;
+    }
+
+
+    static class TransactionMetricsGroup implements AutoCloseable {
+        private final Sensor transactionSize;
+        private int size;
+        private final ConnectMetrics.MetricGroup metricGroup;
+
+        public TransactionMetricsGroup(ConnectorTaskId id, ConnectMetrics connectMetrics) {
+            ConnectMetricsRegistry registry = connectMetrics.registry();
+            metricGroup = connectMetrics.group(registry.sourceTaskGroupName(),
+                    registry.connectorTagName(), id.connector(),
+                    registry.taskTagName(), Integer.toString(id.task()));
+
+            transactionSize = metricGroup.sensor("transaction-size");
+            transactionSize.add(metricGroup.metricName(registry.transactionSizeAvg), new Avg());
+            transactionSize.add(metricGroup.metricName(registry.transactionSizeMin), new Min());
+            transactionSize.add(metricGroup.metricName(registry.transactionSizeMax), new Max());
+        }
+
+        @Override
+        public void close() {
+            metricGroup.close();
+        }
+
+        void addRecord() {
+            size++;
+        }
+
+        void abortTransaction() {
+            size = 0;
+        }
+
+        void commitTransaction() {
+            transactionSize.record(size);
+            size = 0;
+        }
+
+        protected ConnectMetrics.MetricGroup metricGroup() {
+            return metricGroup;
+        }
+
+    }
+
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Herder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Herder.java
index 0f20c0bb3557f..32ab697735417 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Herder.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Herder.java
@@ -138,6 +138,18 @@ public interface Herder {
      */
     void putTaskConfigs(String connName, List<Map<String, String>> configs, Callback<Void> callback, InternalRequestSignature requestSignature);
 
+    /**
+     * Fence out any older task generations for a source connector, and then write a record to the config topic
+     * indicating that it is safe to bring up a new generation of tasks. If that record is already present, do nothing
+     * and invoke the callback successfully.
+     * @param connName the name of the connector to fence out, which must refer to a source connector; if the
+     *                 connector does not exist or is not a source connector, the callback will be invoked with an error
+     * @param callback callback to invoke upon completion
+     * @param requestSignature the signature of the request made for this connector;
+     *                         may be null if no signature was provided
+     */
+    void fenceZombieSourceTasks(String connName, Callback<Void> callback, InternalRequestSignature requestSignature);
+
     /**
      * Get a list of connectors currently running in this cluster.
      * @return A list of connector names
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java
index e38072b9b6e50..2115bda662baf 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SourceConnectorConfig.java
@@ -20,24 +20,38 @@
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
+import org.apache.kafka.connect.source.SourceTask;
 
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.ExactlyOnceSupportLevel.REQUESTED;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.ExactlyOnceSupportLevel.REQUIRED;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_GROUP;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_PREFIX;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.EXCLUDE_REGEX_CONFIG;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.INCLUDE_REGEX_CONFIG;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_CONFIG;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_CONFIG;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.CONNECTOR;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.DEFAULT;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.INTERVAL;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.POLL;
+import static org.apache.kafka.common.utils.Utils.enumOptions;
 
 public class SourceConnectorConfig extends ConnectorConfig {
 
+    private static final Logger log = LoggerFactory.getLogger(SourceConnectorConfig.class);
+
     protected static final String TOPIC_CREATION_GROUP = "Topic Creation";
 
     public static final String TOPIC_CREATION_PREFIX = "topic.creation.";
@@ -47,6 +61,57 @@ public class SourceConnectorConfig extends ConnectorConfig {
             + "created by source connectors";
     private static final String TOPIC_CREATION_GROUPS_DISPLAY = "Topic Creation Groups";
 
+    protected static final String EXACTLY_ONCE_SUPPORT_GROUP = "Exactly Once Support";
+
+    public enum ExactlyOnceSupportLevel {
+        REQUESTED,
+        REQUIRED;
+
+        public static ExactlyOnceSupportLevel fromProperty(String property) {
+            return valueOf(property.toUpperCase(Locale.ROOT).trim());
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    public static final String EXACTLY_ONCE_SUPPORT_CONFIG = "exactly.once.support";
+    private static final String EXACTLY_ONCE_SUPPORT_DOC = "Permitted values are " + String.join(", ", enumOptions(ExactlyOnceSupportLevel.class)) + ". "
+            + "If set to \"" + REQUIRED + "\", forces a preflight check for the connector to ensure that it can provide exactly-once delivery "
+            + "with the given configuration. Some connectors may be capable of providing exactly-once delivery but not signal to "
+            + "Connect that they support this; in that case, documentation for the connector should be consulted carefully before "
+            + "creating it, and the value for this property should be set to \"" + REQUESTED + "\". "
+            + "Additionally, if the value is set to \"" + REQUIRED + "\" but the worker that performs preflight validation does not have "
+            + "exactly-once support enabled for source connectors, requests to create or validate the connector will fail.";
+    private static final String EXACTLY_ONCE_SUPPORT_DISPLAY = "Exactly once support";
+
+    public static final String TRANSACTION_BOUNDARY_CONFIG = SourceTask.TRANSACTION_BOUNDARY_CONFIG;
+    private static final String TRANSACTION_BOUNDARY_DOC = "Permitted values are: " + String.join(", ", enumOptions(TransactionBoundary.class)) + ". "
+            + "If set to '" + POLL + "', a new producer transaction will be started and committed for every batch of records that each task from "
+            + "this connector provides to Connect. If set to '" + CONNECTOR + "', relies on connector-defined transaction boundaries; note that "
+            + "not all connectors are capable of defining their own transaction boundaries, and in that case, attempts to instantiate a connector with "
+            + "this value will fail. Finally, if set to '" + INTERVAL + "', commits transactions only after a user-defined time interval has passed.";
+    private static final String TRANSACTION_BOUNDARY_DISPLAY = "Transaction Boundary";
+
+    public static final String TRANSACTION_BOUNDARY_INTERVAL_CONFIG = "transaction.boundary.interval.ms";
+    private static final String TRANSACTION_BOUNDARY_INTERVAL_DOC = "If '" + TRANSACTION_BOUNDARY_CONFIG + "' is set to '" + INTERVAL
+            + "', determines the interval for producer transaction commits by connector tasks. If unset, defaults to the value of the worker-level "
+            + "'" + WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG + "' property. It has no effect if a different "
+            + TRANSACTION_BOUNDARY_CONFIG + " is specified.";
+    private static final String TRANSACTION_BOUNDARY_INTERVAL_DISPLAY = "Transaction boundary interval";
+
+    protected static final String OFFSETS_TOPIC_GROUP = "offsets.topic";
+
+    public static final String OFFSETS_TOPIC_CONFIG = "offsets.storage.topic";
+    private static final String OFFSETS_TOPIC_DOC = "The name of a separate offsets topic to use for this connector. "
+            + "If empty or not specified, the worker’s global offsets topic name will be used. "
+            + "If specified, the offsets topic will be created if it does not already exist on the Kafka cluster targeted by this connector "
+            + "(which may be different from the one used for the worker's global offsets topic if the bootstrap.servers property of the connector's producer "
+            + "has been overridden from the worker's). Only applicable in distributed mode; in standalone mode, setting this property will have no effect.";
+    private static final String OFFSETS_TOPIC_DISPLAY = "Offsets topic";
+
     private static class EnrichedSourceConnectorConfig extends ConnectorConfig {
         EnrichedSourceConnectorConfig(Plugins plugins, ConfigDef configDef, Map<String, String> props) {
             super(plugins, configDef, props);
@@ -58,23 +123,87 @@ public Object get(String key) {
         }
     }
 
-    private static final ConfigDef CONFIG = SourceConnectorConfig.configDef();
+    private final TransactionBoundary transactionBoundary;
+    private final Long transactionBoundaryInterval;
     private final EnrichedSourceConnectorConfig enrichedSourceConfig;
+    private final String offsetsTopic;
 
     public static ConfigDef configDef() {
+        ConfigDef.Validator atLeastZero = ConfigDef.Range.atLeast(0);
         int orderInGroup = 0;
         return new ConfigDef(ConnectorConfig.configDef())
-                .define(TOPIC_CREATION_GROUPS_CONFIG, ConfigDef.Type.LIST, Collections.emptyList(),
-                        ConfigDef.CompositeValidator.of(new ConfigDef.NonNullValidator(), ConfigDef.LambdaValidator.with(
+                .define(
+                        TOPIC_CREATION_GROUPS_CONFIG,
+                        ConfigDef.Type.LIST,
+                        Collections.emptyList(),
+                        ConfigDef.CompositeValidator.of(
+                                new ConfigDef.NonNullValidator(),
+                                ConfigDef.LambdaValidator.with(
+                                    (name, value) -> {
+                                        List<?> groupAliases = (List<?>) value;
+                                        if (groupAliases.size() > new HashSet<>(groupAliases).size()) {
+                                            throw new ConfigException(name, value, "Duplicate alias provided.");
+                                        }
+                                    },
+                                    () -> "unique topic creation groups")),
+                        ConfigDef.Importance.LOW,
+                        TOPIC_CREATION_GROUPS_DOC,
+                        TOPIC_CREATION_GROUP,
+                        ++orderInGroup,
+                        ConfigDef.Width.LONG,
+                        TOPIC_CREATION_GROUPS_DISPLAY)
+                .define(
+                        EXACTLY_ONCE_SUPPORT_CONFIG,
+                        ConfigDef.Type.STRING,
+                        REQUESTED.toString(),
+                        ConfigDef.CaseInsensitiveValidString.in(enumOptions(ExactlyOnceSupportLevel.class)),
+                        ConfigDef.Importance.MEDIUM,
+                        EXACTLY_ONCE_SUPPORT_DOC,
+                        EXACTLY_ONCE_SUPPORT_GROUP,
+                        ++orderInGroup,
+                        ConfigDef.Width.SHORT,
+                        EXACTLY_ONCE_SUPPORT_DISPLAY)
+                .define(
+                        TRANSACTION_BOUNDARY_CONFIG,
+                        ConfigDef.Type.STRING,
+                        DEFAULT.toString(),
+                        ConfigDef.CaseInsensitiveValidString.in(enumOptions(TransactionBoundary.class)),
+                        ConfigDef.Importance.MEDIUM,
+                        TRANSACTION_BOUNDARY_DOC,
+                        EXACTLY_ONCE_SUPPORT_GROUP,
+                        ++orderInGroup,
+                        ConfigDef.Width.SHORT,
+                        TRANSACTION_BOUNDARY_DISPLAY)
+                .define(
+                        TRANSACTION_BOUNDARY_INTERVAL_CONFIG,
+                        ConfigDef.Type.LONG,
+                        null,
+                        ConfigDef.LambdaValidator.with(
                             (name, value) -> {
-                                List<?> groupAliases = (List<?>) value;
-                                if (groupAliases.size() > new HashSet<>(groupAliases).size()) {
-                                    throw new ConfigException(name, value, "Duplicate alias provided.");
+                                if (value == null) {
+                                    return;
                                 }
+                                atLeastZero.ensureValid(name, value);
                             },
-                            () -> "unique topic creation groups")),
-                        ConfigDef.Importance.LOW, TOPIC_CREATION_GROUPS_DOC, TOPIC_CREATION_GROUP,
-                        ++orderInGroup, ConfigDef.Width.LONG, TOPIC_CREATION_GROUPS_DISPLAY);
+                            atLeastZero::toString
+                        ),
+                        ConfigDef.Importance.LOW,
+                        TRANSACTION_BOUNDARY_INTERVAL_DOC,
+                        EXACTLY_ONCE_SUPPORT_GROUP,
+                        ++orderInGroup,
+                        ConfigDef.Width.SHORT,
+                        TRANSACTION_BOUNDARY_INTERVAL_DISPLAY)
+                .define(
+                        OFFSETS_TOPIC_CONFIG,
+                        ConfigDef.Type.STRING,
+                        null,
+                        new ConfigDef.NonEmptyString(),
+                        ConfigDef.Importance.LOW,
+                        OFFSETS_TOPIC_DOC,
+                        OFFSETS_TOPIC_GROUP,
+                        orderInGroup = 1,
+                        ConfigDef.Width.LONG,
+                        OFFSETS_TOPIC_DISPLAY);
     }
 
     public static ConfigDef embedDefaultGroup(ConfigDef baseConfigDef) {
@@ -98,6 +227,13 @@ public static ConfigDef enrich(ConfigDef baseConfigDef, Map<String, String> prop
             topicCreationGroups.addAll((List<?>) aliases);
         }
 
+        //Remove "topic.creation.groups" config if its present and the value is "default"
+        if (topicCreationGroups.contains(DEFAULT_TOPIC_CREATION_GROUP)) {
+            log.warn("'{}' topic creation group always exists and does not need to be listed explicitly",
+                DEFAULT_TOPIC_CREATION_GROUP);
+            topicCreationGroups.removeAll(Collections.singleton(DEFAULT_TOPIC_CREATION_GROUP));
+        }
+
         ConfigDef newDef = new ConfigDef(baseConfigDef);
         String defaultGroupPrefix = TOPIC_CREATION_PREFIX + DEFAULT_TOPIC_CREATION_GROUP + ".";
         short defaultGroupReplicationFactor = defaultGroupConfig.getShort(defaultGroupPrefix + REPLICATION_FACTOR_CONFIG);
@@ -116,9 +252,9 @@ public static ConfigDef enrich(ConfigDef baseConfigDef, Map<String, String> prop
     }
 
     public SourceConnectorConfig(Plugins plugins, Map<String, String> props, boolean createTopics) {
-        super(plugins, CONFIG, props);
+        super(plugins, configDef(), props);
         if (createTopics && props.entrySet().stream().anyMatch(e -> e.getKey().startsWith(TOPIC_CREATION_PREFIX))) {
-            ConfigDef defaultConfigDef = embedDefaultGroup(CONFIG);
+            ConfigDef defaultConfigDef = embedDefaultGroup(configDef());
             // This config is only used to set default values for partitions and replication
             // factor from the default group and otherwise it remains unused
             AbstractConfig defaultGroup = new AbstractConfig(defaultConfigDef, props, false);
@@ -135,6 +271,13 @@ public SourceConnectorConfig(Plugins plugins, Map<String, String> props, boolean
         } else {
             enrichedSourceConfig = null;
         }
+        transactionBoundary = TransactionBoundary.fromProperty(getString(TRANSACTION_BOUNDARY_CONFIG));
+        transactionBoundaryInterval = getLong(TRANSACTION_BOUNDARY_INTERVAL_CONFIG);
+        offsetsTopic = getString(OFFSETS_TOPIC_CONFIG);
+    }
+
+    public static boolean usesTopicCreation(Map<String, String> props) {
+        return props.entrySet().stream().anyMatch(e -> e.getKey().startsWith(TOPIC_CREATION_PREFIX));
     }
 
     @Override
@@ -142,6 +285,18 @@ public Object get(String key) {
         return enrichedSourceConfig != null ? enrichedSourceConfig.get(key) : super.get(key);
     }
 
+    public TransactionBoundary transactionBoundary() {
+        return transactionBoundary;
+    }
+
+    public Long transactionBoundaryInterval() {
+        return transactionBoundaryInterval;
+    }
+
+    public String offsetsTopic() {
+        return offsetsTopic;
+    }
+
     /**
      * Returns whether this configuration uses topic creation properties.
      *
@@ -181,6 +336,6 @@ public Map<String, Object> topicCreationOtherConfigs(String group) {
     }
 
     public static void main(String[] args) {
-        System.out.println(CONFIG.toHtml(4, config -> "sourceconnectorconfigs_" + config));
+        System.out.println(configDef().toHtml(4, config -> "sourceconnectorconfigs_" + config));
     }
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SubmittedRecords.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SubmittedRecords.java
index 6cdd2c1842b7b..b77a6fa9841d5 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SubmittedRecords.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/SubmittedRecords.java
@@ -35,7 +35,7 @@
  * source offsets. Records are tracked in the order in which they are submitted, which should match the order they were
  * returned from {@link SourceTask#poll()}. The latest-eligible offsets for each source partition can be retrieved via
  * {@link #committableOffsets()}, where every record up to and including the record for each returned offset has been
- * either {@link SubmittedRecord#ack() acknowledged} or {@link #removeLastOccurrence(SubmittedRecord) removed}.
+ * either {@link SubmittedRecord#ack() acknowledged} or {@link SubmittedRecord#drop dropped}.
  * Note that this class is not thread-safe, though a {@link SubmittedRecord} can be
  * {@link SubmittedRecord#ack() acknowledged} from a different thread.
  */
@@ -54,13 +54,13 @@ public SubmittedRecords() {
     /**
      * Enqueue a new source record before dispatching it to a producer.
      * The returned {@link SubmittedRecord} should either be {@link SubmittedRecord#ack() acknowledged} in the
-     * producer callback, or {@link #removeLastOccurrence(SubmittedRecord) removed} if the record could not be successfully
+     * producer callback, or {@link SubmittedRecord#drop() dropped} if the record could not be successfully
      * sent to the producer.
-     * 
+     *
      * @param record the record about to be dispatched; may not be null but may have a null
      *               {@link SourceRecord#sourcePartition()} and/or {@link SourceRecord#sourceOffset()}
      * @return a {@link SubmittedRecord} that can be either {@link SubmittedRecord#ack() acknowledged} once ack'd by
-     *         the producer, or {@link #removeLastOccurrence removed} if synchronously rejected by the producer
+     *         the producer, or {@link SubmittedRecord#drop() dropped} if synchronously rejected by the producer
      */
     @SuppressWarnings("unchecked")
     public SubmittedRecord submit(SourceRecord record) {
@@ -78,32 +78,6 @@ SubmittedRecord submit(Map<String, Object> partition, Map<String, Object> offset
         return result;
     }
 
-    /**
-     * Remove a source record and do not take it into account any longer when tracking offsets.
-     * Useful if the record has been synchronously rejected by the producer.
-     * If multiple instances of the same {@link SubmittedRecord} have been submitted already, only the first one found
-     * (traversing from the end of the deque backward) will be removed.
-     * @param record the {@link #submit previously-submitted} record to stop tracking; may not be null
-     * @return whether an instance of the record was removed
-     */
-    public boolean removeLastOccurrence(SubmittedRecord record) {
-        Deque<SubmittedRecord> deque = records.get(record.partition());
-        if (deque == null) {
-            log.warn("Attempted to remove record from submitted queue for partition {}, but no records with that partition appear to have been submitted", record.partition());
-            return false;
-        }
-        boolean result = deque.removeLastOccurrence(record);
-        if (deque.isEmpty()) {
-            records.remove(record.partition());
-        }
-        if (result) {
-            messageAcked();
-        } else {
-            log.warn("Attempted to remove record from submitted queue for partition {}, but the record has not been submitted or has already been removed", record.partition());
-        }
-        return result;
-    }
-
     /**
      * Clear out any acknowledged records at the head of the deques and return a {@link CommittableOffsets snapshot} of the offsets and offset metadata
      * accrued between the last time this method was invoked and now. This snapshot can be {@link CommittableOffsets#updatedWith(CommittableOffsets) combined}
@@ -187,7 +161,7 @@ private synchronized void messageAcked() {
         }
     }
 
-    class SubmittedRecord {
+    public class SubmittedRecord {
         private final Map<String, Object> partition;
         private final Map<String, Object> offset;
         private final AtomicBoolean acked;
@@ -208,6 +182,34 @@ public void ack() {
             }
         }
 
+        /**
+         * Remove this record and do not take it into account any longer when tracking offsets.
+         * Useful if the record has been synchronously rejected by the producer.
+         * If multiple instances of this record have been submitted already, only the first one found
+         * (traversing from the end of the deque backward) will be removed.
+         * <p>
+         * This is <strong>not safe</strong> to be called from a different thread
+         * than what called {@link SubmittedRecords#submit(SourceRecord)}.
+         * @return whether this instance was dropped
+         */
+        public boolean drop() {
+            Deque<SubmittedRecord> deque = records.get(partition);
+            if (deque == null) {
+                log.warn("Attempted to remove record from submitted queue for partition {}, but no records with that partition appear to have been submitted", partition);
+                return false;
+            }
+            boolean result = deque.removeLastOccurrence(this);
+            if (deque.isEmpty()) {
+                records.remove(partition);
+            }
+            if (result) {
+                messageAcked();
+            } else {
+                log.warn("Attempted to remove record from submitted queue for partition {}, but the record has not been submitted or has already been removed", partition);
+            }
+            return result;
+        }
+
         private boolean acked() {
             return acked.get();
         }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java
index 582271a196335..5bc67693d0a66 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/Worker.java
@@ -16,12 +16,18 @@
  */
 package org.apache.kafka.connect.runtime;
 
+import org.apache.kafka.clients.admin.Admin;
 import org.apache.kafka.clients.admin.AdminClientConfig;
+import org.apache.kafka.clients.admin.FenceProducersOptions;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.KafkaFuture;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.MetricNameTemplate;
+import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigValue;
 import org.apache.kafka.common.config.provider.ConfigProvider;
 import org.apache.kafka.common.utils.Time;
@@ -35,7 +41,10 @@
 import org.apache.kafka.connect.json.JsonConverter;
 import org.apache.kafka.connect.json.JsonConverterConfig;
 import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.runtime.isolation.LoaderSwap;
+import org.apache.kafka.connect.runtime.rest.resources.ConnectResource;
+import org.apache.kafka.connect.storage.ClusterConfigState;
+import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
 import org.apache.kafka.connect.runtime.errors.DeadLetterQueueReporter;
 import org.apache.kafka.connect.runtime.errors.ErrorHandlingMetrics;
 import org.apache.kafka.connect.runtime.errors.ErrorReporter;
@@ -49,10 +58,11 @@
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
+import org.apache.kafka.connect.storage.KafkaOffsetBackingStore;
 import org.apache.kafka.connect.storage.OffsetBackingStore;
-import org.apache.kafka.connect.storage.OffsetStorageReader;
 import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
 import org.apache.kafka.connect.storage.OffsetStorageWriter;
 import org.apache.kafka.connect.util.Callback;
@@ -69,15 +79,23 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.apache.kafka.clients.CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG;
 
 /**
  * <p>
@@ -91,6 +109,7 @@
 public class Worker {
 
     public static final long CONNECTOR_GRACEFUL_SHUTDOWN_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(5);
+    public static final long EXECUTOR_SHUTDOWN_TERMINATION_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(1);
 
     private static final Logger log = LoggerFactory.getLogger(Worker.class);
 
@@ -107,11 +126,11 @@ public class Worker {
     private final WorkerConfig config;
     private final Converter internalKeyConverter;
     private final Converter internalValueConverter;
-    private final OffsetBackingStore offsetBackingStore;
+    private final OffsetBackingStore globalOffsetBackingStore;
 
     private final ConcurrentMap<String, WorkerConnector> connectors = new ConcurrentHashMap<>();
     private final ConcurrentMap<ConnectorTaskId, WorkerTask> tasks = new ConcurrentHashMap<>();
-    private SourceTaskOffsetCommitter sourceTaskOffsetCommitter;
+    private Optional<SourceTaskOffsetCommitter> sourceTaskOffsetCommitter;
     private final WorkerConfigTransformer workerConfigTransformer;
     private final ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy;
 
@@ -120,9 +139,9 @@ public Worker(
         Time time,
         Plugins plugins,
         WorkerConfig config,
-        OffsetBackingStore offsetBackingStore,
+        OffsetBackingStore globalOffsetBackingStore,
         ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy) {
-        this(workerId, time, plugins, config, offsetBackingStore, Executors.newCachedThreadPool(), connectorClientConfigOverridePolicy);
+        this(workerId, time, plugins, config, globalOffsetBackingStore, Executors.newCachedThreadPool(), connectorClientConfigOverridePolicy);
     }
 
     Worker(
@@ -130,7 +149,7 @@ public Worker(
             Time time,
             Plugins plugins,
             WorkerConfig config,
-            OffsetBackingStore offsetBackingStore,
+            OffsetBackingStore globalOffsetBackingStore,
             ExecutorService executorService,
             ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy
     ) {
@@ -148,8 +167,8 @@ public Worker(
         this.internalKeyConverter = plugins.newInternalConverter(true, JsonConverter.class.getName(), internalConverterConfig);
         this.internalValueConverter = plugins.newInternalConverter(false, JsonConverter.class.getName(), internalConverterConfig);
 
-        this.offsetBackingStore = offsetBackingStore;
-        this.offsetBackingStore.configure(config);
+        this.globalOffsetBackingStore = globalOffsetBackingStore;
+        this.globalOffsetBackingStore.configure(config);
 
         this.workerConfigTransformer = initConfigTransformer();
 
@@ -183,8 +202,11 @@ protected Herder herder() {
     public void start() {
         log.info("Worker starting");
 
-        offsetBackingStore.start();
-        sourceTaskOffsetCommitter = new SourceTaskOffsetCommitter(config);
+        globalOffsetBackingStore.start();
+
+        sourceTaskOffsetCommitter = config.exactlyOnceSourceEnabled()
+                ? Optional.empty()
+                : Optional.of(new SourceTaskOffsetCommitter(config));
 
         connectorStatusMetricsGroup = new ConnectorStatusMetricsGroup(metrics, tasks, herder);
 
@@ -211,9 +233,9 @@ public void stop() {
         }
 
         long timeoutMs = limit - time.milliseconds();
-        sourceTaskOffsetCommitter.close(timeoutMs);
+        sourceTaskOffsetCommitter.ifPresent(committer -> committer.close(timeoutMs));
 
-        offsetBackingStore.stop();
+        globalOffsetBackingStore.stop();
         metrics.stop();
 
         log.info("Worker stopped");
@@ -222,6 +244,20 @@ public void stop() {
         connectorStatusMetricsGroup.close();
 
         workerConfigTransformer.close();
+        executor.shutdown();
+        try {
+            // Wait a while for existing tasks to terminate
+            if (!executor.awaitTermination(EXECUTOR_SHUTDOWN_TERMINATION_TIMEOUT_MS, TimeUnit.MILLISECONDS)) {
+                executor.shutdownNow(); //cancel current executing threads
+                // Wait a while for tasks to respond to being cancelled
+                if (!executor.awaitTermination(EXECUTOR_SHUTDOWN_TERMINATION_TIMEOUT_MS, TimeUnit.MILLISECONDS))
+                    log.error("Executor did not terminate in time");
+            }
+        } catch (InterruptedException e) {
+            executor.shutdownNow(); // (Re-)Cancel if current thread also interrupted
+            // Preserve interrupt status
+            Thread.currentThread().interrupt();
+        }
     }
 
     /**
@@ -263,14 +299,26 @@ public void startConnector(
 
                 log.info("Creating connector {} of type {}", connName, connClass);
                 final Connector connector = plugins.newConnector(connClass);
-                final ConnectorConfig connConfig = ConnectUtils.isSinkConnector(connector)
-                        ? new SinkConnectorConfig(plugins, connProps)
-                        : new SourceConnectorConfig(plugins, connProps, config.topicCreationEnable());
-
-                final OffsetStorageReader offsetReader = new OffsetStorageReaderImpl(
-                        offsetBackingStore, connName, internalKeyConverter, internalValueConverter);
+                final ConnectorConfig connConfig;
+                final CloseableOffsetStorageReader offsetReader;
+                final ConnectorOffsetBackingStore offsetStore;
+                if (ConnectUtils.isSinkConnector(connector)) {
+                    connConfig = new SinkConnectorConfig(plugins, connProps);
+                    offsetReader = null;
+                    offsetStore = null;
+                } else {
+                    SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins, connProps, config.topicCreationEnable());
+                    connConfig = sourceConfig;
+
+                    // Set up the offset backing store for this connector instance
+                    offsetStore = config.exactlyOnceSourceEnabled()
+                            ? offsetStoreForExactlyOnceSourceConnector(sourceConfig, connName, connector)
+                            : offsetStoreForRegularSourceConnector(sourceConfig, connName, connector);
+                    offsetStore.configure(config);
+                    offsetReader = new OffsetStorageReaderImpl(offsetStore, connName, internalKeyConverter, internalValueConverter);
+                }
                 workerConnector = new WorkerConnector(
-                        connName, connector, connConfig, ctx, metrics, connectorStatusListener, offsetReader, connectorLoader);
+                        connName, connector, connConfig, ctx, metrics, connectorStatusListener, offsetReader, offsetStore, connectorLoader);
                 log.info("Instantiated connector {} with version {} of type {}", connName, connector.version(), connector.getClass());
                 workerConnector.transitionTo(initialState, onConnectorStateChange);
                 Plugins.compareAndSwapLoaders(savedLoader);
@@ -405,7 +453,7 @@ private void awaitStopConnector(String connName, long timeout) {
             }
 
             if (!connector.awaitShutdown(timeout)) {
-                log.error("Connector ‘{}’ failed to properly shut down, has become unresponsive, and "
+                log.error("Connector '{}' failed to properly shut down, has become unresponsive, and "
                         + "may be consuming external resources. Correct the configuration for "
                         + "this connector or remove the connector. After fixing the connector, it "
                         + "may be necessary to restart this worker to release any consumed "
@@ -475,22 +523,95 @@ public boolean isRunning(String connName) {
     }
 
     /**
-     * Start a task managed by this worker.
+     * Start a sink task managed by this worker.
+     *
+     * @param id the task ID.
+     * @param configState the most recent {@link ClusterConfigState} known to the worker
+     * @param connProps the connector properties.
+     * @param taskProps the tasks properties.
+     * @param statusListener a listener for the runtime status transitions of the task.
+     * @param initialState the initial state of the connector.
+     * @return true if the task started successfully.
+     */
+    public boolean startSinkTask(
+            ConnectorTaskId id,
+            ClusterConfigState configState,
+            Map<String, String> connProps,
+            Map<String, String> taskProps,
+            TaskStatus.Listener statusListener,
+            TargetState initialState
+    ) {
+        return startTask(id, connProps, taskProps, statusListener,
+                new SinkTaskBuilder(id, configState, statusListener, initialState));
+    }
+
+    /**
+     * Start a source task managed by this worker using older behavior that does not provide exactly-once support.
      *
      * @param id the task ID.
+     * @param configState the most recent {@link ClusterConfigState} known to the worker
      * @param connProps the connector properties.
      * @param taskProps the tasks properties.
      * @param statusListener a listener for the runtime status transitions of the task.
      * @param initialState the initial state of the connector.
      * @return true if the task started successfully.
      */
-    public boolean startTask(
+    public boolean startSourceTask(
             ConnectorTaskId id,
             ClusterConfigState configState,
             Map<String, String> connProps,
             Map<String, String> taskProps,
             TaskStatus.Listener statusListener,
             TargetState initialState
+    ) {
+        return startTask(id, connProps, taskProps, statusListener,
+                new SourceTaskBuilder(id, configState, statusListener, initialState));
+    }
+
+    /**
+     * Start a source task with exactly-once support managed by this worker.
+     *
+     * @param id the task ID.
+     * @param configState the most recent {@link ClusterConfigState} known to the worker
+     * @param connProps the connector properties.
+     * @param taskProps the tasks properties.
+     * @param statusListener a listener for the runtime status transitions of the task.
+     * @param initialState the initial state of the connector.
+     * @param preProducerCheck a preflight check that should be performed before the task initializes its transactional producer.
+     * @param postProducerCheck a preflight check that should be performed after the task initializes its transactional producer,
+     *                          but before producing any source records or offsets.
+     * @return true if the task started successfully.
+     */
+    public boolean startExactlyOnceSourceTask(
+            ConnectorTaskId id,
+            ClusterConfigState configState,
+            Map<String, String> connProps,
+            Map<String, String> taskProps,
+            TaskStatus.Listener statusListener,
+            TargetState initialState,
+            Runnable preProducerCheck,
+            Runnable postProducerCheck
+    ) {
+        return startTask(id, connProps, taskProps, statusListener,
+                new ExactlyOnceSourceTaskBuilder(id, configState, statusListener, initialState, preProducerCheck, postProducerCheck));
+    }
+
+    /**
+     * Start a task managed by this worker.
+     *
+     * @param id the task ID.
+     * @param connProps the connector properties.
+     * @param taskProps the tasks properties.
+     * @param statusListener a listener for the runtime status transitions of the task.
+     * @param taskBuilder the {@link TaskBuilder} used to create the {@link WorkerTask} that manages the lifecycle of the task.
+     * @return true if the task started successfully.
+     */
+    private boolean startTask(
+            ConnectorTaskId id,
+            Map<String, String> connProps,
+            Map<String, String> taskProps,
+            TaskStatus.Listener statusListener,
+            TaskBuilder taskBuilder
     ) {
         final WorkerTask workerTask;
         final TaskStatus.Listener taskStatusListener = workerMetricsGroup.wrapStatusListener(statusListener);
@@ -541,8 +662,15 @@ public boolean startTask(
                     log.info("Set up the header converter {} for task {} using the connector config", headerConverter.getClass(), id);
                 }
 
-                workerTask = buildWorkerTask(configState, connConfig, id, task, taskStatusListener,
-                        initialState, keyConverter, valueConverter, headerConverter, connectorLoader);
+                workerTask = taskBuilder
+                        .withTask(task)
+                        .withConnectorConfig(connConfig)
+                        .withKeyConverter(keyConverter)
+                        .withValueConverter(valueConverter)
+                        .withHeaderConverter(headerConverter)
+                        .withClassloader(connectorLoader)
+                        .build();
+
                 workerTask.initialize(taskConfig);
                 Plugins.compareAndSwapLoaders(savedLoader);
             } catch (Throwable t) {
@@ -561,80 +689,101 @@ public boolean startTask(
 
             executor.submit(workerTask);
             if (workerTask instanceof WorkerSourceTask) {
-                sourceTaskOffsetCommitter.schedule(id, (WorkerSourceTask) workerTask);
+                sourceTaskOffsetCommitter.ifPresent(committer -> committer.schedule(id, (WorkerSourceTask) workerTask));
             }
             return true;
         }
     }
 
-    private WorkerTask buildWorkerTask(ClusterConfigState configState,
-                                       ConnectorConfig connConfig,
-                                       ConnectorTaskId id,
-                                       Task task,
-                                       TaskStatus.Listener statusListener,
-                                       TargetState initialState,
-                                       Converter keyConverter,
-                                       Converter valueConverter,
-                                       HeaderConverter headerConverter,
-                                       ClassLoader loader) {
-        ErrorHandlingMetrics errorHandlingMetrics = errorHandlingMetrics(id);
-        final Class<? extends Connector> connectorClass = plugins.connectorClass(
-            connConfig.getString(ConnectorConfig.CONNECTOR_CLASS_CONFIG));
-        RetryWithToleranceOperator retryWithToleranceOperator = new RetryWithToleranceOperator(connConfig.errorRetryTimeout(),
-                connConfig.errorMaxDelayInMillis(), connConfig.errorToleranceType(), Time.SYSTEM);
-        retryWithToleranceOperator.metrics(errorHandlingMetrics);
-
-        // Decide which type of worker task we need based on the type of task.
-        if (task instanceof SourceTask) {
-            SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins,
-                    connConfig.originalsStrings(), config.topicCreationEnable());
-            retryWithToleranceOperator.reporters(sourceTaskReporters(id, sourceConfig, errorHandlingMetrics));
-            TransformationChain<SourceRecord> transformationChain = new TransformationChain<>(sourceConfig.<SourceRecord>transformations(), retryWithToleranceOperator);
-            log.info("Initializing: {}", transformationChain);
-            CloseableOffsetStorageReader offsetReader = new OffsetStorageReaderImpl(offsetBackingStore, id.connector(),
-                    internalKeyConverter, internalValueConverter);
-            OffsetStorageWriter offsetWriter = new OffsetStorageWriter(offsetBackingStore, id.connector(),
-                    internalKeyConverter, internalValueConverter);
-            Map<String, Object> producerProps = producerConfigs(id, "connector-producer-" + id, config, sourceConfig, connectorClass,
-                                                                connectorClientConfigOverridePolicy, kafkaClusterId);
-            KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps);
-            TopicAdmin admin;
-            Map<String, TopicCreationGroup> topicCreationGroups;
-            if (config.topicCreationEnable() && sourceConfig.usesTopicCreation()) {
-                Map<String, Object> adminProps = adminConfigs(id, "connector-adminclient-" + id, config,
-                        sourceConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
-                admin = new TopicAdmin(adminProps);
-                topicCreationGroups = TopicCreationGroup.configuredGroups(sourceConfig);
-            } else {
-                admin = null;
-                topicCreationGroups = null;
-            }
-
-            // Note we pass the configState as it performs dynamic transformations under the covers
-            return new WorkerSourceTask(id, (SourceTask) task, statusListener, initialState, keyConverter, valueConverter,
-                    headerConverter, transformationChain, producer, admin, topicCreationGroups,
-                    offsetReader, offsetWriter, config, configState, metrics, loader, time, retryWithToleranceOperator, herder.statusBackingStore(), executor);
-        } else if (task instanceof SinkTask) {
-            TransformationChain<SinkRecord> transformationChain = new TransformationChain<>(connConfig.<SinkRecord>transformations(), retryWithToleranceOperator);
-            log.info("Initializing: {}", transformationChain);
-            SinkConnectorConfig sinkConfig = new SinkConnectorConfig(plugins, connConfig.originalsStrings());
-            retryWithToleranceOperator.reporters(sinkTaskReporters(id, sinkConfig, errorHandlingMetrics, connectorClass));
-            WorkerErrantRecordReporter workerErrantRecordReporter = createWorkerErrantRecordReporter(sinkConfig, retryWithToleranceOperator,
-                    keyConverter, valueConverter, headerConverter);
+    /**
+     * Using the admin principal for this connector, perform a round of zombie fencing that disables transactional producers
+     * for the specified number of source tasks from sending any more records.
+     * @param connName the name of the connector
+     * @param numTasks the number of tasks to fence out
+     * @param connProps the configuration of the connector; may not be null
+     * @return a {@link KafkaFuture} that will complete when the producers have all been fenced out, or the attempt has failed
+     */
+    public KafkaFuture<Void> fenceZombies(String connName, int numTasks, Map<String, String> connProps) {
+        return fenceZombies(connName, numTasks, connProps, Admin::create);
+    }
 
-            Map<String, Object> consumerProps = consumerConfigs(id, config, connConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
-            KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+    // Allows us to mock out the Admin client for testing
+    KafkaFuture<Void> fenceZombies(String connName, int numTasks, Map<String, String> connProps, Function<Map<String, Object>, Admin> adminFactory) {
+        log.debug("Fencing out {} task producers for source connector {}", numTasks, connName);
+        try (LoggingContext loggingContext = LoggingContext.forConnector(connName)) {
+            String connType = connProps.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG);
+            ClassLoader connectorLoader = plugins.delegatingLoader().connectorLoader(connType);
+            try (LoaderSwap loaderSwap = plugins.withClassLoader(connectorLoader)) {
+                final SourceConnectorConfig connConfig = new SourceConnectorConfig(plugins, connProps, config.topicCreationEnable());
+                final Class<? extends Connector> connClass = plugins.connectorClass(
+                        connConfig.getString(ConnectorConfig.CONNECTOR_CLASS_CONFIG));
+
+                Map<String, Object> adminConfig = adminConfigs(
+                        connName,
+                        "connector-worker-adminclient-" + connName,
+                        config,
+                        connConfig,
+                        connClass,
+                        connectorClientConfigOverridePolicy,
+                        kafkaClusterId,
+                        ConnectorType.SOURCE);
+                final Admin admin = adminFactory.apply(adminConfig);
+
+                try {
+                    Collection<String> transactionalIds = IntStream.range(0, numTasks)
+                            .mapToObj(i -> new ConnectorTaskId(connName, i))
+                            .map(this::taskTransactionalId)
+                            .collect(Collectors.toList());
+                    FenceProducersOptions fencingOptions = new FenceProducersOptions()
+                            .timeoutMs((int) ConnectResource.DEFAULT_REST_REQUEST_TIMEOUT_MS);
+                    return admin.fenceProducers(transactionalIds, fencingOptions).all().whenComplete((ignored, error) -> {
+                        if (error != null)
+                            log.debug("Finished fencing out {} task producers for source connector {}", numTasks, connName);
+                        Utils.closeQuietly(admin, "Zombie fencing admin for connector " + connName);
+                    });
+                } catch (Exception e) {
+                    Utils.closeQuietly(admin, "Zombie fencing admin for connector " + connName);
+                    throw e;
+                }
+            }
+        }
+    }
 
-            return new WorkerSinkTask(id, (SinkTask) task, statusListener, initialState, config, configState, metrics, keyConverter,
-                                      valueConverter, headerConverter, transformationChain, consumer, loader, time,
-                                      retryWithToleranceOperator, workerErrantRecordReporter, herder.statusBackingStore());
-        } else {
-            log.error("Tasks must be a subclass of either SourceTask or SinkTask and current is {}", task);
-            throw new ConnectException("Tasks must be a subclass of either SourceTask or SinkTask");
+    static Map<String, Object> exactlyOnceSourceTaskProducerConfigs(ConnectorTaskId id,
+                                                              WorkerConfig config,
+                                                              ConnectorConfig connConfig,
+                                                              Class<? extends Connector>  connectorClass,
+                                                              ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
+                                                              String clusterId) {
+        Map<String, Object> result = baseProducerConfigs(id.connector(), "connector-producer-" + id, config, connConfig, connectorClass, connectorClientConfigOverridePolicy, clusterId);
+        // The base producer properties forcibly disable idempotence; remove it from those properties
+        // if not explicitly requested by the user
+        boolean connectorProducerIdempotenceConfigured = connConfig.originals().containsKey(
+                ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG
+        );
+        if (!connectorProducerIdempotenceConfigured) {
+            boolean workerProducerIdempotenceConfigured = config.originals().containsKey(
+                    "producer." + ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG
+            );
+            if (!workerProducerIdempotenceConfigured) {
+                result.remove(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG);
+            }
         }
+        ConnectUtils.ensureProperty(
+                result, ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true",
+                "for connectors when exactly-once source support is enabled",
+                false
+        );
+        String transactionalId = taskTransactionalId(config.groupId(), id.connector(), id.task());
+        ConnectUtils.ensureProperty(
+                result, ProducerConfig.TRANSACTIONAL_ID_CONFIG, transactionalId,
+                "for connectors when exactly-once source support is enabled",
+                true
+        );
+        return result;
     }
 
-    static Map<String, Object> producerConfigs(ConnectorTaskId id,
+    static Map<String, Object> baseProducerConfigs(String connName,
                                                String defaultClientId,
                                                WorkerConfig config,
                                                ConnectorConfig connConfig,
@@ -642,12 +791,18 @@ static Map<String, Object> producerConfigs(ConnectorTaskId id,
                                                ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
                                                String clusterId) {
         Map<String, Object> producerProps = new HashMap<>();
-        producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, Utils.join(config.getList(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG), ","));
+        producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.bootstrapServers());
         producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
         producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
         // These settings will execute infinite retries on retriable exceptions. They *may* be overridden via configs passed to the worker,
         // but this may compromise the delivery guarantees of Kafka Connect.
         producerProps.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, Long.toString(Long.MAX_VALUE));
+        // By default, Connect disables idempotent behavior for all producers, even though idempotence became
+        // default for Kafka producers. This is to ensure Connect continues to work with many Kafka broker versions, including older brokers that do not support
+        // idempotent producers or require explicit steps to enable them (e.g. adding the IDEMPOTENT_WRITE ACL to brokers older than 2.8).
+        // These settings might change when https://cwiki.apache.org/confluence/display/KAFKA/KIP-318%3A+Make+Kafka+Connect+Source+idempotent
+        // gets approved and scheduled for release.
+        producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
         producerProps.put(ProducerConfig.ACKS_CONFIG, "all");
         producerProps.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1");
         producerProps.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, Integer.toString(Integer.MAX_VALUE));
@@ -659,7 +814,7 @@ static Map<String, Object> producerConfigs(ConnectorTaskId id,
 
         // Connector-specified overrides
         Map<String, Object> producerOverrides =
-            connectorClientConfigOverrides(id, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX,
+            connectorClientConfigOverrides(connName, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX,
                                            ConnectorType.SOURCE, ConnectorClientConfigRequest.ClientType.PRODUCER,
                                            connectorClientConfigOverridePolicy);
         producerProps.putAll(producerOverrides);
@@ -667,20 +822,56 @@ static Map<String, Object> producerConfigs(ConnectorTaskId id,
         return producerProps;
     }
 
-    static Map<String, Object> consumerConfigs(ConnectorTaskId id,
+    static Map<String, Object> exactlyOnceSourceOffsetsConsumerConfigs(String connName,
+                                                                       String defaultClientId,
+                                                                       WorkerConfig config,
+                                                                       ConnectorConfig connConfig,
+                                                                       Class<? extends Connector> connectorClass,
+                                                                       ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
+                                                                       String clusterId) {
+        Map<String, Object> result = baseConsumerConfigs(
+                connName, defaultClientId, config, connConfig, connectorClass,
+                connectorClientConfigOverridePolicy, clusterId, ConnectorType.SOURCE);
+        ConnectUtils.ensureProperty(
+                result, ConsumerConfig.ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                "for source connectors' offset consumers when exactly-once source support is enabled",
+                false
+        );
+        return result;
+    }
+
+    static Map<String, Object> regularSourceOffsetsConsumerConfigs(String connName,
+                                                                   String defaultClientId,
+                                                                   WorkerConfig config,
+                                                                   ConnectorConfig connConfig,
+                                                                   Class<? extends Connector> connectorClass,
+                                                                   ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
+                                                                   String clusterId) {
+        Map<String, Object> result = baseConsumerConfigs(
+                connName, defaultClientId, config, connConfig, connectorClass,
+                connectorClientConfigOverridePolicy, clusterId, ConnectorType.SOURCE);
+        // Users can disable this if they want to; it won't affect delivery guarantees since the task isn't exactly-once anyways
+        result.putIfAbsent(
+                ConsumerConfig.ISOLATION_LEVEL_CONFIG,
+                IsolationLevel.READ_COMMITTED.toString().toLowerCase(Locale.ROOT));
+        return result;
+    }
+
+    static Map<String, Object> baseConsumerConfigs(String connName,
+                                               String defaultClientId,
                                                WorkerConfig config,
                                                ConnectorConfig connConfig,
                                                Class<? extends Connector> connectorClass,
                                                ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
-                                               String clusterId) {
+                                               String clusterId,
+                                               ConnectorType connectorType) {
         // Include any unknown worker configs so consumer configs can be set globally on the worker
         // and through to the task
         Map<String, Object> consumerProps = new HashMap<>();
 
-        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, SinkUtils.consumerGroupId(id.connector()));
-        consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, "connector-consumer-" + id);
-        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
-                  Utils.join(config.getList(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG), ","));
+        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, SinkUtils.consumerGroupId(connName));
+        consumerProps.put(ConsumerConfig.CLIENT_ID_CONFIG, defaultClientId);
+        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, config.bootstrapServers());
         consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
         consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
         consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer");
@@ -691,21 +882,22 @@ static Map<String, Object> consumerConfigs(ConnectorTaskId id,
         ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
         // Connector-specified overrides
         Map<String, Object> consumerOverrides =
-            connectorClientConfigOverrides(id, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX,
-                                           ConnectorType.SINK, ConnectorClientConfigRequest.ClientType.CONSUMER,
+            connectorClientConfigOverrides(connName, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX,
+                                           connectorType, ConnectorClientConfigRequest.ClientType.CONSUMER,
                                            connectorClientConfigOverridePolicy);
         consumerProps.putAll(consumerOverrides);
 
         return consumerProps;
     }
 
-    static Map<String, Object> adminConfigs(ConnectorTaskId id,
+    static Map<String, Object> adminConfigs(String connName,
                                             String defaultClientId,
                                             WorkerConfig config,
                                             ConnectorConfig connConfig,
                                             Class<? extends Connector> connectorClass,
                                             ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
-                                            String clusterId) {
+                                            String clusterId,
+                                            ConnectorType connectorType) {
         Map<String, Object> adminProps = new HashMap<>();
         // Use the top-level worker configs to retain backwards compatibility with older releases which
         // did not require a prefix for connector admin client configs in the worker configuration file
@@ -713,12 +905,11 @@ static Map<String, Object> adminConfigs(ConnectorTaskId id,
         // and those that begin with "producer." and "consumer.", since we know they aren't intended for
         // the admin client
         Map<String, Object> nonPrefixedWorkerConfigs = config.originals().entrySet().stream()
-            .filter(e -> !e.getKey().startsWith("admin.")
-                && !e.getKey().startsWith("producer.")
-                && !e.getKey().startsWith("consumer."))
-            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
-        adminProps.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,
-            Utils.join(config.getList(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG), ","));
+                .filter(e -> !e.getKey().startsWith("admin.")
+                        && !e.getKey().startsWith("producer.")
+                        && !e.getKey().startsWith("consumer."))
+                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+        adminProps.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, config.bootstrapServers());
         adminProps.put(AdminClientConfig.CLIENT_ID_CONFIG, defaultClientId);
         adminProps.putAll(nonPrefixedWorkerConfigs);
 
@@ -727,9 +918,9 @@ static Map<String, Object> adminConfigs(ConnectorTaskId id,
 
         // Connector-specified overrides
         Map<String, Object> adminOverrides =
-            connectorClientConfigOverrides(id, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX,
-                                           ConnectorType.SINK, ConnectorClientConfigRequest.ClientType.ADMIN,
-                                           connectorClientConfigOverridePolicy);
+                connectorClientConfigOverrides(connName, connConfig, connectorClass, ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX,
+                        connectorType, ConnectorClientConfigRequest.ClientType.ADMIN,
+                        connectorClientConfigOverridePolicy);
         adminProps.putAll(adminOverrides);
 
         //add client metrics.context properties
@@ -738,7 +929,7 @@ static Map<String, Object> adminConfigs(ConnectorTaskId id,
         return adminProps;
     }
 
-    private static Map<String, Object> connectorClientConfigOverrides(ConnectorTaskId id,
+    private static Map<String, Object> connectorClientConfigOverrides(String connName,
                                                                       ConnectorConfig connConfig,
                                                                       Class<? extends Connector> connectorClass,
                                                                       String clientConfigPrefix,
@@ -747,7 +938,7 @@ private static Map<String, Object> connectorClientConfigOverrides(ConnectorTaskI
                                                                       ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy) {
         Map<String, Object> clientOverrides = connConfig.originalsWithPrefix(clientConfigPrefix);
         ConnectorClientConfigRequest connectorClientConfigRequest = new ConnectorClientConfigRequest(
-            id.connector(),
+            connName,
             connectorType,
             connectorClass,
             clientOverrides,
@@ -763,6 +954,19 @@ private static Map<String, Object> connectorClientConfigOverrides(ConnectorTaskI
         return clientOverrides;
     }
 
+    private String taskTransactionalId(ConnectorTaskId id) {
+        return taskTransactionalId(config.groupId(), id.connector(), id.task());
+    }
+
+    /**
+     * @return the {@link ProducerConfig#TRANSACTIONAL_ID_CONFIG transactional ID} to use for a task that writes
+     * records and/or offsets in a transaction. Not to be confused with {@link DistributedConfig#transactionalProducerId()},
+     * which is not used by tasks at all, but instead, by the worker itself.
+     */
+    public static String taskTransactionalId(String groupId, String connector, int taskId) {
+        return String.format("%s-%s-%d", groupId, connector, taskId);
+    }
+
     ErrorHandlingMetrics errorHandlingMetrics(ConnectorTaskId id) {
         return new ErrorHandlingMetrics(id, metrics);
     }
@@ -777,9 +981,9 @@ private List<ErrorReporter> sinkTaskReporters(ConnectorTaskId id, SinkConnectorC
         // check if topic for dead letter queue exists
         String topic = connConfig.dlqTopicName();
         if (topic != null && !topic.isEmpty()) {
-            Map<String, Object> producerProps = producerConfigs(id, "connector-dlq-producer-" + id, config, connConfig, connectorClass,
+            Map<String, Object> producerProps = baseProducerConfigs(id.connector(), "connector-dlq-producer-" + id, config, connConfig, connectorClass,
                                                                 connectorClientConfigOverridePolicy, kafkaClusterId);
-            Map<String, Object> adminProps = adminConfigs(id, "connector-dlq-adminclient-", config, connConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId);
+            Map<String, Object> adminProps = adminConfigs(id.connector(), "connector-dlq-adminclient-", config, connConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SINK);
             DeadLetterQueueReporter reporter = DeadLetterQueueReporter.createAndSetup(adminProps, id, connConfig, producerProps, errorHandlingMetrics);
 
             reporters.add(reporter);
@@ -821,7 +1025,7 @@ private void stopTask(ConnectorTaskId taskId) {
 
             log.info("Stopping task {}", task.id());
             if (task instanceof WorkerSourceTask)
-                sourceTaskOffsetCommitter.remove(task.id());
+                sourceTaskOffsetCommitter.ifPresent(committer -> committer.remove(task.id()));
 
             ClassLoader savedLoader = plugins.currentThreadLoader();
             try {
@@ -979,6 +1183,525 @@ WorkerMetricsGroup workerMetricsGroup() {
         return workerMetricsGroup;
     }
 
+    abstract class TaskBuilder {
+
+        private final ConnectorTaskId id;
+        private final ClusterConfigState configState;
+        private final TaskStatus.Listener statusListener;
+        private final TargetState initialState;
+
+        private Task task = null;
+        private ConnectorConfig connectorConfig = null;
+        private Converter keyConverter = null;
+        private Converter valueConverter = null;
+        private HeaderConverter headerConverter = null;
+        private ClassLoader classLoader = null;
+
+        public TaskBuilder(ConnectorTaskId id,
+                           ClusterConfigState configState,
+                           TaskStatus.Listener statusListener,
+                           TargetState initialState) {
+            this.id = id;
+            this.configState = configState;
+            this.statusListener = statusListener;
+            this.initialState = initialState;
+        }
+
+        public TaskBuilder withTask(Task task) {
+            this.task = task;
+            return this;
+        }
+
+        public TaskBuilder withConnectorConfig(ConnectorConfig connectorConfig) {
+            this.connectorConfig = connectorConfig;
+            return this;
+        }
+
+        public TaskBuilder withKeyConverter(Converter keyConverter) {
+            this.keyConverter = keyConverter;
+            return this;
+        }
+
+        public TaskBuilder withValueConverter(Converter valueConverter) {
+            this.valueConverter = valueConverter;
+            return this;
+        }
+
+        public TaskBuilder withHeaderConverter(HeaderConverter headerConverter) {
+            this.headerConverter = headerConverter;
+            return this;
+        }
+
+        public TaskBuilder withClassloader(ClassLoader classLoader) {
+            this.classLoader = classLoader;
+            return this;
+        }
+
+        public WorkerTask build() {
+            Objects.requireNonNull(task, "Task cannot be null");
+            Objects.requireNonNull(connectorConfig, "Connector config used by task cannot be null");
+            Objects.requireNonNull(keyConverter, "Key converter used by task cannot be null");
+            Objects.requireNonNull(valueConverter, "Value converter used by task cannot be null");
+            Objects.requireNonNull(headerConverter, "Header converter used by task cannot be null");
+            Objects.requireNonNull(classLoader, "Classloader used by task cannot be null");
+
+            ErrorHandlingMetrics errorHandlingMetrics = errorHandlingMetrics(id);
+            final Class<? extends Connector> connectorClass = plugins.connectorClass(
+                    connectorConfig.getString(ConnectorConfig.CONNECTOR_CLASS_CONFIG));
+            RetryWithToleranceOperator retryWithToleranceOperator = new RetryWithToleranceOperator(connectorConfig.errorRetryTimeout(),
+                    connectorConfig.errorMaxDelayInMillis(), connectorConfig.errorToleranceType(), Time.SYSTEM);
+            retryWithToleranceOperator.metrics(errorHandlingMetrics);
+
+            return doBuild(task, id, configState, statusListener, initialState,
+                    connectorConfig, keyConverter, valueConverter, headerConverter, classLoader,
+                    errorHandlingMetrics, connectorClass, retryWithToleranceOperator);
+        }
+
+        abstract WorkerTask doBuild(Task task,
+                                    ConnectorTaskId id,
+                                    ClusterConfigState configState,
+                                    TaskStatus.Listener statusListener,
+                                    TargetState initialState,
+                                    ConnectorConfig connectorConfig,
+                                    Converter keyConverter,
+                                    Converter valueConverter,
+                                    HeaderConverter headerConverter,
+                                    ClassLoader classLoader,
+                                    ErrorHandlingMetrics errorHandlingMetrics,
+                                    Class<? extends Connector> connectorClass,
+                                    RetryWithToleranceOperator retryWithToleranceOperator);
+
+    }
+
+    class SinkTaskBuilder extends TaskBuilder {
+        public SinkTaskBuilder(ConnectorTaskId id,
+                               ClusterConfigState configState,
+                               TaskStatus.Listener statusListener,
+                               TargetState initialState) {
+            super(id, configState, statusListener, initialState);
+        }
+
+        @Override
+        public WorkerTask doBuild(Task task,
+                           ConnectorTaskId id,
+                           ClusterConfigState configState,
+                           TaskStatus.Listener statusListener,
+                           TargetState initialState,
+                           ConnectorConfig connectorConfig,
+                           Converter keyConverter,
+                           Converter valueConverter,
+                           HeaderConverter headerConverter,
+                           ClassLoader classLoader,
+                           ErrorHandlingMetrics errorHandlingMetrics,
+                           Class<? extends Connector> connectorClass,
+                           RetryWithToleranceOperator retryWithToleranceOperator) {
+
+            TransformationChain<SinkRecord> transformationChain = new TransformationChain<>(connectorConfig.<SinkRecord>transformations(), retryWithToleranceOperator);
+            log.info("Initializing: {}", transformationChain);
+            SinkConnectorConfig sinkConfig = new SinkConnectorConfig(plugins, connectorConfig.originalsStrings());
+            retryWithToleranceOperator.reporters(sinkTaskReporters(id, sinkConfig, errorHandlingMetrics, connectorClass));
+            WorkerErrantRecordReporter workerErrantRecordReporter = createWorkerErrantRecordReporter(sinkConfig, retryWithToleranceOperator,
+                    keyConverter, valueConverter, headerConverter);
+
+            Map<String, Object> consumerProps = baseConsumerConfigs(
+                    id.connector(),  "connector-consumer-" + id, config, connectorConfig, connectorClass,
+                    connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SINK);
+            KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+
+            return new WorkerSinkTask(id, (SinkTask) task, statusListener, initialState, config, configState, metrics, keyConverter,
+                    valueConverter, headerConverter, transformationChain, consumer, classLoader, time,
+                    retryWithToleranceOperator, workerErrantRecordReporter, herder.statusBackingStore());
+        }
+    }
+
+    class SourceTaskBuilder extends TaskBuilder {
+        public SourceTaskBuilder(ConnectorTaskId id,
+                               ClusterConfigState configState,
+                               TaskStatus.Listener statusListener,
+                               TargetState initialState) {
+            super(id, configState, statusListener, initialState);
+        }
+
+        @Override
+        public WorkerTask doBuild(Task task,
+                           ConnectorTaskId id,
+                           ClusterConfigState configState,
+                           TaskStatus.Listener statusListener,
+                           TargetState initialState,
+                           ConnectorConfig connectorConfig,
+                           Converter keyConverter,
+                           Converter valueConverter,
+                           HeaderConverter headerConverter,
+                           ClassLoader classLoader,
+                           ErrorHandlingMetrics errorHandlingMetrics,
+                           Class<? extends Connector> connectorClass,
+                           RetryWithToleranceOperator retryWithToleranceOperator) {
+
+            SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins,
+                    connectorConfig.originalsStrings(), config.topicCreationEnable());
+            retryWithToleranceOperator.reporters(sourceTaskReporters(id, sourceConfig, errorHandlingMetrics));
+            TransformationChain<SourceRecord> transformationChain = new TransformationChain<>(sourceConfig.<SourceRecord>transformations(), retryWithToleranceOperator);
+            log.info("Initializing: {}", transformationChain);
+
+            Map<String, Object> producerProps = baseProducerConfigs(id.connector(), "connector-producer-" + id, config, sourceConfig, connectorClass,
+                    connectorClientConfigOverridePolicy, kafkaClusterId);
+            KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps);
+
+            TopicAdmin topicAdmin = null;
+            final boolean topicCreationEnabled = sourceConnectorTopicCreationEnabled(sourceConfig);
+            if (topicCreationEnabled || regularSourceTaskUsesConnectorSpecificOffsetsStore(sourceConfig)) {
+                Map<String, Object> adminOverrides = adminConfigs(id.connector(), "connector-adminclient-" + id, config,
+                        sourceConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SOURCE);
+                topicAdmin = new TopicAdmin(adminOverrides);
+            }
+
+            Map<String, TopicCreationGroup> topicCreationGroups = topicCreationEnabled
+                    ? TopicCreationGroup.configuredGroups(sourceConfig)
+                    : null;
+
+            // Set up the offset backing store for this task instance
+            ConnectorOffsetBackingStore offsetStore = offsetStoreForRegularSourceTask(
+                    id, sourceConfig, connectorClass, producer, producerProps, topicAdmin);
+            offsetStore.configure(config);
+
+            CloseableOffsetStorageReader offsetReader = new OffsetStorageReaderImpl(offsetStore, id.connector(), internalKeyConverter, internalValueConverter);
+            OffsetStorageWriter offsetWriter = new OffsetStorageWriter(offsetStore, id.connector(), internalKeyConverter, internalValueConverter);
+
+            // Note we pass the configState as it performs dynamic transformations under the covers
+            return new WorkerSourceTask(id, (SourceTask) task, statusListener, initialState, keyConverter, valueConverter,
+                    headerConverter, transformationChain, producer, topicAdmin, topicCreationGroups,
+                    offsetReader, offsetWriter, offsetStore, config, configState, metrics, classLoader, time,
+                    retryWithToleranceOperator, herder.statusBackingStore(), executor);
+        }
+    }
+
+    class ExactlyOnceSourceTaskBuilder extends TaskBuilder {
+        private final Runnable preProducerCheck;
+        private final Runnable postProducerCheck;
+
+        public ExactlyOnceSourceTaskBuilder(ConnectorTaskId id,
+                                            ClusterConfigState configState,
+                                            TaskStatus.Listener statusListener,
+                                            TargetState initialState,
+                                            Runnable preProducerCheck,
+                                            Runnable postProducerCheck) {
+            super(id, configState, statusListener, initialState);
+            this.preProducerCheck = preProducerCheck;
+            this.postProducerCheck = postProducerCheck;
+        }
+
+        @Override
+        public WorkerTask doBuild(Task task,
+                                  ConnectorTaskId id,
+                                  ClusterConfigState configState,
+                                  TaskStatus.Listener statusListener,
+                                  TargetState initialState,
+                                  ConnectorConfig connectorConfig,
+                                  Converter keyConverter,
+                                  Converter valueConverter,
+                                  HeaderConverter headerConverter,
+                                  ClassLoader classLoader,
+                                  ErrorHandlingMetrics errorHandlingMetrics,
+                                  Class<? extends Connector> connectorClass,
+                                  RetryWithToleranceOperator retryWithToleranceOperator) {
+
+            SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins,
+                    connectorConfig.originalsStrings(), config.topicCreationEnable());
+            retryWithToleranceOperator.reporters(sourceTaskReporters(id, sourceConfig, errorHandlingMetrics));
+            TransformationChain<SourceRecord> transformationChain = new TransformationChain<>(sourceConfig.<SourceRecord>transformations(), retryWithToleranceOperator);
+            log.info("Initializing: {}", transformationChain);
+
+            Map<String, Object> producerProps = exactlyOnceSourceTaskProducerConfigs(
+                    id, config, sourceConfig, connectorClass,
+                    connectorClientConfigOverridePolicy, kafkaClusterId);
+            KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps);
+
+            // Create a topic admin that the task will use for its offsets topic and, potentially, automatic topic creation
+            Map<String, Object> adminOverrides = adminConfigs(id.connector(), "connector-adminclient-" + id, config,
+                    sourceConfig, connectorClass, connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SOURCE);
+            TopicAdmin topicAdmin = new TopicAdmin(adminOverrides);
+
+            Map<String, TopicCreationGroup> topicCreationGroups = sourceConnectorTopicCreationEnabled(sourceConfig)
+                    ? TopicCreationGroup.configuredGroups(sourceConfig)
+                    : null;
+
+            // Set up the offset backing store for this task instance
+            ConnectorOffsetBackingStore offsetStore = offsetStoreForExactlyOnceSourceTask(
+                    id, sourceConfig, connectorClass, producer, producerProps, topicAdmin);
+            offsetStore.configure(config);
+
+            CloseableOffsetStorageReader offsetReader = new OffsetStorageReaderImpl(offsetStore, id.connector(), internalKeyConverter, internalValueConverter);
+            OffsetStorageWriter offsetWriter = new OffsetStorageWriter(offsetStore, id.connector(), internalKeyConverter, internalValueConverter);
+
+            // Note we pass the configState as it performs dynamic transformations under the covers
+            return new ExactlyOnceWorkerSourceTask(id, (SourceTask) task, statusListener, initialState, keyConverter, valueConverter,
+                    headerConverter, transformationChain, producer, topicAdmin, topicCreationGroups,
+                    offsetReader, offsetWriter, offsetStore, config, configState, metrics, classLoader, time, retryWithToleranceOperator,
+                    herder.statusBackingStore(), sourceConfig, executor, preProducerCheck, postProducerCheck);
+        }
+    }
+
+    // Visible for testing
+    ConnectorOffsetBackingStore offsetStoreForRegularSourceConnector(
+            SourceConnectorConfig sourceConfig,
+            String connName,
+            Connector connector
+    ) {
+        String connectorSpecificOffsetsTopic = sourceConfig.offsetsTopic();
+
+        Map<String, Object> producerProps = baseProducerConfigs(connName, "connector-producer-" + connName, config, sourceConfig, connector.getClass(),
+                connectorClientConfigOverridePolicy, kafkaClusterId);
+
+        // We use a connector-specific store (i.e., a dedicated KafkaOffsetBackingStore for this connector)
+        // if the worker supports per-connector offsets topics (which may be the case in distributed but not standalone mode, for example)
+        // and if the connector is explicitly configured with an offsets topic
+        final boolean usesConnectorSpecificStore = connectorSpecificOffsetsTopic != null
+                && config.connectorOffsetsTopicsPermitted();
+
+        if (usesConnectorSpecificStore) {
+            Map<String, Object> consumerProps = regularSourceOffsetsConsumerConfigs(
+                        connName, "connector-consumer-" + connName, config, sourceConfig, connector.getClass(),
+                        connectorClientConfigOverridePolicy, kafkaClusterId);
+            KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+
+            Map<String, Object> adminOverrides = adminConfigs(connName, "connector-adminclient-" + connName, config,
+                    sourceConfig, connector.getClass(), connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SOURCE);
+
+            TopicAdmin admin = new TopicAdmin(adminOverrides);
+            KafkaOffsetBackingStore connectorStore =
+                    KafkaOffsetBackingStore.forConnector(connectorSpecificOffsetsTopic, consumer, admin);
+
+            // If the connector's offsets topic is the same as the worker-global offsets topic, there's no need to construct
+            // an offset store that has a primary and a secondary store which both read from that same topic.
+            // So, if the user has explicitly configured the connector with a connector-specific offsets topic
+            // but we know that that topic is the same as the worker-global offsets topic, we ignore the worker-global
+            // offset store and build a store backed exclusively by a connector-specific offsets store.
+            // It may seem reasonable to instead build a store backed exclusively by the worker-global offset store, but that
+            // would prevent users from being able to customize the config properties used for the Kafka clients that
+            // access the offsets topic, and we would not be able to establish reasonable defaults like setting
+            // isolation.level=read_committed for the offsets topic consumer for this connector
+            if (sameOffsetTopicAsWorker(connectorSpecificOffsetsTopic, producerProps)) {
+                return ConnectorOffsetBackingStore.withOnlyConnectorStore(
+                        () -> LoggingContext.forConnector(connName),
+                        connectorStore,
+                        connectorSpecificOffsetsTopic,
+                        admin
+                );
+            } else {
+                return ConnectorOffsetBackingStore.withConnectorAndWorkerStores(
+                        () -> LoggingContext.forConnector(connName),
+                        globalOffsetBackingStore,
+                        connectorStore,
+                        connectorSpecificOffsetsTopic,
+                        admin
+                );
+            }
+        } else {
+            return ConnectorOffsetBackingStore.withOnlyWorkerStore(
+                    () -> LoggingContext.forConnector(connName),
+                    globalOffsetBackingStore,
+                    config.offsetsTopic()
+            );
+        }
+    }
+
+    // Visible for testing
+    ConnectorOffsetBackingStore offsetStoreForExactlyOnceSourceConnector(
+            SourceConnectorConfig sourceConfig,
+            String connName,
+            Connector connector
+    ) {
+        String connectorSpecificOffsetsTopic = Optional.ofNullable(sourceConfig.offsetsTopic()).orElse(config.offsetsTopic());
+
+        Map<String, Object> producerProps = baseProducerConfigs(connName, "connector-producer-" + connName, config, sourceConfig, connector.getClass(),
+                connectorClientConfigOverridePolicy, kafkaClusterId);
+
+        Map<String, Object> consumerProps = exactlyOnceSourceOffsetsConsumerConfigs(
+                    connName, "connector-consumer-" + connName, config, sourceConfig, connector.getClass(),
+                    connectorClientConfigOverridePolicy, kafkaClusterId);
+        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+
+        Map<String, Object> adminOverrides = adminConfigs(connName, "connector-adminclient-" + connName, config,
+                sourceConfig, connector.getClass(), connectorClientConfigOverridePolicy, kafkaClusterId, ConnectorType.SOURCE);
+
+        TopicAdmin admin = new TopicAdmin(adminOverrides);
+        KafkaOffsetBackingStore connectorStore =
+                KafkaOffsetBackingStore.forConnector(connectorSpecificOffsetsTopic, consumer, admin);
+
+        // If the connector's offsets topic is the same as the worker-global offsets topic, there's no need to construct
+        // an offset store that has a primary and a secondary store which both read from that same topic.
+        // So, even if the user has explicitly configured the connector with a connector-specific offsets topic,
+        // if we know that that topic is the same as the worker-global offsets topic, we ignore the worker-global
+        // offset store and build a store backed exclusively by a connector-specific offsets store.
+        // It may seem reasonable to instead build a store backed exclusively by the worker-global offset store, but that
+        // would prevent users from being able to customize the config properties used for the Kafka clients that
+        // access the offsets topic, and may lead to confusion for them when tasks are created for the connector
+        // since they will all have their own dedicated offsets stores anyways
+        if (sameOffsetTopicAsWorker(connectorSpecificOffsetsTopic, producerProps)) {
+            return ConnectorOffsetBackingStore.withOnlyConnectorStore(
+                    () -> LoggingContext.forConnector(connName),
+                    connectorStore,
+                    connectorSpecificOffsetsTopic,
+                    admin
+            );
+        } else {
+            return ConnectorOffsetBackingStore.withConnectorAndWorkerStores(
+                    () -> LoggingContext.forConnector(connName),
+                    globalOffsetBackingStore,
+                    connectorStore,
+                    connectorSpecificOffsetsTopic,
+                    admin
+            );
+        }
+    }
+
+    // Visible for testing
+    ConnectorOffsetBackingStore offsetStoreForRegularSourceTask(
+            ConnectorTaskId id,
+            SourceConnectorConfig sourceConfig,
+            Class<? extends Connector> connectorClass,
+            Producer<byte[], byte[]> producer,
+            Map<String, Object> producerProps,
+            TopicAdmin topicAdmin
+    ) {
+        String connectorSpecificOffsetsTopic = sourceConfig.offsetsTopic();
+
+        if (regularSourceTaskUsesConnectorSpecificOffsetsStore(sourceConfig)) {
+            Objects.requireNonNull(topicAdmin, "Source tasks require a non-null topic admin when configured to use their own offsets topic");
+
+            Map<String, Object> consumerProps = regularSourceOffsetsConsumerConfigs(
+                    id.connector(), "connector-consumer-" + id, config, sourceConfig, connectorClass,
+                    connectorClientConfigOverridePolicy, kafkaClusterId);
+            KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+
+            KafkaOffsetBackingStore connectorStore =
+                    KafkaOffsetBackingStore.forTask(sourceConfig.offsetsTopic(), producer, consumer, topicAdmin);
+
+            // If the connector's offsets topic is the same as the worker-global offsets topic, there's no need to construct
+            // an offset store that has a primary and a secondary store which both read from that same topic.
+            // So, if the user has (implicitly or explicitly) configured the connector with a connector-specific offsets topic
+            // but we know that that topic is the same as the worker-global offsets topic, we ignore the worker-global
+            // offset store and build a store backed exclusively by a connector-specific offsets store.
+            // It may seem reasonable to instead build a store backed exclusively by the worker-global offset store, but that
+            // would prevent users from being able to customize the config properties used for the Kafka clients that
+            // access the offsets topic, and we would not be able to establish reasonable defaults like setting
+            // isolation.level=read_committed for the offsets topic consumer for this task
+            if (sameOffsetTopicAsWorker(sourceConfig.offsetsTopic(), producerProps)) {
+                return ConnectorOffsetBackingStore.withOnlyConnectorStore(
+                        () -> LoggingContext.forTask(id),
+                        connectorStore,
+                        connectorSpecificOffsetsTopic,
+                        topicAdmin
+                );
+            } else {
+                return ConnectorOffsetBackingStore.withConnectorAndWorkerStores(
+                        () -> LoggingContext.forTask(id),
+                        globalOffsetBackingStore,
+                        connectorStore,
+                        connectorSpecificOffsetsTopic,
+                        topicAdmin
+                );
+            }
+        } else {
+            return ConnectorOffsetBackingStore.withOnlyWorkerStore(
+                    () -> LoggingContext.forTask(id),
+                    globalOffsetBackingStore,
+                    config.offsetsTopic()
+            );
+        }
+    }
+
+    // Visible for testing
+    ConnectorOffsetBackingStore offsetStoreForExactlyOnceSourceTask(
+            ConnectorTaskId id,
+            SourceConnectorConfig sourceConfig,
+            Class<? extends Connector> connectorClass,
+            Producer<byte[], byte[]> producer,
+            Map<String, Object> producerProps,
+            TopicAdmin topicAdmin
+    ) {
+        Objects.requireNonNull(topicAdmin, "Source tasks require a non-null topic admin when exactly-once support is enabled");
+
+        Map<String, Object> consumerProps = exactlyOnceSourceOffsetsConsumerConfigs(
+                id.connector(), "connector-consumer-" + id, config, sourceConfig, connectorClass,
+                connectorClientConfigOverridePolicy, kafkaClusterId);
+        KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerProps);
+
+        String connectorOffsetsTopic = Optional.ofNullable(sourceConfig.offsetsTopic()).orElse(config.offsetsTopic());
+
+        KafkaOffsetBackingStore connectorStore =
+                KafkaOffsetBackingStore.forTask(connectorOffsetsTopic, producer, consumer, topicAdmin);
+
+        // If the connector's offsets topic is the same as the worker-global offsets topic, there's no need to construct
+        // an offset store that has a primary and a secondary store which both read from that same topic.
+        // So, if the user has (implicitly or explicitly) configured the connector with a connector-specific offsets topic
+        // but we know that that topic is the same as the worker-global offsets topic, we ignore the worker-global
+        // offset store and build a store backed exclusively by a connector-specific offsets store.
+        // We cannot under any circumstances build an offset store backed exclusively by the worker-global offset store
+        // as that would prevent us from being able to write source records and source offset information for the task
+        // with the same producer, and therefore, in the same transaction.
+        if (sameOffsetTopicAsWorker(connectorOffsetsTopic, producerProps)) {
+            return ConnectorOffsetBackingStore.withOnlyConnectorStore(
+                    () -> LoggingContext.forTask(id),
+                    connectorStore,
+                    connectorOffsetsTopic,
+                    topicAdmin
+            );
+        } else {
+            return ConnectorOffsetBackingStore.withConnectorAndWorkerStores(
+                    () -> LoggingContext.forTask(id),
+                    globalOffsetBackingStore,
+                    connectorStore,
+                    connectorOffsetsTopic,
+                    topicAdmin
+            );
+        }
+    }
+
+    /**
+     * Gives a best-effort guess for whether the given offsets topic is the same topic as the worker-global offsets topic.
+     * Even if the name of the topic is the same as the name of the worker's offsets topic, the two may still be different topics
+     * if the connector is configured to produce to a different Kafka cluster than the one that hosts the worker's offsets topic.
+     * @param offsetsTopic the name of the offsets topic for the connector
+     * @param producerProps the producer configuration for the connector
+     * @return whether it appears that the connector's offsets topic is the same topic as the worker-global offsets topic.
+     * If {@code true}, it is guaranteed that the two are the same;
+     * if {@code false}, it is likely but not guaranteed that the two are not the same
+     */
+    private boolean sameOffsetTopicAsWorker(String offsetsTopic, Map<String, Object> producerProps) {
+        // We can check the offset topic name and the Kafka cluster's bootstrap servers,
+        // although this isn't exact and can lead to some false negatives if the user
+        // provides an overridden bootstrap servers value for their producer that is different than
+        // the worker's but still resolves to the same Kafka cluster used by the worker.
+        // At the moment this is probably adequate, especially since we don't want to put
+        // a network ping to a remote Kafka cluster inside the herder's tick thread (which is where this
+        // logic takes place right now) in case that takes a while.
+        Set<String> workerBootstrapServers = new HashSet<>(config.getList(BOOTSTRAP_SERVERS_CONFIG));
+        Set<String> producerBootstrapServers = new HashSet<>();
+        try {
+            String rawBootstrapServers = producerProps.getOrDefault(BOOTSTRAP_SERVERS_CONFIG, "").toString();
+            @SuppressWarnings("unchecked")
+            List<String> parsedBootstrapServers = (List<String>) ConfigDef.parseType(BOOTSTRAP_SERVERS_CONFIG, rawBootstrapServers, ConfigDef.Type.LIST);
+            producerBootstrapServers.addAll(parsedBootstrapServers);
+        } catch (Exception e) {
+            // Should never happen by this point, but if it does, make sure to present a readable error message to the user
+            throw new ConnectException("Failed to parse bootstrap servers property in producer config", e);
+        }
+        return offsetsTopic.equals(config.offsetsTopic())
+                && workerBootstrapServers.equals(producerBootstrapServers);
+    }
+
+    private boolean regularSourceTaskUsesConnectorSpecificOffsetsStore(SourceConnectorConfig sourceConfig) {
+        // We use a connector-specific store (i.e., a dedicated KafkaOffsetBackingStore for this task)
+        // if the worker supports per-connector offsets topics (which may be the case in distributed mode but not standalone, for example)
+        // and the user has explicitly specified an offsets topic for the connector
+        return sourceConfig.offsetsTopic() != null && config.connectorOffsetsTopicsPermitted();
+    }
+
+    private boolean sourceConnectorTopicCreationEnabled(SourceConnectorConfig sourceConfig) {
+        return config.topicCreationEnable() && sourceConfig.usesTopicCreation();
+    }
+
     static class ConnectorStatusMetricsGroup {
         private final ConnectMetrics connectMetrics;
         private final ConnectMetricsRegistry registry;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConfig.java
index 3224a230f90ef..38dbeb87e1bf7 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConfig.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConfig.java
@@ -23,6 +23,7 @@
 import org.apache.kafka.common.config.ConfigDef.Importance;
 import org.apache.kafka.common.config.ConfigDef.Type;
 import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.common.config.SslClientAuth;
 import org.apache.kafka.common.config.internals.BrokerSecurityConfigs;
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.utils.Utils;
@@ -110,7 +111,8 @@ public class WorkerConfig extends AbstractConfig {
     private static final String OFFSET_COMMIT_TIMEOUT_MS_DOC
             = "Maximum number of milliseconds to wait for records to flush and partition offset data to be"
             + " committed to offset storage before cancelling the process and restoring the offset "
-            + "data to be committed in a future attempt.";
+            + "data to be committed in a future attempt. This property has no effect for source connectors "
+            + "running with exactly-once support.";
     public static final long OFFSET_COMMIT_TIMEOUT_MS_DEFAULT = 5000L;
 
     public static final String LISTENERS_CONFIG = "listeners";
@@ -279,7 +281,7 @@ protected static ConfigDef baseConfigDef() {
                         "", Importance.LOW,
                         CommonClientConfigs.METRIC_REPORTER_CLASSES_DOC)
                 .define(BrokerSecurityConfigs.SSL_CLIENT_AUTH_CONFIG,
-                        ConfigDef.Type.STRING, "none", ConfigDef.Importance.LOW, BrokerSecurityConfigs.SSL_CLIENT_AUTH_DOC)
+                        ConfigDef.Type.STRING, SslClientAuth.NONE.toString(), in(Utils.enumOptions(SslClientAuth.class)), ConfigDef.Importance.LOW, BrokerSecurityConfigs.SSL_CLIENT_AUTH_DOC)
                 .define(HEADER_CONVERTER_CLASS_CONFIG, Type.CLASS,
                         HEADER_CONVERTER_CLASS_DEFAULT,
                         Importance.LOW, HEADER_CONVERTER_CLASS_DOC)
@@ -342,6 +344,15 @@ private void logPluginPathConfigProviderWarning(Map<String, String> rawOriginals
         }
     }
 
+    /**
+     * @return the {@link CommonClientConfigs#BOOTSTRAP_SERVERS_CONFIG bootstrap servers} property
+     * used by the worker when instantiating Kafka clients for connectors and tasks (unless overridden)
+     * and its internal topics (if running in distributed mode)
+     */
+    public String bootstrapServers() {
+        return String.join(",", getList(BOOTSTRAP_SERVERS_CONFIG));
+    }
+
     public Integer getRebalanceTimeout() {
         return null;
     }
@@ -350,6 +361,54 @@ public boolean topicCreationEnable() {
         return getBoolean(TOPIC_CREATION_ENABLE_CONFIG);
     }
 
+    /**
+     * Whether this worker is configured with exactly-once support for source connectors.
+     * The default implementation returns {@code false} and should be overridden by subclasses
+     * if the worker mode for the subclass provides exactly-once support for source connectors.
+     * @return whether exactly-once support is enabled for source connectors on this worker
+     */
+    public boolean exactlyOnceSourceEnabled() {
+        return false;
+    }
+
+    /**
+     * Get the internal topic used by this worker to store source connector offsets.
+     * The default implementation returns {@code null} and should be overridden by subclasses
+     * if the worker mode for the subclass uses an internal offsets topic.
+     * @return the name of the internal offsets topic, or {@code null} if the worker does not use
+     * an internal offsets topic
+     */
+    public String offsetsTopic() {
+        return null;
+    }
+
+    /**
+     * Determine whether this worker supports per-connector source offsets topics.
+     * The default implementation returns {@code false} and should be overridden by subclasses
+     * if the worker mode for the subclass supports per-connector offsets topics.
+     * @return whether the worker supports per-connector offsets topics
+     */
+    public boolean connectorOffsetsTopicsPermitted() {
+        return false;
+    }
+
+    /**
+     * @return the offset commit interval for tasks created by this worker
+     */
+    public long offsetCommitInterval() {
+        return getLong(OFFSET_COMMIT_INTERVAL_MS_CONFIG);
+    }
+
+    /**
+     * Get the {@link CommonClientConfigs#GROUP_ID_CONFIG group ID} used by this worker to form a cluster.
+     * The default implementation returns {@code null} and should be overridden by subclasses
+     * if the worker mode for the subclass is capable of forming a cluster using Kafka's group coordination API.
+     * @return the group ID for the worker's cluster, or {@code null} if the worker is not capable of forming a cluster.
+     */
+    public String groupId() {
+        return null;
+    }
+
     @Override
     protected Map<String, Object> postProcessParsedConfig(final Map<String, Object> parsedValues) {
         return CommonClientConfigs.postProcessReconnectBackoffConfigs(this, parsedValues);
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java
index 09b57fd42a851..fa3acfb88f7e9 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerConnector.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.connect.runtime;
 
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.connector.Connector;
 import org.apache.kafka.connect.connector.ConnectorContext;
 import org.apache.kafka.connect.errors.ConnectException;
@@ -23,6 +24,8 @@
 import org.apache.kafka.connect.runtime.isolation.Plugins;
 import org.apache.kafka.connect.sink.SinkConnectorContext;
 import org.apache.kafka.connect.source.SourceConnectorContext;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.OffsetStorageReader;
 import org.apache.kafka.connect.util.Callback;
 import org.apache.kafka.connect.util.ConnectUtils;
@@ -74,7 +77,8 @@ private enum State {
     private volatile boolean cancelled; // indicates whether the Worker has cancelled the connector (e.g. because of slow shutdown)
 
     private State state;
-    private final OffsetStorageReader offsetStorageReader;
+    private final CloseableOffsetStorageReader offsetStorageReader;
+    private final ConnectorOffsetBackingStore offsetStore;
 
     public WorkerConnector(String connName,
                            Connector connector,
@@ -82,7 +86,8 @@ public WorkerConnector(String connName,
                            CloseableConnectorContext ctx,
                            ConnectMetrics metrics,
                            ConnectorStatus.Listener statusListener,
-                           OffsetStorageReader offsetStorageReader,
+                           CloseableOffsetStorageReader offsetStorageReader,
+                           ConnectorOffsetBackingStore offsetStore,
                            ClassLoader loader) {
         this.connName = connName;
         this.config = connectorConfig.originalsStrings();
@@ -93,6 +98,7 @@ public WorkerConnector(String connName,
         this.metrics = new ConnectorMetricsGroup(metrics, AbstractStatus.State.UNASSIGNED, statusListener);
         this.statusListener = this.metrics;
         this.offsetStorageReader = offsetStorageReader;
+        this.offsetStore = offsetStore;
         this.pendingTargetStateChange = new AtomicReference<>();
         this.pendingStateChangeCallback = new AtomicReference<>();
         this.shutdownLatch = new CountDownLatch(1);
@@ -165,6 +171,9 @@ void initialize() {
                 SinkConnectorConfig.validate(config);
                 connector.initialize(new WorkerSinkConnectorContext());
             } else {
+                Objects.requireNonNull(offsetStore, "Offset store cannot be null for source connectors");
+                Objects.requireNonNull(offsetStorageReader, "Offset reader cannot be null for source connectors");
+                offsetStore.start();
                 connector.initialize(new WorkerSourceConnectorContext(offsetStorageReader));
             }
         } catch (Throwable t) {
@@ -271,8 +280,12 @@ void doShutdown() {
             state = State.FAILED;
             statusListener.onFailure(connName, t);
         } finally {
-            ctx.close();
-            metrics.close();
+            Utils.closeQuietly(ctx, "connector context for " + connName);
+            Utils.closeQuietly(metrics, "connector metrics for " + connName);
+            Utils.closeQuietly(offsetStorageReader, "offset reader for " + connName);
+            if (offsetStore != null) {
+                Utils.closeQuietly(offsetStore::stop, "offset backing store for " + connName);
+            }
         }
     }
 
@@ -281,7 +294,9 @@ public synchronized void cancel() {
         // instance is being abandoned and we won't update the status on its behalf any more
         // after this since a new instance may be started soon
         statusListener.onShutdown(connName);
-        ctx.close();
+        Utils.closeQuietly(ctx, "connector context for " + connName);
+        // Preemptively close the offset reader in case the connector is blocked on an offset read.
+        Utils.closeQuietly(offsetStorageReader, "offset reader for " + connName);
         cancelled = true;
     }
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java
index c2566ef9cfd4f..dfe815dffc680 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTask.java
@@ -40,7 +40,7 @@
 import org.apache.kafka.connect.header.ConnectHeaders;
 import org.apache.kafka.connect.header.Headers;
 import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
 import org.apache.kafka.connect.runtime.errors.Stage;
 import org.apache.kafka.connect.runtime.errors.WorkerErrantRecordReporter;
@@ -176,6 +176,7 @@ protected void close() {
         Utils.closeQuietly(consumer, "consumer");
         Utils.closeQuietly(transformationChain, "transformation chain");
         Utils.closeQuietly(retryWithToleranceOperator, "retry operator");
+        Utils.closeQuietly(headerConverter, "header converter");
     }
 
     @Override
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTaskContext.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTaskContext.java
index d91c09896d113..f242ef4fe5d3f 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTaskContext.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSinkTaskContext.java
@@ -19,7 +19,7 @@
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.connect.errors.IllegalWorkerStateException;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.sink.ErrantRecordReporter;
 import org.apache.kafka.connect.sink.SinkTaskContext;
 import org.slf4j.Logger;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java
index 9ce2b8dbb8a80..37d93a3fe8685 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTask.java
@@ -16,50 +16,31 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import org.apache.kafka.clients.admin.NewTopic;
-import org.apache.kafka.clients.admin.TopicDescription;
-import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.clients.producer.RecordMetadata;
-import org.apache.kafka.common.KafkaException;
-import org.apache.kafka.common.header.internals.RecordHeaders;
-import org.apache.kafka.common.metrics.Sensor;
-import org.apache.kafka.common.metrics.stats.Avg;
-import org.apache.kafka.common.metrics.stats.CumulativeSum;
-import org.apache.kafka.common.metrics.stats.Max;
-import org.apache.kafka.common.metrics.stats.Rate;
-import org.apache.kafka.common.metrics.stats.Value;
 import org.apache.kafka.common.utils.Time;
-import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.errors.ConnectException;
-import org.apache.kafka.connect.errors.RetriableException;
-import org.apache.kafka.connect.header.Header;
-import org.apache.kafka.connect.header.Headers;
-import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
-import org.apache.kafka.connect.runtime.SubmittedRecords.SubmittedRecord;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
 import org.apache.kafka.connect.runtime.errors.Stage;
 import org.apache.kafka.connect.runtime.errors.ToleranceType;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
 import org.apache.kafka.connect.storage.OffsetStorageWriter;
 import org.apache.kafka.connect.storage.StatusBackingStore;
-import org.apache.kafka.connect.util.ConnectUtils;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.apache.kafka.connect.util.TopicAdmin;
-import org.apache.kafka.connect.util.TopicCreation;
 import org.apache.kafka.connect.util.TopicCreationGroup;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.time.Duration;
-import java.util.List;
 import java.util.Map;
-import java.util.concurrent.CountDownLatch;
+import java.util.Optional;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executor;
 import java.util.concurrent.Future;
@@ -68,40 +49,16 @@
 import java.util.concurrent.atomic.AtomicReference;
 
 import static org.apache.kafka.connect.runtime.SubmittedRecords.CommittableOffsets;
-import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_TRACKING_ENABLE_CONFIG;
 
 /**
  * WorkerTask that uses a SourceTask to ingest data into Kafka.
  */
-class WorkerSourceTask extends WorkerTask {
+class WorkerSourceTask extends AbstractWorkerSourceTask {
     private static final Logger log = LoggerFactory.getLogger(WorkerSourceTask.class);
 
-    private static final long SEND_FAILED_BACKOFF_MS = 100;
-
-    private final WorkerConfig workerConfig;
-    private final SourceTask task;
-    private final ClusterConfigState configState;
-    private final Converter keyConverter;
-    private final Converter valueConverter;
-    private final HeaderConverter headerConverter;
-    private final TransformationChain<SourceRecord> transformationChain;
-    private final KafkaProducer<byte[], byte[]> producer;
-    private final TopicAdmin admin;
-    private final CloseableOffsetStorageReader offsetReader;
-    private final OffsetStorageWriter offsetWriter;
-    private final Executor closeExecutor;
-    private final SourceTaskMetricsGroup sourceTaskMetricsGroup;
-    private final AtomicReference<Exception> producerSendException;
-    private final boolean isTopicTrackingEnabled;
-    private final TopicCreation topicCreation;
-
-    private List<SourceRecord> toSend;
     private volatile CommittableOffsets committableOffsets;
     private final SubmittedRecords submittedRecords;
-    private final CountDownLatch stopRequestedLatch;
-
-    private Map<String, String> taskConfig;
-    private boolean started = false;
+    private final AtomicReference<Exception> producerSendException;
 
     public WorkerSourceTask(ConnectorTaskId id,
                             SourceTask task,
@@ -111,11 +68,12 @@ public WorkerSourceTask(ConnectorTaskId id,
                             Converter valueConverter,
                             HeaderConverter headerConverter,
                             TransformationChain<SourceRecord> transformationChain,
-                            KafkaProducer<byte[], byte[]> producer,
+                            Producer<byte[], byte[]> producer,
                             TopicAdmin admin,
                             Map<String, TopicCreationGroup> topicGroups,
                             CloseableOffsetStorageReader offsetReader,
                             OffsetStorageWriter offsetWriter,
+                            ConnectorOffsetBackingStore offsetStore,
                             WorkerConfig workerConfig,
                             ClusterConfigState configState,
                             ConnectMetrics connectMetrics,
@@ -125,355 +83,113 @@ public WorkerSourceTask(ConnectorTaskId id,
                             StatusBackingStore statusBackingStore,
                             Executor closeExecutor) {
 
-        super(id, statusListener, initialState, loader, connectMetrics,
-                retryWithToleranceOperator, time, statusBackingStore);
-
-        this.workerConfig = workerConfig;
-        this.task = task;
-        this.configState = configState;
-        this.keyConverter = keyConverter;
-        this.valueConverter = valueConverter;
-        this.headerConverter = headerConverter;
-        this.transformationChain = transformationChain;
-        this.producer = producer;
-        this.admin = admin;
-        this.offsetReader = offsetReader;
-        this.offsetWriter = offsetWriter;
-        this.closeExecutor = closeExecutor;
-
-        this.toSend = null;
+        super(id, task, statusListener, initialState, keyConverter, valueConverter, headerConverter, transformationChain,
+                new WorkerSourceTaskContext(offsetReader, id, configState, null), producer,
+                admin, topicGroups, offsetReader, offsetWriter, offsetStore, workerConfig, connectMetrics, loader,
+                time, retryWithToleranceOperator, statusBackingStore, closeExecutor);
+
         this.committableOffsets = CommittableOffsets.EMPTY;
         this.submittedRecords = new SubmittedRecords();
-        this.stopRequestedLatch = new CountDownLatch(1);
-        this.sourceTaskMetricsGroup = new SourceTaskMetricsGroup(id, connectMetrics);
         this.producerSendException = new AtomicReference<>();
-        this.isTopicTrackingEnabled = workerConfig.getBoolean(TOPIC_TRACKING_ENABLE_CONFIG);
-        this.topicCreation = TopicCreation.newTopicCreation(workerConfig, topicGroups);
     }
 
     @Override
-    public void initialize(TaskConfig taskConfig) {
-        try {
-            this.taskConfig = taskConfig.originalsStrings();
-        } catch (Throwable t) {
-            log.error("{} Task failed initialization and will not be started.", this, t);
-            onFailure(t);
-        }
+    protected void prepareToInitializeTask() {
+        // No-op
     }
 
     @Override
-    protected void close() {
-        if (started) {
-            try {
-                task.stop();
-            } catch (Throwable t) {
-                log.warn("Could not stop task", t);
-            }
-        }
-
-        closeProducer(Duration.ofSeconds(30));
-
-        if (admin != null) {
-            try {
-                admin.close(Duration.ofSeconds(30));
-            } catch (Throwable t) {
-                log.warn("Failed to close admin client on time", t);
-            }
-        }
-        Utils.closeQuietly(transformationChain, "transformation chain");
-        Utils.closeQuietly(retryWithToleranceOperator, "retry operator");
+    protected void prepareToEnterSendLoop() {
+        // No-op
     }
 
     @Override
-    public void removeMetrics() {
-        try {
-            sourceTaskMetricsGroup.close();
-        } finally {
-            super.removeMetrics();
-        }
+    protected void beginSendIteration() {
+        updateCommittableOffsets();
     }
 
     @Override
-    public void cancel() {
-        super.cancel();
-        offsetReader.close();
-        // We proactively close the producer here as the main work thread for the task may
-        // be blocked indefinitely in a call to Producer::send if automatic topic creation is
-        // not enabled on either the connector or the Kafka cluster. Closing the producer should
-        // unblock it in that case and allow shutdown to proceed normally.
-        // With a duration of 0, the producer's own shutdown logic should be fairly quick,
-        // but closing user-pluggable classes like interceptors may lag indefinitely. So, we
-        // call close on a separate thread in order to avoid blocking the herder's tick thread.
-        closeExecutor.execute(() -> closeProducer(Duration.ZERO));
+    protected void prepareToPollTask() {
+        maybeThrowProducerSendException();
     }
 
     @Override
-    public void stop() {
-        super.stop();
-        stopRequestedLatch.countDown();
+    protected void recordDropped(SourceRecord record) {
+        commitTaskRecord(record, null);
     }
 
     @Override
-    protected void initializeAndStart() {
-        // If we try to start the task at all by invoking initialize, then count this as
-        // "started" and expect a subsequent call to the task's stop() method
-        // to properly clean up any resources allocated by its initialize() or
-        // start() methods. If the task throws an exception during stop(),
-        // the worst thing that happens is another exception gets logged for an already-
-        // failed task
-        started = true;
-        task.initialize(new WorkerSourceTaskContext(offsetReader, this, configState));
-        task.start(taskConfig);
-        log.info("{} Source task finished initialization and start", this);
+    protected Optional<SubmittedRecords.SubmittedRecord> prepareToSendRecord(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord
+    ) {
+        maybeThrowProducerSendException();
+        return Optional.of(submittedRecords.submit(sourceRecord));
     }
 
     @Override
-    public void execute() {
-        try {
-            log.info("{} Executing source task", this);
-            while (!isStopping()) {
-                updateCommittableOffsets();
-
-                if (shouldPause()) {
-                    onPause();
-                    if (awaitUnpause()) {
-                        onResume();
-                    }
-                    continue;
-                }
-
-                maybeThrowProducerSendException();
-                if (toSend == null) {
-                    log.trace("{} Nothing to send to Kafka. Polling source for additional records", this);
-                    long start = time.milliseconds();
-                    toSend = poll();
-                    if (toSend != null) {
-                        recordPollReturned(toSend.size(), time.milliseconds() - start);
-                    }
-                }
-
-                if (toSend == null)
-                    continue;
-                log.trace("{} About to send {} records to Kafka", this, toSend.size());
-                if (!sendRecords())
-                    stopRequestedLatch.await(SEND_FAILED_BACKOFF_MS, TimeUnit.MILLISECONDS);
-            }
-        } catch (InterruptedException e) {
-            // Ignore and allow to exit.
-        } finally {
-            submittedRecords.awaitAllMessages(
-                    workerConfig.getLong(WorkerConfig.OFFSET_COMMIT_TIMEOUT_MS_CONFIG),
-                    TimeUnit.MILLISECONDS
-            );
-            // It should still be safe to commit offsets since any exception would have
-            // simply resulted in not getting more records but all the existing records should be ok to flush
-            // and commit offsets. Worst case, task.flush() will also throw an exception causing the offset commit
-            // to fail.
-            updateCommittableOffsets();
-            commitOffsets();
-        }
+    protected void recordDispatched(SourceRecord record) {
+        // No-op
     }
 
-    private void closeProducer(Duration duration) {
-        if (producer != null) {
-            try {
-                producer.close(duration);
-            } catch (Throwable t) {
-                log.warn("Could not close producer for {}", id, t);
-            }
-        }
-    }
-
-    private void maybeThrowProducerSendException() {
-        if (producerSendException.get() != null) {
-            throw new ConnectException(
-                "Unrecoverable exception from producer send callback",
-                producerSendException.get()
-            );
-        }
-    }
-
-    private void updateCommittableOffsets() {
-        CommittableOffsets newOffsets = submittedRecords.committableOffsets();
-        synchronized (this) {
-            this.committableOffsets = this.committableOffsets.updatedWith(newOffsets);
-        }
-    }
-
-    protected List<SourceRecord> poll() throws InterruptedException {
-        try {
-            return task.poll();
-        } catch (RetriableException | org.apache.kafka.common.errors.RetriableException e) {
-            log.warn("{} failed to poll records from SourceTask. Will retry operation.", this, e);
-            // Do nothing. Let the framework poll whenever it's ready.
-            return null;
-        }
-    }
-
-    /**
-     * Convert the source record into a producer record.
-     *
-     * @param record the transformed record
-     * @return the producer record which can sent over to Kafka. A null is returned if the input is null or
-     * if an error was encountered during any of the converter stages.
-     */
-    private ProducerRecord<byte[], byte[]> convertTransformedRecord(SourceRecord record) {
-        if (record == null) {
-            return null;
-        }
-
-        RecordHeaders headers = retryWithToleranceOperator.execute(() -> convertHeaderFor(record), Stage.HEADER_CONVERTER, headerConverter.getClass());
-
-        byte[] key = retryWithToleranceOperator.execute(() -> keyConverter.fromConnectData(record.topic(), headers, record.keySchema(), record.key()),
-                Stage.KEY_CONVERTER, keyConverter.getClass());
-
-        byte[] value = retryWithToleranceOperator.execute(() -> valueConverter.fromConnectData(record.topic(), headers, record.valueSchema(), record.value()),
-                Stage.VALUE_CONVERTER, valueConverter.getClass());
-
-        if (retryWithToleranceOperator.failed()) {
-            return null;
-        }
-
-        return new ProducerRecord<>(record.topic(), record.kafkaPartition(),
-                ConnectUtils.checkAndConvertTimestamp(record.timestamp()), key, value, headers);
+    @Override
+    protected void batchDispatched() {
+        // No-op
     }
 
-    /**
-     * Try to send a batch of records. If a send fails and is retriable, this saves the remainder of the batch so it can
-     * be retried after backing off. If a send fails and is not retriable, this will throw a ConnectException.
-     * @return true if all messages were sent, false if some need to be retried
-     */
-    private boolean sendRecords() {
-        int processed = 0;
-        recordBatch(toSend.size());
-        final SourceRecordWriteCounter counter =
-                toSend.size() > 0 ? new SourceRecordWriteCounter(toSend.size(), sourceTaskMetricsGroup) : null;
-        for (final SourceRecord preTransformRecord : toSend) {
-            maybeThrowProducerSendException();
-
-            retryWithToleranceOperator.sourceRecord(preTransformRecord);
-            final SourceRecord record = transformationChain.apply(preTransformRecord);
-            final ProducerRecord<byte[], byte[]> producerRecord = convertTransformedRecord(record);
-            if (producerRecord == null || retryWithToleranceOperator.failed()) {
-                counter.skipRecord();
-                commitTaskRecord(preTransformRecord, null);
-                continue;
-            }
-
-            log.trace("{} Appending record to the topic {} with key {}, value {}", this, record.topic(), record.key(), record.value());
-            SubmittedRecord submittedRecord = submittedRecords.submit(record);
-            try {
-                maybeCreateTopic(record.topic());
-                final String topic = producerRecord.topic();
-                producer.send(
-                    producerRecord,
-                    (recordMetadata, e) -> {
-                        if (e != null) {
-                            if (retryWithToleranceOperator.getErrorToleranceType() == ToleranceType.ALL) {
-                                log.trace("Ignoring failed record send: {} failed to send record to {}: ",
-                                        WorkerSourceTask.this, topic, e);
-                                // executeFailed here allows the use of existing logging infrastructure/configuration
-                                retryWithToleranceOperator.executeFailed(Stage.KAFKA_PRODUCE, WorkerSourceTask.class,
-                                        preTransformRecord, e);
-                                commitTaskRecord(preTransformRecord, null);
-                            } else {
-                                log.error("{} failed to send record to {}: ", WorkerSourceTask.this, topic, e);
-                                log.trace("{} Failed record: {}", WorkerSourceTask.this, preTransformRecord);
-                                producerSendException.compareAndSet(null, e);
-                            }
-                        } else {
-                            submittedRecord.ack();
-                            counter.completeRecord();
-                            log.trace("{} Wrote record successfully: topic {} partition {} offset {}",
-                                    WorkerSourceTask.this,
-                                    recordMetadata.topic(), recordMetadata.partition(),
-                                    recordMetadata.offset());
-                            commitTaskRecord(preTransformRecord, recordMetadata);
-                            if (isTopicTrackingEnabled) {
-                                recordActiveTopic(producerRecord.topic());
-                            }
-                        }
-                    });
-            } catch (RetriableException | org.apache.kafka.common.errors.RetriableException e) {
-                log.warn("{} Failed to send record to topic '{}' and partition '{}'. Backing off before retrying: ",
-                        this, producerRecord.topic(), producerRecord.partition(), e);
-                toSend = toSend.subList(processed, toSend.size());
-                submittedRecords.removeLastOccurrence(submittedRecord);
-                counter.retryRemaining();
-                return false;
-            } catch (ConnectException e) {
-                log.warn("{} Failed to send record to topic '{}' and partition '{}' due to an unrecoverable exception: ",
-                        this, producerRecord.topic(), producerRecord.partition(), e);
-                log.trace("{} Failed to send {} with unrecoverable exception: ", this, producerRecord, e);
-                throw e;
-            } catch (KafkaException e) {
-                throw new ConnectException("Unrecoverable exception trying to send", e);
-            }
-            processed++;
-        }
-        toSend = null;
-        return true;
+    @Override
+    protected void recordSent(
+            SourceRecord sourceRecord,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            RecordMetadata recordMetadata
+    ) {
+        commitTaskRecord(sourceRecord, recordMetadata);
     }
 
-    // Due to transformations that may change the destination topic of a record (such as
-    // RegexRouter) topic creation can not be batched for multiple topics
-    private void maybeCreateTopic(String topic) {
-        if (!topicCreation.isTopicCreationRequired(topic)) {
-            log.trace("Topic creation by the connector is disabled or the topic {} was previously created." +
-                "If auto.create.topics.enable is enabled on the broker, " +
-                "the topic will be created with default settings", topic);
-            return;
-        }
-        log.info("The task will send records to topic '{}' for the first time. Checking "
-                + "whether topic exists", topic);
-        Map<String, TopicDescription> existing = admin.describeTopics(topic);
-        if (!existing.isEmpty()) {
-            log.info("Topic '{}' already exists.", topic);
-            topicCreation.addTopic(topic);
-            return;
-        }
-
-        log.info("Creating topic '{}'", topic);
-        TopicCreationGroup topicGroup = topicCreation.findFirstGroup(topic);
-        log.debug("Topic '{}' matched topic creation group: {}", topic, topicGroup);
-        NewTopic newTopic = topicGroup.newTopic(topic);
-
-        TopicAdmin.TopicCreationResponse response = admin.createOrFindTopics(newTopic);
-        if (response.isCreated(newTopic.name())) {
-            topicCreation.addTopic(topic);
-            log.info("Created topic '{}' using creation group {}", newTopic, topicGroup);
-        } else if (response.isExisting(newTopic.name())) {
-            topicCreation.addTopic(topic);
-            log.info("Found existing topic '{}'", newTopic);
+    @Override
+    protected void producerSendFailed(
+            boolean synchronous,
+            ProducerRecord<byte[], byte[]> producerRecord,
+            SourceRecord preTransformRecord,
+            Exception e
+    ) {
+        if (synchronous) {
+            throw new ConnectException("Unrecoverable exception trying to send", e);
+        }
+
+        String topic = producerRecord.topic();
+        if (retryWithToleranceOperator.getErrorToleranceType() == ToleranceType.ALL) {
+            log.trace(
+                    "Ignoring failed record send: {} failed to send record to {}: ",
+                    WorkerSourceTask.this,
+                    topic,
+                    e
+            );
+            // executeFailed here allows the use of existing logging infrastructure/configuration
+            retryWithToleranceOperator.executeFailed(
+                    Stage.KAFKA_PRODUCE,
+                    WorkerSourceTask.class,
+                    preTransformRecord,
+                    e
+            );
+            commitTaskRecord(preTransformRecord, null);
         } else {
-            // The topic still does not exist and could not be created, so treat it as a task failure
-            log.warn("Request to create new topic '{}' failed", topic);
-            throw new ConnectException("Task failed to create new topic " + newTopic + ". Ensure "
-                    + "that the task is authorized to create topics or that the topic exists and "
-                    + "restart the task");
-        }
-    }
-
-    private RecordHeaders convertHeaderFor(SourceRecord record) {
-        Headers headers = record.headers();
-        RecordHeaders result = new RecordHeaders();
-        if (headers != null) {
-            String topic = record.topic();
-            for (Header header : headers) {
-                String key = header.key();
-                byte[] rawHeader = headerConverter.fromConnectHeader(topic, key, header.schema(), header.value());
-                result.add(key, rawHeader);
-            }
+            producerSendException.compareAndSet(null, e);
         }
-        return result;
     }
 
-    private void commitTaskRecord(SourceRecord record, RecordMetadata metadata) {
-        try {
-            task.commitRecord(record, metadata);
-        } catch (Throwable t) {
-            log.error("{} Exception thrown while calling task.commitRecord()", this, t);
-        }
+    @Override
+    protected void finalOffsetCommit(boolean failed) {
+        // It should still be safe to commit offsets since any exception would have
+        // simply resulted in not getting more records but all the existing records should be ok to flush
+        // and commit offsets. Worst case, task.commit() will also throw an exception causing the offset
+        // commit to fail.
+        submittedRecords.awaitAllMessages(
+                workerConfig.getLong(WorkerConfig.OFFSET_COMMIT_TIMEOUT_MS_CONFIG),
+                TimeUnit.MILLISECONDS
+        );
+        updateCommittableOffsets();
+        commitOffsets();
     }
 
     public boolean commitOffsets() {
@@ -510,8 +226,8 @@ public boolean commitOffsets() {
                         committableOffsets.largestDequeSize()
                 );
             } else {
-                log.debug("{} There are currently no pending messages for this offset commit; " 
-                        + "all messages dispatched to the task's producer since the last commit have been acknowledged",
+                log.debug("{} There are currently no pending messages for this offset commit; "
+                                + "all messages dispatched to the task's producer since the last commit have been acknowledged",
                         this
                 );
             }
@@ -582,11 +298,19 @@ public boolean commitOffsets() {
         return true;
     }
 
-    private void commitSourceTask() {
-        try {
-            this.task.commit();
-        } catch (Throwable t) {
-            log.error("{} Exception thrown while calling task.commit()", this, t);
+    private void updateCommittableOffsets() {
+        CommittableOffsets newOffsets = submittedRecords.committableOffsets();
+        synchronized (this) {
+            this.committableOffsets = this.committableOffsets.updatedWith(newOffsets);
+        }
+    }
+
+    private void maybeThrowProducerSendException() {
+        if (producerSendException.get() != null) {
+            throw new ConnectException(
+                    "Unrecoverable exception from producer send callback",
+                    producerSendException.get()
+            );
         }
     }
 
@@ -597,101 +321,4 @@ public String toString() {
                 '}';
     }
 
-    protected void recordPollReturned(int numRecordsInBatch, long duration) {
-        sourceTaskMetricsGroup.recordPoll(numRecordsInBatch, duration);
-    }
-
-    SourceTaskMetricsGroup sourceTaskMetricsGroup() {
-        return sourceTaskMetricsGroup;
-    }
-
-    static class SourceRecordWriteCounter {
-        private final SourceTaskMetricsGroup metricsGroup;
-        private final int batchSize;
-        private boolean completed = false;
-        private int counter;
-        public SourceRecordWriteCounter(int batchSize, SourceTaskMetricsGroup metricsGroup) {
-            assert batchSize > 0;
-            assert metricsGroup != null;
-            this.batchSize = batchSize;
-            counter = batchSize;
-            this.metricsGroup = metricsGroup;
-        }
-        public void skipRecord() {
-            if (counter > 0 && --counter == 0) {
-                finishedAllWrites();
-            }
-        }
-        public void completeRecord() {
-            if (counter > 0 && --counter == 0) {
-                finishedAllWrites();
-            }
-        }
-        public void retryRemaining() {
-            finishedAllWrites();
-        }
-        private void finishedAllWrites() {
-            if (!completed) {
-                metricsGroup.recordWrite(batchSize - counter);
-                completed = true;
-            }
-        }
-    }
-
-    static class SourceTaskMetricsGroup {
-        private final MetricGroup metricGroup;
-        private final Sensor sourceRecordPoll;
-        private final Sensor sourceRecordWrite;
-        private final Sensor sourceRecordActiveCount;
-        private final Sensor pollTime;
-        private int activeRecordCount;
-
-        public SourceTaskMetricsGroup(ConnectorTaskId id, ConnectMetrics connectMetrics) {
-            ConnectMetricsRegistry registry = connectMetrics.registry();
-            metricGroup = connectMetrics.group(registry.sourceTaskGroupName(),
-                    registry.connectorTagName(), id.connector(),
-                    registry.taskTagName(), Integer.toString(id.task()));
-            // remove any previously created metrics in this group to prevent collisions.
-            metricGroup.close();
-
-            sourceRecordPoll = metricGroup.sensor("source-record-poll");
-            sourceRecordPoll.add(metricGroup.metricName(registry.sourceRecordPollRate), new Rate());
-            sourceRecordPoll.add(metricGroup.metricName(registry.sourceRecordPollTotal), new CumulativeSum());
-
-            sourceRecordWrite = metricGroup.sensor("source-record-write");
-            sourceRecordWrite.add(metricGroup.metricName(registry.sourceRecordWriteRate), new Rate());
-            sourceRecordWrite.add(metricGroup.metricName(registry.sourceRecordWriteTotal), new CumulativeSum());
-
-            pollTime = metricGroup.sensor("poll-batch-time");
-            pollTime.add(metricGroup.metricName(registry.sourceRecordPollBatchTimeMax), new Max());
-            pollTime.add(metricGroup.metricName(registry.sourceRecordPollBatchTimeAvg), new Avg());
-
-            sourceRecordActiveCount = metricGroup.sensor("source-record-active-count");
-            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCount), new Value());
-            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCountMax), new Max());
-            sourceRecordActiveCount.add(metricGroup.metricName(registry.sourceRecordActiveCountAvg), new Avg());
-        }
-
-        void close() {
-            metricGroup.close();
-        }
-
-        void recordPoll(int batchSize, long duration) {
-            sourceRecordPoll.record(batchSize);
-            pollTime.record(duration);
-            activeRecordCount += batchSize;
-            sourceRecordActiveCount.record(activeRecordCount);
-        }
-
-        void recordWrite(int recordCount) {
-            sourceRecordWrite.record(recordCount);
-            activeRecordCount -= recordCount;
-            activeRecordCount = Math.max(0, activeRecordCount);
-            sourceRecordActiveCount.record(activeRecordCount);
-        }
-
-        protected MetricGroup metricGroup() {
-            return metricGroup;
-        }
-    }
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTaskContext.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTaskContext.java
index fe1409b282aa0..d58e98e057443 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTaskContext.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerSourceTaskContext.java
@@ -16,33 +16,42 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.source.SourceTaskContext;
 import org.apache.kafka.connect.storage.OffsetStorageReader;
+import org.apache.kafka.connect.util.ConnectorTaskId;
 
 import java.util.Map;
 
 public class WorkerSourceTaskContext implements SourceTaskContext {
 
     private final OffsetStorageReader reader;
-    private final WorkerSourceTask task;
+    private final ConnectorTaskId id;
     private final ClusterConfigState configState;
+    private final WorkerTransactionContext transactionContext;
 
     public WorkerSourceTaskContext(OffsetStorageReader reader,
-                                   WorkerSourceTask task,
-                                   ClusterConfigState configState) {
+                                   ConnectorTaskId id,
+                                   ClusterConfigState configState,
+                                   WorkerTransactionContext transactionContext) {
         this.reader = reader;
-        this.task = task;
+        this.id = id;
         this.configState = configState;
+        this.transactionContext = transactionContext;
     }
 
     @Override
     public Map<String, String> configs() {
-        return configState.taskConfig(task.id());
+        return configState.taskConfig(id);
     }
 
     @Override
     public OffsetStorageReader offsetStorageReader() {
         return reader;
     }
+
+    @Override
+    public WorkerTransactionContext transactionContext() {
+        return transactionContext;
+    }
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTask.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTask.java
index 0d893f56ee568..ea086199aae23 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTask.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTask.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.MetricNameTemplate;
+import org.apache.kafka.common.metrics.Gauge;
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.stats.Avg;
 import org.apache.kafka.common.metrics.stats.Frequencies;
@@ -52,10 +53,10 @@ abstract class WorkerTask implements Runnable {
     private static final Logger log = LoggerFactory.getLogger(WorkerTask.class);
     private static final String THREAD_NAME_PREFIX = "task-thread-";
 
-    protected final ConnectorTaskId id;
     private final TaskStatus.Listener statusListener;
+    private final StatusBackingStore statusBackingStore;
+    protected final ConnectorTaskId id;
     protected final ClassLoader loader;
-    protected final StatusBackingStore statusBackingStore;
     protected final Time time;
     private final CountDownLatch shutdownLatch = new CountDownLatch(1);
     private final TaskMetricsGroup taskMetricsGroup;
@@ -377,10 +378,8 @@ public TaskMetricsGroup(ConnectorTaskId id, ConnectMetrics connectMetrics, TaskS
 
         private void addRatioMetric(final State matchingState, MetricNameTemplate template) {
             MetricName metricName = metricGroup.metricName(template);
-            if (metricGroup.metrics().metric(metricName) == null) {
-                metricGroup.metrics().addMetric(metricName, (config, now) ->
+            metricGroup.metrics().addMetricIfAbsent(metricName, null, (Gauge<Double>) (config, now) ->
                     taskStateTimer.durationRatio(matchingState, now));
-            }
         }
 
         void close() {
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTransactionContext.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTransactionContext.java
new file mode 100644
index 0000000000000..fde3f6944f72a
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/WorkerTransactionContext.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.TransactionContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * A {@link TransactionContext} that can be given to tasks and then queried by the worker to check on
+ * requests to abort and commit transactions. This class is thread safe and is designed to accommodate
+ * concurrent use without external synchronization.
+ */
+public class WorkerTransactionContext implements TransactionContext {
+
+    private static final Logger log = LoggerFactory.getLogger(WorkerTransactionContext.class);
+
+    private final Set<SourceRecord> commitableRecords = new HashSet<>();
+    private final Set<SourceRecord> abortableRecords = new HashSet<>();
+    private boolean batchCommitRequested = false;
+    private boolean batchAbortRequested = false;
+
+    @Override
+    public synchronized void commitTransaction() {
+        batchCommitRequested = true;
+    }
+
+    @Override
+    public synchronized void commitTransaction(SourceRecord record) {
+        Objects.requireNonNull(record, "Source record used to define transaction boundaries may not be null");
+        commitableRecords.add(record);
+    }
+
+    @Override
+    public synchronized void abortTransaction() {
+        batchAbortRequested = true;
+    }
+
+    @Override
+    public synchronized void abortTransaction(SourceRecord record) {
+        Objects.requireNonNull(record, "Source record used to define transaction boundaries may not be null");
+        abortableRecords.add(record);
+    }
+
+    public synchronized boolean shouldCommitBatch() {
+        checkBatchRequestsConsistency();
+        boolean result = batchCommitRequested;
+        batchCommitRequested = false;
+        return result;
+    }
+
+    public synchronized boolean shouldAbortBatch() {
+        checkBatchRequestsConsistency();
+        boolean result = batchAbortRequested;
+        batchAbortRequested = false;
+        return result;
+    }
+
+    public synchronized boolean shouldCommitOn(SourceRecord record) {
+        // We could perform this check in the connector-facing methods (such as commitTransaction(SourceRecord)),
+        // but the connector might swallow that exception.
+        // This way, we can fail the task unconditionally, which is warranted since the alternative may lead to data loss.
+        // Essentially, instead of telling the task that it screwed up and trusting it to do the right thing, we rat on it to the
+        // worker and let it get punished accordingly.
+        checkRecordRequestConsistency(record);
+        return commitableRecords.remove(record);
+    }
+
+    public synchronized boolean shouldAbortOn(SourceRecord record) {
+        checkRecordRequestConsistency(record);
+        return abortableRecords.remove(record);
+    }
+
+    private void checkBatchRequestsConsistency() {
+        if (batchCommitRequested && batchAbortRequested) {
+            throw new IllegalStateException("Connector requested both commit and abort of same transaction");
+        }
+    }
+
+    private void checkRecordRequestConsistency(SourceRecord record) {
+        if (commitableRecords.contains(record) && abortableRecords.contains(record)) {
+            log.trace("Connector will fail as it has requested both commit and abort of transaction for same record: {}", record);
+            throw new IllegalStateException(String.format(
+                    "Connector requested both commit and abort of same record against topic/partition %s/%s",
+                    record.topic(), record.kafkaPartition()
+            ));
+        }
+    }
+
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ConnectAssignor.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ConnectAssignor.java
index 752e62e680a5e..1436460d1a913 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ConnectAssignor.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ConnectAssignor.java
@@ -32,7 +32,7 @@ public interface ConnectAssignor {
      * method computes an assignment of connectors and tasks among the members of the worker group.
      *
      * @param leaderId the leader of the group
-     * @param protocol the protocol type; for Connect assignors this is normally "connect"
+     * @param protocol the protocol type; for Connect assignors this is "eager", "compatible", or "sessioned"
      * @param allMemberMetadata the metadata of all the active workers of the group
      * @param coordinator the worker coordinator that runs this assignor
      * @return the assignment of connectors and tasks to workers
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedConfig.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedConfig.java
index 0823fbcc30ad3..cc9affa5f904a 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedConfig.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedConfig.java
@@ -17,9 +17,11 @@
 package org.apache.kafka.connect.runtime.distributed;
 
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.clients.producer.ProducerConfig;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.TopicConfig;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.runtime.WorkerConfig;
 import org.slf4j.Logger;
@@ -29,14 +31,22 @@
 import javax.crypto.Mac;
 import java.security.InvalidParameterException;
 import java.security.NoSuchAlgorithmException;
+import java.security.Provider;
+import java.security.Security;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import static org.apache.kafka.common.config.ConfigDef.Range.atLeast;
 import static org.apache.kafka.common.config.ConfigDef.Range.between;
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
+import static org.apache.kafka.common.utils.Utils.enumOptions;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_VALIDATOR;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_VALIDATOR;
 
@@ -171,8 +181,10 @@ public class DistributedConfig extends WorkerConfig {
     public static final int SCHEDULED_REBALANCE_MAX_DELAY_MS_DEFAULT = Math.toIntExact(TimeUnit.SECONDS.toMillis(300));
 
     public static final String INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG = "inter.worker.key.generation.algorithm";
-    public static final String INTER_WORKER_KEY_GENERATION_ALGORITHM_DOC = "The algorithm to use for generating internal request keys";
     public static final String INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT = "HmacSHA256";
+    public static final String INTER_WORKER_KEY_GENERATION_ALGORITHM_DOC = "The algorithm to use for generating internal request keys. "
+            + "The algorithm '" + INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT + "' will be used as a default on JVMs that support it; "
+            + "on other JVMs, no default is used and a value for this property must be manually specified in the worker config.";
 
     public static final String INTER_WORKER_KEY_SIZE_CONFIG = "inter.worker.key.size";
     public static final String INTER_WORKER_KEY_SIZE_DOC = "The size of the key to use for signing internal requests, in bits. "
@@ -185,12 +197,97 @@ public class DistributedConfig extends WorkerConfig {
     public static final int INTER_WORKER_KEY_TTL_MS_MS_DEFAULT = Math.toIntExact(TimeUnit.HOURS.toMillis(1));
 
     public static final String INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG = "inter.worker.signature.algorithm";
-    public static final String INTER_WORKER_SIGNATURE_ALGORITHM_DOC = "The algorithm used to sign internal requests";
     public static final String INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT = "HmacSHA256";
+    public static final String INTER_WORKER_SIGNATURE_ALGORITHM_DOC = "The algorithm used to sign internal requests"
+            + "The algorithm '" + INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG + "' will be used as a default on JVMs that support it; "
+            + "on other JVMs, no default is used and a value for this property must be manually specified in the worker config.";
 
     public static final String INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG = "inter.worker.verification.algorithms";
-    public static final String INTER_WORKER_VERIFICATION_ALGORITHMS_DOC = "A list of permitted algorithms for verifying internal requests";
     public static final List<String> INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT = Collections.singletonList(INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT);
+    public static final String INTER_WORKER_VERIFICATION_ALGORITHMS_DOC = "A list of permitted algorithms for verifying internal requests, "
+        + "which must include the algorithm used for the " + INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG + " property. "
+        + "The algorithm(s) '" + INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT + "' will be used as a default on JVMs that provide them; "
+        + "on other JVMs, no default is used and a value for this property must be manually specified in the worker config.";
+
+    private enum ExactlyOnceSourceSupport {
+        DISABLED(false),
+        PREPARING(true),
+        ENABLED(true);
+
+        public final boolean usesTransactionalLeader;
+
+        ExactlyOnceSourceSupport(boolean usesTransactionalLeader) {
+            this.usesTransactionalLeader = usesTransactionalLeader;
+        }
+
+        public static ExactlyOnceSourceSupport fromProperty(String property) {
+            return ExactlyOnceSourceSupport.valueOf(property.toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    public static final String EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG = "exactly.once.source.support";
+    public static final String EXACTLY_ONCE_SOURCE_SUPPORT_DOC = "Whether to enable exactly-once support for source connectors in the cluster "
+            + "by using transactions to write source records and their source offsets, and by proactively fencing out old task generations before bringing up new ones. ";
+            // TODO: https://issues.apache.org/jira/browse/KAFKA-13709
+            //       + "See the exactly-once source support documentation at [add docs link here] for more information on this feature.";
+    public static final String EXACTLY_ONCE_SOURCE_SUPPORT_DEFAULT = ExactlyOnceSourceSupport.DISABLED.toString();
+
+    private static Object defaultKeyGenerationAlgorithm() {
+        try {
+            validateKeyAlgorithm(INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT);
+            return INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT;
+        } catch (Throwable t) {
+            log.info(
+                    "The default key generation algorithm '{}' does not appear to be available on this worker."
+                            + "A key algorithm will have to be manually specified via the '{}' worker property",
+                    INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT,
+                    INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG
+            );
+            return ConfigDef.NO_DEFAULT_VALUE;
+        }
+    }
+
+    private static Object defaultSignatureAlgorithm() {
+        try {
+            validateSignatureAlgorithm(INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG, INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT);
+            return INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT;
+        } catch (Throwable t) {
+            log.info(
+                    "The default signature algorithm '{}' does not appear to be available on this worker."
+                            + "A signature algorithm will have to be manually specified via the '{}' worker property",
+                    INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT,
+                    INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG
+            );
+            return ConfigDef.NO_DEFAULT_VALUE;
+        }
+    }
+
+    private static Object defaultVerificationAlgorithms() {
+        List<String> result = new ArrayList<>();
+        for (String verificationAlgorithm : INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT) {
+            try {
+                validateSignatureAlgorithm(INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, verificationAlgorithm);
+                result.add(verificationAlgorithm);
+            } catch (Throwable t) {
+                log.trace("Verification algorithm '{}' not found", verificationAlgorithm);
+            }
+        }
+        if (result.isEmpty()) {
+            log.info(
+                    "The default verification algorithm '{}' does not appear to be available on this worker."
+                            + "One or more verification algorithms will have to be manually specified via the '{}' worker property",
+                    INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT,
+                    INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG
+            );
+            return ConfigDef.NO_DEFAULT_VALUE;
+        }
+        return result;
+    }
 
     @SuppressWarnings("unchecked")
     private static final ConfigDef CONFIG = baseConfigDef()
@@ -213,6 +310,12 @@ public class DistributedConfig extends WorkerConfig {
                     Math.toIntExact(TimeUnit.SECONDS.toMillis(3)),
                     ConfigDef.Importance.HIGH,
                     HEARTBEAT_INTERVAL_MS_DOC)
+            .define(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG,
+                    ConfigDef.Type.STRING,
+                    EXACTLY_ONCE_SOURCE_SUPPORT_DEFAULT,
+                    ConfigDef.CaseInsensitiveValidString.in(enumOptions(ExactlyOnceSourceSupport.class)),
+                    ConfigDef.Importance.HIGH,
+                    EXACTLY_ONCE_SOURCE_SUPPORT_DOC)
             .define(CommonClientConfigs.METADATA_MAX_AGE_CONFIG,
                     ConfigDef.Type.LONG,
                     TimeUnit.MINUTES.toMillis(5),
@@ -282,6 +385,7 @@ public class DistributedConfig extends WorkerConfig {
             .define(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
                     ConfigDef.Type.STRING,
                     CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                    in(Utils.enumOptions(SecurityProtocol.class)),
                     ConfigDef.Importance.MEDIUM,
                     CommonClientConfigs.SECURITY_PROTOCOL_DOC)
             .withClientSaslSupport()
@@ -366,11 +470,10 @@ public class DistributedConfig extends WorkerConfig {
                     INTER_WORKER_KEY_TTL_MS_MS_DOC)
             .define(INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG,
                     ConfigDef.Type.STRING,
-                    INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT,
+                    defaultKeyGenerationAlgorithm(),
                     ConfigDef.LambdaValidator.with(
-                        (name, value) -> validateKeyAlgorithm(name, (String) value),
-                        () -> "Any KeyGenerator algorithm supported by the worker JVM"
-                    ),
+                            (name, value) -> validateKeyAlgorithm(name, (String) value),
+                            () -> "Any KeyGenerator algorithm supported by the worker JVM"),
                     ConfigDef.Importance.LOW,
                     INTER_WORKER_KEY_GENERATION_ALGORITHM_DOC)
             .define(INTER_WORKER_KEY_SIZE_CONFIG,
@@ -380,31 +483,73 @@ public class DistributedConfig extends WorkerConfig {
                     INTER_WORKER_KEY_SIZE_DOC)
             .define(INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG,
                     ConfigDef.Type.STRING,
-                    INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT,
+                    defaultSignatureAlgorithm(),
                     ConfigDef.LambdaValidator.with(
-                        (name, value) -> validateSignatureAlgorithm(name, (String) value),
-                        () -> "Any MAC algorithm supported by the worker JVM"),
+                            (name, value) -> validateSignatureAlgorithm(name, (String) value),
+                            () -> "Any MAC algorithm supported by the worker JVM"),
                     ConfigDef.Importance.LOW,
                     INTER_WORKER_SIGNATURE_ALGORITHM_DOC)
             .define(INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG,
                     ConfigDef.Type.LIST,
-                    INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT,
+                    defaultVerificationAlgorithms(),
                     ConfigDef.LambdaValidator.with(
-                        (name, value) -> validateSignatureAlgorithms(name, (List<String>) value),
-                        () -> "A list of one or more MAC algorithms, each supported by the worker JVM"
-                    ),
+                            (name, value) -> validateVerificationAlgorithms(name, (List<String>) value),
+                            () -> "A list of one or more MAC algorithms, each supported by the worker JVM"),
                     ConfigDef.Importance.LOW,
                     INTER_WORKER_VERIFICATION_ALGORITHMS_DOC);
 
+    private final ExactlyOnceSourceSupport exactlyOnceSourceSupport;
+
     @Override
     public Integer getRebalanceTimeout() {
         return getInt(DistributedConfig.REBALANCE_TIMEOUT_MS_CONFIG);
     }
 
+    @Override
+    public boolean exactlyOnceSourceEnabled() {
+        return exactlyOnceSourceSupport == ExactlyOnceSourceSupport.ENABLED;
+    }
+
+    /**
+     * @return whether the Connect cluster's leader should use a transactional producer to perform writes to the config
+     * topic, which is useful for ensuring that zombie leaders are fenced out and unable to write to the topic after a
+     * new leader has been elected.
+     */
+    public boolean transactionalLeaderEnabled() {
+        return exactlyOnceSourceSupport.usesTransactionalLeader;
+    }
+
+    /**
+     * @return the {@link ProducerConfig#TRANSACTIONAL_ID_CONFIG transactional ID} to use for the worker's producer if
+     * using a transactional producer for writes to internal topics such as the config topic.
+     */
+    public String transactionalProducerId() {
+        return transactionalProducerId(groupId());
+    }
+
+    public static String transactionalProducerId(String groupId) {
+        return "connect-cluster-" + groupId;
+    }
+
+    @Override
+    public String offsetsTopic() {
+        return getString(OFFSET_STORAGE_TOPIC_CONFIG);
+    }
+
+    @Override
+    public boolean connectorOffsetsTopicsPermitted() {
+        return true;
+    }
+
+    @Override
+    public String groupId() {
+        return getString(GROUP_ID_CONFIG);
+    }
+
     public DistributedConfig(Map<String, String> props) {
         super(CONFIG, props);
-        getInternalRequestKeyGenerator(); // Check here for a valid key size + key algorithm to fail fast if either are invalid
-        validateKeyAlgorithmAndVerificationAlgorithms();
+        exactlyOnceSourceSupport = ExactlyOnceSourceSupport.fromProperty(getString(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG));
+        validateInterWorkerKeyConfigs();
     }
 
     public static void main(String[] args) {
@@ -453,34 +598,45 @@ public Map<String, Object> statusStorageTopicSettings() {
         return topicSettings(STATUS_STORAGE_PREFIX);
     }
 
-    private void validateKeyAlgorithmAndVerificationAlgorithms() {
-        String keyAlgorithm = getString(INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG);
+    private void validateInterWorkerKeyConfigs() {
+        getInternalRequestKeyGenerator();
+        ensureVerificationAlgorithmsIncludeSignatureAlgorithm();
+    }
+
+    private void ensureVerificationAlgorithmsIncludeSignatureAlgorithm() {
+        String signatureAlgorithm = getString(INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG);
         List<String> verificationAlgorithms = getList(INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG);
-        if (!verificationAlgorithms.contains(keyAlgorithm)) {
+        if (!verificationAlgorithms.contains(signatureAlgorithm)) {
             throw new ConfigException(
-                INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG,
-                keyAlgorithm,
-                String.format("Key generation algorithm must be present in %s list", INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG)
+                INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG,
+                signatureAlgorithm,
+                String.format("Signature algorithm must be present in %s list", INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG)
             );
         }
     }
 
-    private static void validateSignatureAlgorithms(String configName, List<String> algorithms) {
+    private static void validateVerificationAlgorithms(String configName, List<String> algorithms) {
         if (algorithms.isEmpty()) {
             throw new ConfigException(
-                configName,
-                algorithms,
-                "At least one signature verification algorithm must be provided"
+                    configName,
+                    algorithms,
+                    "At least one signature verification algorithm must be provided"
             );
         }
-        algorithms.forEach(algorithm -> validateSignatureAlgorithm(configName, algorithm));
+        for (String algorithm : algorithms) {
+            try {
+                Mac.getInstance(algorithm);
+            } catch (NoSuchAlgorithmException e) {
+                throw unsupportedAlgorithmException(configName, algorithm, "Mac");
+            }
+        }
     }
 
     private static void validateSignatureAlgorithm(String configName, String algorithm) {
         try {
             Mac.getInstance(algorithm);
         } catch (NoSuchAlgorithmException e) {
-            throw new ConfigException(configName, algorithm, e.getMessage());
+            throw unsupportedAlgorithmException(configName, algorithm, "Mac");
         }
     }
 
@@ -488,7 +644,29 @@ private static void validateKeyAlgorithm(String configName, String algorithm) {
         try {
             KeyGenerator.getInstance(algorithm);
         } catch (NoSuchAlgorithmException e) {
-            throw new ConfigException(configName, algorithm, e.getMessage());
+            throw unsupportedAlgorithmException(configName, algorithm, "KeyGenerator");
+        }
+    }
+
+    private static ConfigException unsupportedAlgorithmException(String name, Object value, String type) {
+        return new ConfigException(
+                name,
+                value,
+                "the algorithm is not supported by this JVM; the supported algorithms are: " + supportedAlgorithms(type)
+        );
+    }
+
+    // Visible for testing
+    static Set<String> supportedAlgorithms(String type) {
+        Set<String> result = new HashSet<>();
+        for (Provider provider : Security.getProviders()) {
+            for (Provider.Service service : provider.getServices()) {
+                if (type.equals(service.getType())) {
+                    result.add(service.getAlgorithm());
+                }
+            }
         }
+        return result;
     }
+
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
index 357796c9d1af7..388bfa4218a73 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/DistributedHerder.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.connect.runtime.distributed;
 
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigValue;
 import org.apache.kafka.common.errors.WakeupException;
@@ -29,7 +30,6 @@
 import org.apache.kafka.common.utils.ThreadUtils;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
-import org.apache.kafka.connect.connector.Connector;
 import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.errors.AlreadyExistsException;
 import org.apache.kafka.connect.errors.ConnectException;
@@ -50,18 +50,27 @@
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.TaskStatus;
 import org.apache.kafka.connect.runtime.Worker;
+import org.apache.kafka.connect.storage.PrivilegedWriteException;
 import org.apache.kafka.connect.runtime.rest.InternalRequestSignature;
 import org.apache.kafka.connect.runtime.rest.RestClient;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
+import org.apache.kafka.connect.runtime.rest.entities.ConnectorType;
 import org.apache.kafka.connect.runtime.rest.entities.TaskInfo;
 import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
 import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
 import org.apache.kafka.connect.sink.SinkConnector;
+import org.apache.kafka.connect.source.ConnectorTransactionBoundaries;
+import org.apache.kafka.connect.source.ExactlyOnceSupport;
+import org.apache.kafka.connect.source.SourceConnector;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
 import org.apache.kafka.connect.storage.StatusBackingStore;
 import org.apache.kafka.connect.util.Callback;
+import org.apache.kafka.connect.util.ConnectUtils;
 import org.apache.kafka.connect.util.ConnectorTaskId;
+import org.apache.kafka.connect.util.FutureCallback;
 import org.apache.kafka.connect.util.SinkUtils;
 import org.slf4j.Logger;
 
@@ -84,6 +93,7 @@
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.LinkedBlockingDeque;
@@ -138,6 +148,7 @@ public class DistributedHerder extends AbstractHerder implements Runnable {
     private static final long FORWARD_REQUEST_SHUTDOWN_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(10);
     private static final long START_AND_STOP_SHUTDOWN_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(1);
     private static final long RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MS = 250;
+    private static final long CONFIG_TOPIC_WRITE_PRIVILEGES_BACKOFF_MS = 250;
     private static final int START_STOP_THREAD_POOL_SIZE = 8;
     private static final short BACKOFF_RETRIES = 5;
 
@@ -156,8 +167,9 @@ public class DistributedHerder extends AbstractHerder implements Runnable {
     private final List<String> keySignatureVerificationAlgorithms;
     private final KeyGenerator keyGenerator;
 
+    // Visible for testing
+    ExecutorService forwardRequestExecutor;
     private final ExecutorService herderExecutor;
-    private final ExecutorService forwardRequestExecutor;
     private final ExecutorService startAndStopExecutor;
     private final WorkerGroupMember member;
     private final AtomicBoolean stopping;
@@ -184,7 +196,10 @@ public class DistributedHerder extends AbstractHerder implements Runnable {
     // Similarly collect target state changes (when observed by the config storage listener) for handling in the
     // herder's main thread.
     private Set<String> connectorTargetStateChanges = new HashSet<>();
+    // Access to this map is protected by the herder's monitor
+    private final Map<String, ZombieFencing> activeZombieFencings = new HashMap<>();
     private boolean needsReconfigRebalance;
+    private volatile boolean fencedFromConfigTopic;
     private volatile int generation;
     private volatile long scheduledRebalance;
     private volatile SecretKey sessionKey;
@@ -196,6 +211,10 @@ public class DistributedHerder extends AbstractHerder implements Runnable {
     // The latest pending restart request for each named connector
     final Map<String, RestartRequest> pendingRestartRequests = new HashMap<>();
 
+    // The thread that the herder's tick loop runs on. Would be final, but cannot be set in the constructor,
+    // and it's also useful to be able to modify it for testing
+    Thread herderThread;
+
     private final DistributedConfig config;
 
     /**
@@ -284,6 +303,7 @@ public DistributedHerder(DistributedConfig config,
         configState = ClusterConfigState.EMPTY;
         rebalanceResolved = true; // If we still need to follow up after a rebalance occurred, starting up tasks
         needsReconfigRebalance = false;
+        fencedFromConfigTopic = false;
         canReadConfigs = true; // We didn't try yet, but Configs are readable until proven otherwise
         scheduledRebalance = Long.MAX_VALUE;
         keyExpiration = Long.MAX_VALUE;
@@ -316,6 +336,7 @@ public void start() {
     public void run() {
         try {
             log.info("Herder starting");
+            herderThread = Thread.currentThread();
 
             startServices();
 
@@ -368,18 +389,36 @@ public void tick() {
             return;
         }
 
+        if (fencedFromConfigTopic) {
+            if (isLeader()) {
+                // We were accidentally fenced out, possibly by a zombie leader
+                try {
+                    log.debug("Reclaiming write privileges for config topic after being fenced out");
+                    configBackingStore.claimWritePrivileges();
+                    fencedFromConfigTopic = false;
+                    log.debug("Successfully reclaimed write privileges for config topic after being fenced out");
+                } catch (Exception e) {
+                    log.warn("Unable to claim write privileges for config topic. Will backoff and possibly retry if still the leader", e);
+                    backoff(CONFIG_TOPIC_WRITE_PRIVILEGES_BACKOFF_MS);
+                    return;
+                }
+            } else {
+                log.trace("Relinquished write privileges for config topic after being fenced out, since worker is no longer the leader of the cluster");
+                // We were meant to be fenced out because we fell out of the group and a new leader was elected
+                fencedFromConfigTopic = false;
+            }
+        }
+
         long now = time.milliseconds();
 
         if (checkForKeyRotation(now)) {
             log.debug("Distributing new session key");
             keyExpiration = Long.MAX_VALUE;
             try {
-                configBackingStore.putSessionKey(new SessionKey(
-                    keyGenerator.generateKey(),
-                    now
-                ));
+                SessionKey newSessionKey = new SessionKey(keyGenerator.generateKey(), now);
+                writeToConfigTopicAsLeader(() -> configBackingStore.putSessionKey(newSessionKey));
             } catch (Exception e) {
-                log.info("Failed to write new session key to config topic; forcing a read to the end of the config topic before possibly retrying");
+                log.info("Failed to write new session key to config topic; forcing a read to the end of the config topic before possibly retrying", e);
                 canReadConfigs = false;
                 return;
             }
@@ -404,12 +443,7 @@ public void tick() {
                 break;
             }
 
-            try {
-                next.action().call();
-                next.callback().onCompletion(null, null);
-            } catch (Throwable t) {
-                next.callback().onCompletion(t, null);
-            }
+            runRequest(next.action(), next.callback());
         }
 
         // Process all pending connector restart requests
@@ -488,6 +522,12 @@ private boolean checkForKeyRotation(long now) {
         SecretKey key;
         long expiration;
         synchronized (this) {
+            // This happens on startup; the snapshot contains the session key,
+            // but no callback in the config update listener has been fired for it yet.
+            if (sessionKey == null && configState.sessionKey() != null) {
+                sessionKey = configState.sessionKey().key();
+                keyExpiration = configState.sessionKey().creationTimestamp() + keyRotationIntervalMs;
+            }
             key = sessionKey;
             expiration = keyExpiration;
         }
@@ -507,10 +547,6 @@ private boolean checkForKeyRotation(long now) {
                         + "than required by current worker configuration. Distributing new key now.");
                     return true;
                 }
-            } else if (key == null && configState.sessionKey() != null) {
-                // This happens on startup for follower workers; the snapshot contains the session key,
-                // but no callback in the config update listener has been fired for it yet.
-                sessionKey = configState.sessionKey().key();
             }
         }
         return false;
@@ -680,11 +716,25 @@ private void processTaskConfigUpdatesWithIncrementalCooperative(Set<ConnectorTas
                 localTasks, taskConfigUpdates);
         Set<String> connectorsWhoseTasksToStop = taskConfigUpdates.stream()
                 .map(ConnectorTaskId::connector).collect(Collectors.toSet());
+        stopReconfiguredTasks(connectorsWhoseTasksToStop);
+    }
+
+    private void stopReconfiguredTasks(Set<String> connectors) {
+        Set<ConnectorTaskId> localTasks = assignment == null
+                ? Collections.emptySet()
+                : new HashSet<>(assignment.tasks());
 
         List<ConnectorTaskId> tasksToStop = localTasks.stream()
-                .filter(taskId -> connectorsWhoseTasksToStop.contains(taskId.connector()))
+                .filter(taskId -> connectors.contains(taskId.connector()))
                 .collect(Collectors.toList());
-        log.info("Handling task config update by restarting tasks {}", tasksToStop);
+
+        if (tasksToStop.isEmpty()) {
+            // The rest of the method would essentially be a no-op so this isn't strictly necessary,
+            // but it prevents an unnecessary log message from being emitted
+            return;
+        }
+
+        log.info("Handling task config update by stopping tasks {}, which will be restarted after rebalance if still assigned to this worker", tasksToStop);
         worker.stopAndAwaitTasks(tasksToStop);
         tasksToRestart.addAll(tasksToStop);
     }
@@ -832,7 +882,7 @@ public void deleteConnectorConfig(final String connName, final Callback<Created<
                     callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                 } else {
                     log.trace("Removing connector config {} {}", connName, configState.connectors());
-                    configBackingStore.removeConnectorConfig(connName);
+                    writeToConfigTopicAsLeader(() -> configBackingStore.removeConnectorConfig(connName));
                     callback.onCompletion(null, new Created<>(false, null));
                 }
                 return null;
@@ -842,21 +892,134 @@ public void deleteConnectorConfig(final String connName, final Callback<Created<
     }
 
     @Override
-    protected Map<String, ConfigValue> validateBasicConnectorConfig(Connector connector,
-                                                                    ConfigDef configDef,
-                                                                    Map<String, String> config) {
-        Map<String, ConfigValue> validatedConfig = super.validateBasicConnectorConfig(connector, configDef, config);
-        if (connector instanceof SinkConnector) {
-            ConfigValue validatedName = validatedConfig.get(ConnectorConfig.NAME_CONFIG);
-            String name = (String) validatedName.value();
-            if (workerGroupId.equals(SinkUtils.consumerGroupId(name))) {
-                validatedName.addErrorMessage("Consumer group for sink connector named " + name +
-                        " conflicts with Connect worker group " + workerGroupId);
+    protected Map<String, ConfigValue> validateSinkConnectorConfig(SinkConnector connector, ConfigDef configDef, Map<String, String> config) {
+        Map<String, ConfigValue> result = super.validateSinkConnectorConfig(connector, configDef, config);
+        validateSinkConnectorGroupId(result);
+        return result;
+    }
+
+    @Override
+    protected Map<String, ConfigValue> validateSourceConnectorConfig(SourceConnector connector, ConfigDef configDef, Map<String, String> config) {
+        Map<String, ConfigValue> result = super.validateSourceConnectorConfig(connector, configDef, config);
+        validateSourceConnectorExactlyOnceSupport(config, result, connector);
+        validateSourceConnectorTransactionBoundary(config, result, connector);
+        return result;
+    }
+
+
+    private void validateSinkConnectorGroupId(Map<String, ConfigValue> validatedConfig) {
+        ConfigValue validatedName = validatedConfig.get(ConnectorConfig.NAME_CONFIG);
+        String name = (String) validatedName.value();
+        if (workerGroupId.equals(SinkUtils.consumerGroupId(name))) {
+            validatedName.addErrorMessage("Consumer group for sink connector named " + name +
+                    " conflicts with Connect worker group " + workerGroupId);
+        }
+    }
+
+    private void validateSourceConnectorExactlyOnceSupport(
+            Map<String, String> rawConfig,
+            Map<String, ConfigValue> validatedConfig,
+            SourceConnector connector) {
+        ConfigValue validatedExactlyOnceSupport = validatedConfig.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG);
+        if (validatedExactlyOnceSupport.errorMessages().isEmpty()) {
+            // Should be safe to parse the enum from the user-provided value since it's passed validation so far
+            SourceConnectorConfig.ExactlyOnceSupportLevel exactlyOnceSupportLevel =
+                    SourceConnectorConfig.ExactlyOnceSupportLevel.fromProperty(Objects.toString(validatedExactlyOnceSupport.value()));
+            if (SourceConnectorConfig.ExactlyOnceSupportLevel.REQUIRED.equals(exactlyOnceSupportLevel)) {
+                if (!config.exactlyOnceSourceEnabled()) {
+                    validatedExactlyOnceSupport.addErrorMessage("This worker does not have exactly-once source support enabled.");
+                }
+
+                try {
+                    ExactlyOnceSupport exactlyOnceSupport = connector.exactlyOnceSupport(rawConfig);
+                    if (!ExactlyOnceSupport.SUPPORTED.equals(exactlyOnceSupport)) {
+                        final String validationErrorMessage;
+                        // Would do a switch here but that doesn't permit matching on null values
+                        if (exactlyOnceSupport == null) {
+                            validationErrorMessage = "The connector does not implement the API required for preflight validation of exactly-once "
+                                    + "source support. Please consult the documentation for the connector to determine whether it supports exactly-once "
+                                    + "guarantees, and then consider reconfiguring the connector to use the value \""
+                                    + SourceConnectorConfig.ExactlyOnceSupportLevel.REQUESTED
+                                    + "\" for this property (which will disable this preflight check and allow the connector to be created).";
+                        } else if (ExactlyOnceSupport.UNSUPPORTED.equals(exactlyOnceSupport)) {
+                            validationErrorMessage = "The connector does not support exactly-once delivery guarantees with the provided configuration.";
+                        } else {
+                            throw new ConnectException("Unexpected value returned from SourceConnector::exactlyOnceSupport: " + exactlyOnceSupport);
+                        }
+                        validatedExactlyOnceSupport.addErrorMessage(validationErrorMessage);
+                    }
+                } catch (Exception e) {
+                    log.error("Failed while validating connector support for exactly-once guarantees", e);
+                    String validationErrorMessage = "An unexpected error occurred during validation";
+                    String failureMessage = e.getMessage();
+                    if (failureMessage != null && !failureMessage.trim().isEmpty()) {
+                        validationErrorMessage += ": " + failureMessage.trim();
+                    } else {
+                        validationErrorMessage += "; please see the worker logs for more details.";
+                    }
+                    validatedExactlyOnceSupport.addErrorMessage(validationErrorMessage);
+                }
             }
         }
-        return validatedConfig;
     }
 
+    private void validateSourceConnectorTransactionBoundary(
+            Map<String, String> rawConfig,
+            Map<String, ConfigValue> validatedConfig,
+            SourceConnector connector) {
+        ConfigValue validatedTransactionBoundary = validatedConfig.get(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG);
+        if (validatedTransactionBoundary.errorMessages().isEmpty()) {
+            // Should be safe to parse the enum from the user-provided value since it's passed validation so far
+            SourceTask.TransactionBoundary transactionBoundary =
+                    SourceTask.TransactionBoundary.fromProperty(Objects.toString(validatedTransactionBoundary.value()));
+            if (SourceTask.TransactionBoundary.CONNECTOR.equals(transactionBoundary)) {
+                try {
+                    ConnectorTransactionBoundaries connectorTransactionSupport = connector.canDefineTransactionBoundaries(rawConfig);
+                    if (connectorTransactionSupport == null) {
+                        validatedTransactionBoundary.addErrorMessage(
+                                "This connector has returned a null value from its canDefineTransactionBoundaries method, which is not permitted. " +
+                                        "The connector will be treated as if it cannot define its own transaction boundaries, and cannot be configured with " +
+                                        "'" + SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG + "' set to '" + SourceTask.TransactionBoundary.CONNECTOR + "'."
+                        );
+                    } else if (!ConnectorTransactionBoundaries.SUPPORTED.equals(connectorTransactionSupport)) {
+                        validatedTransactionBoundary.addErrorMessage(
+                                "The connector does not support connector-defined transaction boundaries with the given configuration. "
+                                        + "Please reconfigure it to use a different transaction boundary definition.");
+                    }
+                } catch (Exception e) {
+                    log.error("Failed while validating connector support for defining its own transaction boundaries", e);
+                    String validationErrorMessage = "An unexpected error occurred during validation";
+                    String failureMessage = e.getMessage();
+                    if (failureMessage != null && !failureMessage.trim().isEmpty()) {
+                        validationErrorMessage += ": " + failureMessage.trim();
+                    } else {
+                        validationErrorMessage += "; please see the worker logs for more details.";
+                    }
+                    validatedTransactionBoundary.addErrorMessage(validationErrorMessage);
+                }
+            }
+        }
+    }
+
+    @Override
+    protected boolean connectorUsesAdmin(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        return super.connectorUsesAdmin(connectorType, connProps)
+                || connectorUsesSeparateOffsetsTopicClients(connectorType, connProps);
+    }
+
+    @Override
+    protected boolean connectorUsesConsumer(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        return super.connectorUsesConsumer(connectorType, connProps)
+                || connectorUsesSeparateOffsetsTopicClients(connectorType, connProps);
+    }
+
+    private boolean connectorUsesSeparateOffsetsTopicClients(org.apache.kafka.connect.health.ConnectorType connectorType, Map<String, String> connProps) {
+        if (connectorType != org.apache.kafka.connect.health.ConnectorType.SOURCE) {
+            return false;
+        }
+        return config.exactlyOnceSourceEnabled()
+                || !connProps.getOrDefault(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, "").trim().isEmpty();
+    }
 
     @Override
     public void putConnectorConfig(final String connName, final Map<String, String> config, final boolean allowReplace,
@@ -891,13 +1054,13 @@ public void putConnectorConfig(final String connName, final Map<String, String>
                             }
 
                             log.trace("Submitting connector config {} {} {}", connName, allowReplace, configState.connectors());
-                            configBackingStore.putConnectorConfig(connName, config);
+                            writeToConfigTopicAsLeader(() -> configBackingStore.putConnectorConfig(connName, config));
 
                             // Note that we use the updated connector config despite the fact that we don't have an updated
                             // snapshot yet. The existing task info should still be accurate.
                             ConnectorInfo info = new ConnectorInfo(connName, config, configState.tasks(connName),
                                 // validateConnectorConfig have checked the existence of CONNECTOR_CLASS_CONFIG
-                                connectorTypeForClass(config.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG)));
+                                connectorTypeForConfig(config));
                             callback.onCompletion(null, new Created<>(!exists, info));
                             return null;
                         },
@@ -956,31 +1119,8 @@ public void taskConfigs(final String connName, final Callback<List<TaskInfo>> ca
     @Override
     public void putTaskConfigs(final String connName, final List<Map<String, String>> configs, final Callback<Void> callback, InternalRequestSignature requestSignature) {
         log.trace("Submitting put task configuration request {}", connName);
-        if (internalRequestValidationEnabled()) {
-            ConnectRestException requestValidationError = null;
-            if (requestSignature == null) {
-                requestValidationError = new BadRequestException("Internal request missing required signature");
-            } else if (!keySignatureVerificationAlgorithms.contains(requestSignature.keyAlgorithm())) {
-                requestValidationError = new BadRequestException(String.format(
-                    "This worker does not support the '%s' key signing algorithm used by other workers. " 
-                        + "This worker is currently configured to use: %s. " 
-                        + "Check that all workers' configuration files permit the same set of signature algorithms, " 
-                        + "and correct any misconfigured worker and restart it.",
-                    requestSignature.keyAlgorithm(),
-                    keySignatureVerificationAlgorithms
-                ));
-            } else {
-                if (!requestSignature.isValid(sessionKey)) {
-                    requestValidationError = new ConnectRestException(
-                        Response.Status.FORBIDDEN,
-                        "Internal request contained invalid signature."
-                    );
-                }
-            }
-            if (requestValidationError != null) {
-                callback.onCompletion(requestValidationError, null);
-                return;
-            }
+        if (requestNotSignedProperly(requestSignature, callback)) {
+            return;
         }
 
         addRequest(
@@ -990,7 +1130,7 @@ public void putTaskConfigs(final String connName, final List<Map<String, String>
                 else if (!configState.contains(connName))
                     callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                 else {
-                    configBackingStore.putTaskConfigs(connName, configs);
+                    writeToConfigTopicAsLeader(() -> configBackingStore.putTaskConfigs(connName, configs));
                     callback.onCompletion(null, null);
                 }
                 return null;
@@ -999,6 +1139,113 @@ else if (!configState.contains(connName))
         );
     }
 
+    // Another worker has forwarded a request to this worker (which it believes is the leader) to perform a round of zombie fencing
+    @Override
+    public void fenceZombieSourceTasks(final String connName, final Callback<Void> callback, InternalRequestSignature requestSignature) {
+        log.trace("Submitting zombie fencing request {}", connName);
+        if (requestNotSignedProperly(requestSignature, callback)) {
+            return;
+        }
+
+        fenceZombieSourceTasks(connName, callback);
+    }
+
+    // A task on this worker requires a round of zombie fencing
+    void fenceZombieSourceTasks(final ConnectorTaskId id, Callback<Void> callback) {
+        log.trace("Performing preflight zombie check for task {}", id);
+        fenceZombieSourceTasks(id.connector(), (error, ignored) -> {
+            if (error == null) {
+                callback.onCompletion(null, null);
+            } else if (error instanceof NotLeaderException) {
+                String forwardedUrl = ((NotLeaderException) error).forwardUrl() + "connectors/" + id.connector() + "/fence";
+                log.trace("Forwarding zombie fencing request for connector {} to leader at {}", id.connector(), forwardedUrl);
+                forwardRequestExecutor.execute(() -> {
+                    try {
+                        RestClient.httpRequest(forwardedUrl, "PUT", null, null, null, config, sessionKey, requestSignatureAlgorithm);
+                        callback.onCompletion(null, null);
+                    } catch (Throwable t) {
+                        callback.onCompletion(t, null);
+                    }
+                });
+            } else {
+                error = ConnectUtils.maybeWrap(error, "Failed to perform zombie fencing");
+                callback.onCompletion(error, null);
+            }
+        });
+    }
+
+    // Visible for testing
+    void fenceZombieSourceTasks(final String connName, final Callback<Void> callback) {
+        addRequest(
+                () -> {
+                    log.trace("Performing zombie fencing request for connector {}", connName);
+                    if (!isLeader())
+                        callback.onCompletion(new NotLeaderException("Only the leader may perform zombie fencing.", leaderUrl()), null);
+                    else if (!configState.contains(connName))
+                        callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
+                    else if (!isSourceConnector(connName))
+                        callback.onCompletion(new BadRequestException("Connector " + connName + " is not a source connector"), null);
+                    else {
+                        if (!refreshConfigSnapshot(workerSyncTimeoutMs)) {
+                            throw new ConnectException("Failed to read to end of config topic before performing zombie fencing");
+                        }
+
+                        int taskCount = configState.taskCount(connName);
+                        Integer taskCountRecord = configState.taskCountRecord(connName);
+
+                        ZombieFencing zombieFencing = null;
+                        boolean newFencing = false;
+                        synchronized (DistributedHerder.this) {
+                            // Check first to see if we have to do a fencing. The control flow is a little awkward here (why not stick this in
+                            // an else block lower down?) but we can't synchronize around the body below since that may contain a synchronous
+                            // write to the config topic.
+                            if (configState.pendingFencing(connName) && taskCountRecord != null
+                                    && (taskCountRecord != 1 || taskCount != 1)) {
+                                int taskGen = configState.taskConfigGeneration(connName);
+                                zombieFencing = activeZombieFencings.get(connName);
+                                if (zombieFencing == null) {
+                                    zombieFencing = new ZombieFencing(connName, taskCountRecord, taskCount, taskGen);
+                                    activeZombieFencings.put(connName, zombieFencing);
+                                    newFencing = true;
+                                }
+                            }
+                        }
+                        if (zombieFencing != null) {
+                            if (newFencing) {
+                                zombieFencing.start();
+                            }
+                            zombieFencing.addCallback(callback);
+                            return null;
+                        }
+
+                        if (!configState.pendingFencing(connName)) {
+                            // If the latest task count record for the connector is present after the latest set of task configs, there's no need to
+                            // do any zombie fencing or write a new task count record to the config topic
+                            log.debug("Skipping zombie fencing round for connector {} as all old task generations have already been fenced out", connName);
+                        } else {
+                            if (taskCountRecord == null) {
+                                // If there is no task count record present for the connector, no transactional producers should have been brought up for it,
+                                // so there's nothing to fence--but we do need to write a task count record now so that we know to fence those tasks if/when
+                                // the connector is reconfigured
+                                log.debug("Skipping zombie fencing round but writing task count record for connector {} "
+                                        + "as it is being brought up for the first time with exactly-once source support", connName);
+                            } else {
+                                // If the last generation of tasks only had one task, and the next generation only has one, then the new task will automatically
+                                // fence out the older task if it's still running; no need to fence here, but again, we still need to write a task count record
+                                log.debug("Skipping zombie fencing round but writing task count record for connector {} "
+                                        + "as both the most recent and the current generation of task configs only contain one task", connName);
+                            }
+                            writeToConfigTopicAsLeader(() -> configBackingStore.putTaskCountRecord(connName, taskCount));
+                        }
+                        callback.onCompletion(null, null);
+                        return null;
+                    }
+                    return null;
+                },
+                forwardErrorCallback(callback)
+        );
+    }
+
     @Override
     public void restartConnector(final String connName, final Callback<Void> callback) {
         restartConnector(0, connName, callback);
@@ -1180,8 +1427,8 @@ protected synchronized void doRestartConnectorAndTasks(RestartRequest request) {
             }
         }
         if (restartTasks) {
-            log.debug("Restarting {} of {} tasks for {}", plan.restartTaskCount(), plan.totalTaskCount(), request);
-            plan.taskIdsToRestart().forEach(taskId -> {
+            log.debug("Restarting {} of {} tasks for {}", assignedIdsToRestart.size(), plan.totalTaskCount(), request);
+            assignedIdsToRestart.forEach(taskId -> {
                 try {
                     if (startTask(taskId)) {
                         log.info("Task '{}' restart successful", taskId);
@@ -1192,7 +1439,7 @@ protected synchronized void doRestartConnectorAndTasks(RestartRequest request) {
                     log.error("Task '{}' restart failed", taskId, t);
                 }
             });
-            log.debug("Restarted {} of {} tasks for {} as requested", plan.restartTaskCount(), plan.totalTaskCount(), request);
+            log.debug("Restarted {} of {} tasks for {} as requested", assignedIdsToRestart.size(), plan.totalTaskCount(), request);
         }
         log.info("Completed {}", plan);
     }
@@ -1211,6 +1458,25 @@ private String leaderUrl() {
         return assignment.leaderUrl();
     }
 
+    /**
+     * Perform an action that writes to the config topic, and if it fails because the leader has been fenced out, make note of that
+     * fact so that we can try to reclaim write ownership (if still the leader of the cluster) in a subsequent iteration of the tick loop.
+     * Note that it is not necessary to wrap every write to the config topic in this method, only the writes that should be performed
+     * exclusively by the leader. For example, {@link ConfigBackingStore#putTargetState(String, TargetState)} does not require this
+     * method, as it can be invoked by any worker in the cluster.
+     * @param write the action that writes to the config topic, such as {@link ConfigBackingStore#putSessionKey(SessionKey)} or
+     *              {@link ConfigBackingStore#putConnectorConfig(String, Map)}.
+     */
+    private void writeToConfigTopicAsLeader(Runnable write) {
+        try {
+            write.run();
+        } catch (PrivilegedWriteException e) {
+            log.warn("Failed to write to config topic as leader; will rejoin group if necessary and, if still leader, attempt to reclaim write privileges for the config topic", e);
+            fencedFromConfigTopic = true;
+            throw new ConnectException("Failed to write to config topic; this may be due to a transient error and the request can be safely retried", e);
+        }
+    }
+
     /**
      * Handle post-assignment operations, either trying to resolve issues that kept assignment from completing, getting
      * this node into sync and its work started.
@@ -1308,7 +1574,8 @@ private boolean handleRebalanceCompleted() {
     }
 
     /**
-     * Try to read to the end of the config log within the given timeout
+     * Try to read to the end of the config log within the given timeout. If unsuccessful, leave the group
+     * and wait for a brief backoff period before returning
      * @param timeoutMs maximum time to wait to sync to the end of the log
      * @return true if successful, false if timed out
      */
@@ -1318,18 +1585,32 @@ private boolean readConfigToEnd(long timeoutMs) {
         } else {
             log.info("Reading to end of config log; current config state offset: {}", configState.offset());
         }
+        if (refreshConfigSnapshot(timeoutMs)) {
+            backoffRetries = BACKOFF_RETRIES;
+            return true;
+        } else {
+            // in case reading the log takes too long, leave the group to ensure a quick rebalance (although by default we should be out of the group already)
+            // and back off to avoid a tight loop of rejoin-attempt-to-catch-up-leave
+            member.maybeLeaveGroup("taking too long to read the log");
+            backoff(workerUnsyncBackoffMs);
+            return false;
+        }
+    }
+
+    /**
+     * Try to read to the end of the config log within the given timeout
+     * @param timeoutMs maximum time to wait to sync to the end of the log
+     * @return true if successful; false if timed out
+     */
+    private boolean refreshConfigSnapshot(long timeoutMs) {
         try {
             configBackingStore.refresh(timeoutMs, TimeUnit.MILLISECONDS);
             configState = configBackingStore.snapshot();
             log.info("Finished reading to end of log and updated config snapshot, new config log offset: {}", configState.offset());
-            backoffRetries = BACKOFF_RETRIES;
             return true;
         } catch (TimeoutException e) {
-            // in case reading the log takes too long, leave the group to ensure a quick rebalance (although by default we should be out of the group already)
-            // and back off to avoid a tight loop of rejoin-attempt-to-catch-up-leave
             log.warn("Didn't reach end of config log quickly enough", e);
-            member.maybeLeaveGroup("taking too long to read the log");
-            backoff(workerUnsyncBackoffMs);
+            canReadConfigs = false;
             return false;
         }
     }
@@ -1418,14 +1699,54 @@ private static <T> Collection<T> assignmentDifference(Collection<T> update, Coll
 
     private boolean startTask(ConnectorTaskId taskId) {
         log.info("Starting task {}", taskId);
-        return worker.startTask(
-                taskId,
-                configState,
-                configState.connectorConfig(taskId.connector()),
-                configState.taskConfig(taskId),
-                this,
-                configState.targetState(taskId.connector())
-        );
+        Map<String, String> connProps = configState.connectorConfig(taskId.connector());
+        switch (connectorTypeForConfig(connProps)) {
+            case SINK:
+                return worker.startSinkTask(
+                        taskId,
+                        configState,
+                        connProps,
+                        configState.taskConfig(taskId),
+                        this,
+                        configState.targetState(taskId.connector())
+                );
+            case SOURCE:
+                if (config.exactlyOnceSourceEnabled()) {
+                    int taskGeneration = configState.taskConfigGeneration(taskId.connector());
+                    return worker.startExactlyOnceSourceTask(
+                            taskId,
+                            configState,
+                            connProps,
+                            configState.taskConfig(taskId),
+                            this,
+                            configState.targetState(taskId.connector()),
+                            () -> {
+                                FutureCallback<Void> preflightFencing = new FutureCallback<>();
+                                fenceZombieSourceTasks(taskId, preflightFencing);
+                                try {
+                                    preflightFencing.get();
+                                } catch (InterruptedException e) {
+                                    throw new ConnectException("Interrupted while attempting to perform round of zombie fencing", e);
+                                } catch (ExecutionException e) {
+                                    Throwable cause = e.getCause();
+                                    throw ConnectUtils.maybeWrap(cause, "Failed to perform round of zombie fencing");
+                                }
+                            },
+                            () -> verifyTaskGenerationAndOwnership(taskId, taskGeneration)
+                    );
+                } else {
+                    return worker.startSourceTask(
+                            taskId,
+                            configState,
+                            connProps,
+                            configState.taskConfig(taskId),
+                            this,
+                            configState.targetState(taskId.connector())
+                    );
+                }
+            default:
+                throw new ConnectException("Failed to start task " + taskId + " since it is not a recognizable type (source or sink)");
+        }
     }
 
     private Callable<Void> getTaskStartingCallable(final ConnectorTaskId taskId) {
@@ -1583,7 +1904,7 @@ private void reconfigureConnector(final String connName, final Callback<Void> cb
             if (changed) {
                 List<Map<String, String>> rawTaskProps = reverseTransform(connName, configState, taskProps);
                 if (isLeader()) {
-                    configBackingStore.putTaskConfigs(connName, rawTaskProps);
+                    writeToConfigTopicAsLeader(() -> configBackingStore.putTaskConfigs(connName, rawTaskProps));
                     cb.onCompletion(null, null);
                 } else {
                     // We cannot forward the request on the same thread because this reconfiguration can happen as a result of connector
@@ -1618,6 +1939,48 @@ private void reconfigureConnector(final String connName, final Callback<Void> cb
         }
     }
 
+    // Invoked by exactly-once worker source tasks after they have successfully initialized their transactional
+    // producer to ensure that it is still safe to bring up the task
+    private void verifyTaskGenerationAndOwnership(ConnectorTaskId id, int initialTaskGen) {
+        log.debug("Reading to end of config topic to ensure it is still safe to bring up source task {} with exactly-once support", id);
+        if (!refreshConfigSnapshot(Long.MAX_VALUE)) {
+            throw new ConnectException("Failed to read to end of config topic");
+        }
+
+        FutureCallback<Void> verifyCallback = new FutureCallback<>();
+
+        addRequest(
+            () -> verifyTaskGenerationAndOwnership(id, initialTaskGen, verifyCallback),
+            forwardErrorCallback(verifyCallback)
+        );
+
+        try {
+            verifyCallback.get();
+        } catch (InterruptedException e) {
+            throw new ConnectException("Interrupted while performing preflight check for task " + id, e);
+        } catch (ExecutionException e) {
+            Throwable cause = e.getCause();
+            throw ConnectUtils.maybeWrap(cause, "Failed to perform preflight check for task " + id);
+        }
+    }
+
+    // Visible for testing
+    Void verifyTaskGenerationAndOwnership(ConnectorTaskId id, int initialTaskGen, Callback<Void> callback) {
+        Integer currentTaskGen = configState.taskConfigGeneration(id.connector());
+        if (!Objects.equals(initialTaskGen, currentTaskGen)) {
+            throw new ConnectException("Cannot start source task "
+                + id + " with exactly-once support as the connector has already generated a new set of task configs");
+        }
+
+        if (!assignment.tasks().contains(id)) {
+            throw new ConnectException("Cannot start source task "
+                + id + " as it has already been revoked from this worker");
+        }
+
+        callback.onCompletion(null, null);
+        return null;
+    }
+
     private boolean checkRebalanceNeeded(Callback<?> callback) {
         // Raise an error if we are expecting a rebalance to begin. This prevents us from forwarding requests
         // based on stale leadership or assignment information
@@ -1628,6 +1991,23 @@ private boolean checkRebalanceNeeded(Callback<?> callback) {
         return false;
     }
 
+    /**
+     * Execute the given action and subsequent callback immediately if the current thread is the herder's tick thread,
+     * or use them to create and store a {@link DistributedHerderRequest} on the request queue and return the resulting request
+     * if not.
+     * @param action the action that should be run on the herder's tick thread
+     * @param callback the callback that should be invoked once the action is complete
+     * @return a new {@link DistributedHerderRequest} if one has been created and added to the request queue, and {@code null} otherwise
+     */
+    DistributedHerderRequest runOnTickThread(Callable<Void> action, Callback<Void> callback) {
+        if (Thread.currentThread().equals(herderThread)) {
+            runRequest(action, callback);
+            return null;
+        } else {
+            return addRequest(action, callback);
+        }
+    }
+
     DistributedHerderRequest addRequest(Callable<Void> action, Callback<Void> callback) {
         return addRequest(0, action, callback);
     }
@@ -1640,6 +2020,15 @@ DistributedHerderRequest addRequest(long delayMs, Callable<Void> action, Callbac
         return req;
     }
 
+    private void runRequest(Callable<Void> action, Callback<Void> callback) {
+        try {
+            action.call();
+            callback.onCompletion(null, null);
+        } catch (Throwable t) {
+            callback.onCompletion(t, null);
+        }
+    }
+
     private boolean internalRequestValidationEnabled() {
         return internalRequestValidationEnabled(member.currentProtocolVersion());
     }
@@ -1692,7 +2081,7 @@ public void onTaskConfigUpdate(Collection<ConnectorTaskId> tasks) {
             log.info("Tasks {} configs updated", tasks);
 
             // Stage the update and wake up the work thread.
-            // The set of tasks is recorder for incremental cooperative rebalancing, in which
+            // The set of tasks is recorded for incremental cooperative rebalancing, in which
             // tasks don't get restarted unless they are balanced between workers.
             // With eager rebalancing there's no need to record the set of tasks because task reconfigs
             // always need a rebalance to ensure offsets get committed. In eager rebalancing the
@@ -1703,6 +2092,20 @@ public void onTaskConfigUpdate(Collection<ConnectorTaskId> tasks) {
                 needsReconfigRebalance = true;
                 taskConfigUpdates.addAll(tasks);
             }
+            tasks.stream()
+                    .map(ConnectorTaskId::connector)
+                    .distinct()
+                    .forEach(connName -> {
+                        synchronized (this) {
+                            ZombieFencing activeFencing = activeZombieFencings.get(connName);
+                            if (activeFencing != null) {
+                                activeFencing.completeExceptionally(new ConnectRestException(
+                                    Response.Status.CONFLICT.getStatusCode(),
+                                    "Failed to complete zombie fencing because a new set of task configs was generated"
+                                ));
+                            }
+                        }
+                    });
             member.wakeup();
         }
 
@@ -1892,12 +2295,20 @@ public void onAssigned(ExtendedAssignment assignment, int generation) {
                 herderMetrics.rebalanceStarted(time.milliseconds());
             }
 
-            // Delete the statuses of all connectors and tasks removed prior to the start of this rebalance. This
-            // has to be done after the rebalance completes to avoid race conditions as the previous generation
-            // attempts to change the state to UNASSIGNED after tasks have been stopped.
             if (isLeader()) {
+                // Delete the statuses of all connectors and tasks removed prior to the start of this rebalance. This
+                // has to be done after the rebalance completes to avoid race conditions as the previous generation
+                // attempts to change the state to UNASSIGNED after tasks have been stopped.
                 updateDeletedConnectorStatus();
                 updateDeletedTaskStatus();
+                // As the leader, we're now allowed to write directly to the config topic for important things like
+                // connector configs, session keys, and task count records
+                try {
+                    configBackingStore.claimWritePrivileges();
+                } catch (Exception e) {
+                    fencedFromConfigTopic = true;
+                    log.error("Unable to claim write privileges for config topic after being elected leader during rebalance", e);
+                }
             }
 
             // We *must* interrupt any poll() call since this could occur when the poll starts, and we might then
@@ -1965,6 +2376,166 @@ private void resetActiveTopics(Collection<String> connectors, Collection<Connect
         }
     }
 
+    private boolean isSourceConnector(String connName) {
+        return ConnectorType.SOURCE.equals(connectorTypeForConfig(configState.connectorConfig(connName)));
+    }
+
+    /**
+     * Checks a given {@link InternalRequestSignature request signature} for validity and adds an exception
+     * to the given {@link Callback} if any errors are found.
+     *
+     * @param requestSignature the request signature to validate
+     * @param callback callback to report invalid signature errors to
+     * @return true if the signature was not valid
+     */
+    private boolean requestNotSignedProperly(InternalRequestSignature requestSignature, Callback<?> callback) {
+        if (internalRequestValidationEnabled()) {
+            ConnectRestException requestValidationError = null;
+            if (requestSignature == null) {
+                requestValidationError = new BadRequestException("Internal request missing required signature");
+            } else if (!keySignatureVerificationAlgorithms.contains(requestSignature.keyAlgorithm())) {
+                requestValidationError = new BadRequestException(String.format(
+                        "This worker does not support the '%s' key signing algorithm used by other workers. "
+                                + "This worker is currently configured to use: %s. "
+                                + "Check that all workers' configuration files permit the same set of signature algorithms, "
+                                + "and correct any misconfigured worker and restart it.",
+                        requestSignature.keyAlgorithm(),
+                        keySignatureVerificationAlgorithms
+                ));
+            } else {
+                if (!requestSignature.isValid(sessionKey)) {
+                    requestValidationError = new ConnectRestException(
+                            Response.Status.FORBIDDEN,
+                            "Internal request contained invalid signature."
+                    );
+                }
+            }
+            if (requestValidationError != null) {
+                callback.onCompletion(requestValidationError, null);
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Represents an active zombie fencing: that is, an in-progress attempt to invoke
+     * {@link Worker#fenceZombies(String, int, Map)} and then, if successful, write a new task count
+     * record to the config topic.
+     */
+    class ZombieFencing {
+        private final String connName;
+        private final int tasksToFence;
+        private final int tasksToRecord;
+        private final int taskGen;
+        private final FutureCallback<Void> fencingFollowup;
+        private KafkaFuture<Void> fencingFuture;
+
+        public ZombieFencing(String connName, int tasksToFence, int tasksToRecord, int taskGen) {
+            this.connName = connName;
+            this.tasksToFence = tasksToFence;
+            this.tasksToRecord = tasksToRecord;
+            this.taskGen = taskGen;
+            this.fencingFollowup = new FutureCallback<>();
+        }
+
+        /**
+         * Start sending requests to the Kafka cluster to fence zombies. In rare cases, may cause blocking calls to
+         * take place before returning, so care should be taken to ensure that this method is not invoked while holding
+         * any important locks (e.g., while synchronized on the surrounding DistributedHerder instance).
+         * This method must be invoked before any {@link #addCallback(Callback) callbacks can be added},
+         * and may only be invoked once.
+         * @throws IllegalStateException if invoked multiple times
+         */
+        public void start() {
+            if (fencingFuture != null) {
+                throw new IllegalStateException("Cannot invoke start() multiple times");
+            }
+            fencingFuture = worker.fenceZombies(connName, tasksToFence, configState.connectorConfig(connName)).thenApply(ignored -> {
+                // This callback will be called on the same thread that invokes KafkaFuture::thenApply if
+                // the future is already completed. Since that thread is the herder tick thread, we don't need
+                // to perform follow-up logic through an additional herder request (and if we tried, it would lead
+                // to deadlock)
+                runOnTickThread(
+                        this::onZombieFencingSuccess,
+                        fencingFollowup
+                );
+                awaitFollowup();
+                return null;
+            });
+            // Immediately after the fencing and necessary followup work (i.e., writing the task count record to the config topic)
+            // is complete, remove this from the list of active fencings
+            addCallback((ignored, error) -> {
+                synchronized (DistributedHerder.this) {
+                    activeZombieFencings.remove(connName);
+                }
+            });
+
+        }
+
+        // Invoked after the worker has successfully fenced out the producers of old task generations using an admin client
+        // Note that work here will be performed on the herder's tick thread, so it should not block for very long
+        private Void onZombieFencingSuccess() {
+            if (!refreshConfigSnapshot(workerSyncTimeoutMs)) {
+                throw new ConnectException("Failed to read to end of config topic");
+            }
+            if (taskGen < configState.taskConfigGeneration(connName)) {
+                throw new ConnectRestException(
+                    Response.Status.CONFLICT.getStatusCode(),
+                    "Fencing failed because new task configurations were generated for the connector");
+            }
+            // If we've already been cancelled, skip the write to the config topic
+            if (fencingFollowup.isDone()) {
+                return null;
+            }
+            writeToConfigTopicAsLeader(() -> configBackingStore.putTaskCountRecord(connName, tasksToRecord));
+            return null;
+        }
+
+        private void awaitFollowup() {
+            try {
+                fencingFollowup.get();
+            } catch (InterruptedException e) {
+                throw new ConnectException("Interrupted while performing zombie fencing", e);
+            } catch (ExecutionException e) {
+                Throwable cause = e.getCause();
+                throw ConnectUtils.maybeWrap(cause, "Failed to perform round of zombie fencing");
+            }
+        }
+
+        /**
+         * Fail the fencing if it is still active, reporting the given exception as the cause of failure
+         * @param t the cause of failure to report for the failed fencing; may not be null
+         */
+        public void completeExceptionally(Throwable t) {
+            Objects.requireNonNull(t);
+            fencingFollowup.onCompletion(t, null);
+        }
+
+        /**
+         * Add a callback to invoke after the fencing has succeeded and a record of it has been written to the config topic
+         * Note that this fencing must be {@link #start() started} before this method is invoked
+         * @param callback the callback to report the success or failure of the fencing to
+         * @throws IllegalStateException if this method is invoked before {@link #start()}
+         */
+        public void addCallback(Callback<Void> callback) {
+            if (fencingFuture == null) {
+                throw new IllegalStateException("The start() method must be invoked before adding callbacks for this zombie fencing");
+            }
+            fencingFuture.whenComplete((ignored, error) -> {
+                if (error != null) {
+                    callback.onCompletion(
+                            ConnectUtils.maybeWrap(error, "Failed to perform zombie fencing"),
+                            null
+                    );
+                } else {
+                    callback.onCompletion(null, null);
+                }
+            });
+        }
+    }
+
     class HerderMetrics {
         private final MetricGroup metricGroup;
         private final Sensor rebalanceCompletedCounts;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/EagerAssignor.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/EagerAssignor.java
index d86feaaaaf967..f4edb98ed60fb 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/EagerAssignor.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/EagerAssignor.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.common.utils.CircularIterator;
 import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.slf4j.Logger;
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignor.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignor.java
index e6a8b302b4c21..57e7b004857f7 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignor.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignor.java
@@ -16,11 +16,14 @@
  */
 package org.apache.kafka.connect.runtime.distributed;
 
+import java.util.Arrays;
 import java.util.Map.Entry;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks;
 import org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad;
+import org.apache.kafka.connect.util.ConnectUtils;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.slf4j.Logger;
 
@@ -43,9 +46,10 @@
 
 import static org.apache.kafka.common.message.JoinGroupResponseData.JoinGroupResponseMember;
 import static org.apache.kafka.connect.runtime.distributed.ConnectProtocol.Assignment;
-import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1;
 import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2;
 import static org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.LeaderState;
+import static org.apache.kafka.connect.util.ConnectUtils.combineCollections;
+import static org.apache.kafka.connect.util.ConnectUtils.transformValues;
 
 /**
  * An assignor that computes a distribution of connectors and tasks according to the incremental
@@ -104,18 +108,15 @@ public Map<String, ByteBuffer> performAssignment(String leaderId, String protoco
         log.debug("Max config offset root: {}, local snapshot config offsets root: {}",
                   maxOffset, coordinator.configSnapshot().offset());
 
-        short protocolVersion = memberConfigs.values().stream()
-            .allMatch(state -> state.assignment().version() == CONNECT_PROTOCOL_V2)
-                ? CONNECT_PROTOCOL_V2
-                : CONNECT_PROTOCOL_V1;
+        short protocolVersion = ConnectProtocolCompatibility.fromProtocol(protocol).protocolVersion();
 
         Long leaderOffset = ensureLeaderConfig(maxOffset, coordinator);
         if (leaderOffset == null) {
             Map<String, ExtendedAssignment> assignments = fillAssignments(
                     memberConfigs.keySet(), Assignment.CONFIG_MISMATCH,
-                    leaderId, memberConfigs.get(leaderId).url(), maxOffset, Collections.emptyMap(),
-                    Collections.emptyMap(), Collections.emptyMap(), 0, protocolVersion);
-            return serializeAssignments(assignments);
+                    leaderId, memberConfigs.get(leaderId).url(), maxOffset,
+                    ClusterAssignment.EMPTY, 0, protocolVersion);
+            return serializeAssignments(assignments, protocolVersion);
         }
         return performTaskAssignment(leaderId, leaderOffset, memberConfigs, coordinator, protocolVersion);
     }
@@ -159,11 +160,41 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
                                                             WorkerCoordinator coordinator, short protocolVersion) {
         log.debug("Performing task assignment during generation: {} with memberId: {}",
                 coordinator.generationId(), coordinator.memberId());
+        Map<String, ConnectorsAndTasks> memberAssignments = transformValues(
+                memberConfigs,
+                memberConfig -> new ConnectorsAndTasks.Builder()
+                        .with(memberConfig.assignment().connectors(), memberConfig.assignment().tasks())
+                        .build()
+        );
+        ClusterAssignment clusterAssignment = performTaskAssignment(
+                coordinator.configSnapshot(),
+                coordinator.lastCompletedGenerationId(),
+                coordinator.generationId(),
+                memberAssignments
+        );
+
+        coordinator.leaderState(new LeaderState(memberConfigs, clusterAssignment.allAssignedConnectors(), clusterAssignment.allAssignedTasks()));
 
+        Map<String, ExtendedAssignment> assignments =
+                fillAssignments(memberConfigs.keySet(), Assignment.NO_ERROR, leaderId,
+                        memberConfigs.get(leaderId).url(), maxOffset,
+                        clusterAssignment,
+                        delay, protocolVersion);
+
+        log.debug("Actual assignments: {}", assignments);
+        return serializeAssignments(assignments, protocolVersion);
+    }
+
+    // Visible for testing
+    ClusterAssignment performTaskAssignment(
+            ClusterConfigState configSnapshot,
+            int lastCompletedGenerationId,
+            int currentGenerationId,
+            Map<String, ConnectorsAndTasks> memberAssignments
+    ) {
         // Base set: The previous assignment of connectors-and-tasks is a standalone snapshot that
         // can be used to calculate derived sets
         log.debug("Previous assignments: {}", previousAssignment);
-        int lastCompletedGenerationId = coordinator.lastCompletedGenerationId();
         if (previousGenerationId != lastCompletedGenerationId) {
             log.debug("Clearing the view of previous assignments due to generation mismatch between "
                     + "previous generation ID {} and last completed generation ID {}. This can "
@@ -175,11 +206,8 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
             this.previousAssignment = ConnectorsAndTasks.EMPTY;
         }
 
-        ClusterConfigState snapshot = coordinator.configSnapshot();
-        Set<String> configuredConnectors = new TreeSet<>(snapshot.connectors());
-        Set<ConnectorTaskId> configuredTasks = configuredConnectors.stream()
-                .flatMap(c -> snapshot.tasks(c).stream())
-                .collect(Collectors.toSet());
+        Set<String> configuredConnectors = new TreeSet<>(configSnapshot.connectors());
+        Set<ConnectorTaskId> configuredTasks = combineCollections(configuredConnectors, configSnapshot::tasks, Collectors.toSet());
 
         // Base set: The set of configured connectors-and-tasks is a standalone snapshot that can
         // be used to calculate derived sets
@@ -189,7 +217,7 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
 
         // Base set: The set of active connectors-and-tasks is a standalone snapshot that can be
         // used to calculate derived sets
-        ConnectorsAndTasks activeAssignments = assignment(memberConfigs);
+        ConnectorsAndTasks activeAssignments = assignment(memberAssignments);
         log.debug("Active assignments: {}", activeAssignments);
 
         // This means that a previous revocation did not take effect. In this case, reset
@@ -225,7 +253,7 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
         log.debug("New assignments: {}", newSubmissions);
 
         // A collection of the complete assignment
-        List<WorkerLoad> completeWorkerAssignment = workerAssignment(memberConfigs, ConnectorsAndTasks.EMPTY);
+        List<WorkerLoad> completeWorkerAssignment = workerAssignment(memberAssignments, ConnectorsAndTasks.EMPTY);
         log.debug("Complete (ignoring deletions) worker assignments: {}", completeWorkerAssignment);
 
         // Per worker connector assignments without removing deleted connectors yet
@@ -239,23 +267,23 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
         log.debug("Complete (ignoring deletions) task assignments: {}", taskAssignments);
 
         // A collection of the current assignment excluding the connectors-and-tasks to be deleted
-        List<WorkerLoad> currentWorkerAssignment = workerAssignment(memberConfigs, deleted);
+        List<WorkerLoad> currentWorkerAssignment = workerAssignment(memberAssignments, deleted);
 
         Map<String, ConnectorsAndTasks> toRevoke = computeDeleted(deleted, connectorAssignments, taskAssignments);
         log.debug("Connector and task to delete assignments: {}", toRevoke);
 
         // Revoking redundant connectors/tasks if the workers have duplicate assignments
-        toRevoke.putAll(computeDuplicatedAssignments(memberConfigs, connectorAssignments, taskAssignments));
+        toRevoke.putAll(computeDuplicatedAssignments(memberAssignments, connectorAssignments, taskAssignments));
         log.debug("Connector and task to revoke assignments (include duplicated assignments): {}", toRevoke);
 
         // Recompute the complete assignment excluding the deleted connectors-and-tasks
-        completeWorkerAssignment = workerAssignment(memberConfigs, deleted);
+        completeWorkerAssignment = workerAssignment(memberAssignments, deleted);
         connectorAssignments =
                 completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::connectors));
         taskAssignments =
                 completeWorkerAssignment.stream().collect(Collectors.toMap(WorkerLoad::worker, WorkerLoad::tasks));
 
-        handleLostAssignments(lostAssignments, newSubmissions, completeWorkerAssignment, memberConfigs);
+        handleLostAssignments(lostAssignments, newSubmissions, completeWorkerAssignment);
 
         // Do not revoke resources for re-assignment while a delayed rebalance is active
         // Also we do not revoke in two consecutive rebalances by the same leader
@@ -298,20 +326,24 @@ protected Map<String, ByteBuffer> performTaskAssignment(String leaderId, long ma
         Map<String, Collection<ConnectorTaskId>> incrementalTaskAssignments =
                 diff(taskAssignments, currentTaskAssignments);
 
+        previousAssignment = computePreviousAssignment(toRevoke, connectorAssignments, taskAssignments, lostAssignments);
+        previousGenerationId = currentGenerationId;
+        previousMembers = memberAssignments.keySet();
+
         log.debug("Incremental connector assignments: {}", incrementalConnectorAssignments);
         log.debug("Incremental task assignments: {}", incrementalTaskAssignments);
 
-        coordinator.leaderState(new LeaderState(memberConfigs, connectorAssignments, taskAssignments));
-
-        Map<String, ExtendedAssignment> assignments =
-                fillAssignments(memberConfigs.keySet(), Assignment.NO_ERROR, leaderId,
-                                memberConfigs.get(leaderId).url(), maxOffset, incrementalConnectorAssignments,
-                                incrementalTaskAssignments, toRevoke, delay, protocolVersion);
-        previousAssignment = computePreviousAssignment(toRevoke, connectorAssignments, taskAssignments, lostAssignments);
-        previousGenerationId = coordinator.generationId();
-        previousMembers = memberConfigs.keySet();
-        log.debug("Actual assignments: {}", assignments);
-        return serializeAssignments(assignments);
+        Map<String, Collection<String>> revokedConnectors = transformValues(toRevoke, ConnectorsAndTasks::connectors);
+        Map<String, Collection<ConnectorTaskId>> revokedTasks = transformValues(toRevoke, ConnectorsAndTasks::tasks);
+
+        return new ClusterAssignment(
+                incrementalConnectorAssignments,
+                incrementalTaskAssignments,
+                revokedConnectors,
+                revokedTasks,
+                diff(connectorAssignments, revokedConnectors),
+                diff(taskAssignments, revokedTasks)
+        );
     }
 
     private Map<String, ConnectorsAndTasks> computeDeleted(ConnectorsAndTasks deleted,
@@ -344,9 +376,9 @@ private ConnectorsAndTasks computePreviousAssignment(Map<String, ConnectorsAndTa
                                                          Map<String, Collection<ConnectorTaskId>> taskAssignments,
                                                          ConnectorsAndTasks lostAssignments) {
         ConnectorsAndTasks previousAssignment = new ConnectorsAndTasks.Builder().with(
-                connectorAssignments.values().stream().flatMap(Collection::stream).collect(Collectors.toSet()),
-                taskAssignments.values() .stream() .flatMap(Collection::stream).collect(Collectors.toSet()))
-                .build();
+                ConnectUtils.combineCollections(connectorAssignments.values()),
+                ConnectUtils.combineCollections(taskAssignments.values())
+        ).build();
 
         for (ConnectorsAndTasks revoked : toRevoke.values()) {
             previousAssignment.connectors().removeAll(revoked.connectors());
@@ -363,29 +395,36 @@ private ConnectorsAndTasks computePreviousAssignment(Map<String, ConnectorsAndTa
         return previousAssignment;
     }
 
-    private ConnectorsAndTasks duplicatedAssignments(Map<String, ExtendedWorkerState> memberConfigs) {
-        Set<String> connectors = memberConfigs.entrySet().stream()
-                .flatMap(memberConfig -> memberConfig.getValue().assignment().connectors().stream())
-                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
+    private ConnectorsAndTasks duplicatedAssignments(Map<String, ConnectorsAndTasks> memberAssignments) {
+        Map<String, Long> connectorInstanceCounts = combineCollections(
+                memberAssignments.values(),
+                ConnectorsAndTasks::connectors,
+                Collectors.groupingBy(Function.identity(), Collectors.counting())
+        );
+        Set<String> duplicatedConnectors = connectorInstanceCounts
                 .entrySet().stream()
                 .filter(entry -> entry.getValue() > 1L)
                 .map(Entry::getKey)
                 .collect(Collectors.toSet());
 
-        Set<ConnectorTaskId> tasks = memberConfigs.values().stream()
-                .flatMap(state -> state.assignment().tasks().stream())
-                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
+        Map<ConnectorTaskId, Long> taskInstanceCounts = combineCollections(
+                memberAssignments.values(),
+                ConnectorsAndTasks::tasks,
+                Collectors.groupingBy(Function.identity(), Collectors.counting())
+        );
+        Set<ConnectorTaskId> duplicatedTasks = taskInstanceCounts
                 .entrySet().stream()
                 .filter(entry -> entry.getValue() > 1L)
                 .map(Entry::getKey)
                 .collect(Collectors.toSet());
-        return new ConnectorsAndTasks.Builder().with(connectors, tasks).build();
+
+        return new ConnectorsAndTasks.Builder().with(duplicatedConnectors, duplicatedTasks).build();
     }
 
-    private Map<String, ConnectorsAndTasks> computeDuplicatedAssignments(Map<String, ExtendedWorkerState> memberConfigs,
+    private Map<String, ConnectorsAndTasks> computeDuplicatedAssignments(Map<String, ConnectorsAndTasks> memberAssignments,
                                              Map<String, Collection<String>> connectorAssignments,
                                              Map<String, Collection<ConnectorTaskId>> taskAssignment) {
-        ConnectorsAndTasks duplicatedAssignments = duplicatedAssignments(memberConfigs);
+        ConnectorsAndTasks duplicatedAssignments = duplicatedAssignments(memberAssignments);
         log.debug("Duplicated assignments: {}", duplicatedAssignments);
 
         Map<String, ConnectorsAndTasks> toRevoke = new HashMap<>();
@@ -421,8 +460,7 @@ private Map<String, ConnectorsAndTasks> computeDuplicatedAssignments(Map<String,
     // visible for testing
     protected void handleLostAssignments(ConnectorsAndTasks lostAssignments,
                                          ConnectorsAndTasks newSubmissions,
-                                         List<WorkerLoad> completeWorkerAssignment,
-                                         Map<String, ExtendedWorkerState> memberConfigs) {
+                                         List<WorkerLoad> completeWorkerAssignment) {
         if (lostAssignments.isEmpty()) {
             resetDelay();
             return;
@@ -432,7 +470,10 @@ protected void handleLostAssignments(ConnectorsAndTasks lostAssignments,
         log.debug("Found the following connectors and tasks missing from previous assignments: "
                 + lostAssignments);
 
-        if (scheduledRebalance <= 0 && memberConfigs.keySet().containsAll(previousMembers)) {
+        Set<String> activeMembers = completeWorkerAssignment.stream()
+                .map(WorkerLoad::worker)
+                .collect(Collectors.toSet());
+        if (scheduledRebalance <= 0 && activeMembers.containsAll(previousMembers)) {
             log.debug("No worker seems to have departed the group during the rebalance. The "
                     + "missing assignments that the leader is detecting are probably due to some "
                     + "workers failing to receive the new assignments in the previous rebalance. "
@@ -489,7 +530,7 @@ protected void handleLostAssignments(ConnectorsAndTasks lostAssignments,
                 log.debug("Delayed rebalance in progress. Task reassignment is postponed. New computed rebalance delay: {}", delay);
             } else {
                 // This means scheduledRebalance == 0
-                // We could also also extract the current minimum delay from the group, to make
+                // We could also extract the current minimum delay from the group, to make
                 // independent of consecutive leader failures, but this optimization is skipped
                 // at the moment
                 delay = maxDelay;
@@ -526,7 +567,7 @@ private List<WorkerLoad> pickCandidateWorkerForReassignment(List<WorkerLoad> com
     }
 
     /**
-     * Task revocation is based on an rough estimation of the lower average number of tasks before
+     * Task revocation is based on a rough estimation of the lower average number of tasks before
      * and after new workers join the group. If no new workers join, no revocation takes place.
      * Based on this estimation, tasks are revoked until the new floor average is reached for
      * each existing worker. The revoked tasks, once assigned to the new workers will maintain
@@ -610,16 +651,14 @@ private Map<String, ConnectorsAndTasks> performTaskRevocation(ConnectorsAndTasks
 
     private Map<String, ExtendedAssignment> fillAssignments(Collection<String> members, short error,
                                                             String leaderId, String leaderUrl, long maxOffset,
-                                                            Map<String, Collection<String>> connectorAssignments,
-                                                            Map<String, Collection<ConnectorTaskId>> taskAssignments,
-                                                            Map<String, ConnectorsAndTasks> revoked,
+                                                            ClusterAssignment clusterAssignment,
                                                             int delay, short protocolVersion) {
         Map<String, ExtendedAssignment> groupAssignment = new HashMap<>();
         for (String member : members) {
-            Collection<String> connectorsToStart = connectorAssignments.getOrDefault(member, Collections.emptyList());
-            Collection<ConnectorTaskId> tasksToStart = taskAssignments.getOrDefault(member, Collections.emptyList());
-            Collection<String> connectorsToStop = revoked.getOrDefault(member, ConnectorsAndTasks.EMPTY).connectors();
-            Collection<ConnectorTaskId> tasksToStop = revoked.getOrDefault(member, ConnectorsAndTasks.EMPTY).tasks();
+            Collection<String> connectorsToStart = clusterAssignment.newlyAssignedConnectors(member);
+            Collection<ConnectorTaskId> tasksToStart = clusterAssignment.newlyAssignedTasks(member);
+            Collection<String> connectorsToStop = clusterAssignment.newlyRevokedConnectors(member);
+            Collection<ConnectorTaskId> tasksToStop = clusterAssignment.newlyRevokedTasks(member);
             ExtendedAssignment assignment =
                     new ExtendedAssignment(protocolVersion, error, leaderId, leaderUrl, maxOffset,
                             connectorsToStart, tasksToStart, connectorsToStop, tasksToStop, delay);
@@ -637,12 +676,13 @@ private Map<String, ExtendedAssignment> fillAssignments(Collection<String> membe
      * @param assignments the map of worker assignments
      * @return the serialized map of assignments to workers
      */
-    protected Map<String, ByteBuffer> serializeAssignments(Map<String, ExtendedAssignment> assignments) {
+    protected Map<String, ByteBuffer> serializeAssignments(Map<String, ExtendedAssignment> assignments, short protocolVersion) {
+        boolean sessioned = protocolVersion >= CONNECT_PROTOCOL_V2;
         return assignments.entrySet()
                 .stream()
                 .collect(Collectors.toMap(
                     Map.Entry::getKey,
-                    e -> IncrementalCooperativeConnectProtocol.serializeAssignment(e.getValue())));
+                    e -> IncrementalCooperativeConnectProtocol.serializeAssignment(e.getValue(), sessioned)));
     }
 
     private static ConnectorsAndTasks diff(ConnectorsAndTasks base,
@@ -661,23 +701,18 @@ private static <T> Map<String, Collection<T>> diff(Map<String, Collection<T>> ba
         Map<String, Collection<T>> incremental = new HashMap<>();
         for (Map.Entry<String, Collection<T>> entry : base.entrySet()) {
             List<T> values = new ArrayList<>(entry.getValue());
-            values.removeAll(toSubtract.get(entry.getKey()));
+            values.removeAll(toSubtract.getOrDefault(entry.getKey(), Collections.emptySet()));
             incremental.put(entry.getKey(), values);
         }
         return incremental;
     }
 
-    private ConnectorsAndTasks assignment(Map<String, ExtendedWorkerState> memberConfigs) {
-        log.debug("Received assignments: {}", memberConfigs);
-        Set<String> connectors = memberConfigs.values()
-                .stream()
-                .flatMap(state -> state.assignment().connectors().stream())
-                .collect(Collectors.toSet());
-        Set<ConnectorTaskId> tasks = memberConfigs.values()
-                .stream()
-                .flatMap(state -> state.assignment().tasks().stream())
-                .collect(Collectors.toSet());
-        return new ConnectorsAndTasks.Builder().with(connectors, tasks).build();
+    private ConnectorsAndTasks assignment(Map<String, ConnectorsAndTasks> memberAssignments) {
+        log.debug("Received assignments: {}", memberAssignments);
+        return new ConnectorsAndTasks.Builder().with(
+                ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::connectors),
+                ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::tasks)
+        ).build();
     }
 
     private int calculateDelay(long now) {
@@ -745,22 +780,120 @@ protected void assignTasks(List<WorkerLoad> workerAssignment, Collection<Connect
         }
     }
 
-    private static List<WorkerLoad> workerAssignment(Map<String, ExtendedWorkerState> memberConfigs,
+    private static List<WorkerLoad> workerAssignment(Map<String, ConnectorsAndTasks> memberAssignments,
                                                      ConnectorsAndTasks toExclude) {
         ConnectorsAndTasks ignore = new ConnectorsAndTasks.Builder()
                 .with(new HashSet<>(toExclude.connectors()), new HashSet<>(toExclude.tasks()))
                 .build();
 
-        return memberConfigs.entrySet().stream()
+        return memberAssignments.entrySet().stream()
                 .map(e -> new WorkerLoad.Builder(e.getKey()).with(
-                        e.getValue().assignment().connectors().stream()
+                        e.getValue().connectors().stream()
                                 .filter(v -> !ignore.connectors().contains(v))
                                 .collect(Collectors.toList()),
-                        e.getValue().assignment().tasks().stream()
+                        e.getValue().tasks().stream()
                                 .filter(v -> !ignore.tasks().contains(v))
                                 .collect(Collectors.toList())
                         ).build()
                 ).collect(Collectors.toList());
     }
 
+    static class ClusterAssignment {
+
+        private final Map<String, Collection<String>> newlyAssignedConnectors;
+        private final Map<String, Collection<ConnectorTaskId>> newlyAssignedTasks;
+        private final Map<String, Collection<String>> newlyRevokedConnectors;
+        private final Map<String, Collection<ConnectorTaskId>> newlyRevokedTasks;
+        private final Map<String, Collection<String>> allAssignedConnectors;
+        private final Map<String, Collection<ConnectorTaskId>> allAssignedTasks;
+        private final Set<String> allWorkers;
+
+        public static final ClusterAssignment EMPTY = new ClusterAssignment(
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap()
+        );
+
+        public ClusterAssignment(
+                Map<String, Collection<String>> newlyAssignedConnectors,
+                Map<String, Collection<ConnectorTaskId>> newlyAssignedTasks,
+                Map<String, Collection<String>> newlyRevokedConnectors,
+                Map<String, Collection<ConnectorTaskId>> newlyRevokedTasks,
+                Map<String, Collection<String>> allAssignedConnectors,
+                Map<String, Collection<ConnectorTaskId>> allAssignedTasks
+        ) {
+            this.newlyAssignedConnectors = newlyAssignedConnectors;
+            this.newlyAssignedTasks = newlyAssignedTasks;
+            this.newlyRevokedConnectors = newlyRevokedConnectors;
+            this.newlyRevokedTasks = newlyRevokedTasks;
+            this.allAssignedConnectors = allAssignedConnectors;
+            this.allAssignedTasks = allAssignedTasks;
+            this.allWorkers = combineCollections(
+                    Arrays.asList(newlyAssignedConnectors, newlyAssignedTasks, newlyRevokedConnectors, newlyRevokedTasks, allAssignedConnectors, allAssignedTasks),
+                    Map::keySet,
+                    Collectors.toSet()
+            );
+        }
+
+        public Map<String, Collection<String>> newlyAssignedConnectors() {
+            return newlyAssignedConnectors;
+        }
+
+        public Collection<String> newlyAssignedConnectors(String worker) {
+            return newlyAssignedConnectors.getOrDefault(worker, Collections.emptySet());
+        }
+
+        public Map<String, Collection<ConnectorTaskId>> newlyAssignedTasks() {
+            return newlyAssignedTasks;
+        }
+
+        public Collection<ConnectorTaskId> newlyAssignedTasks(String worker) {
+            return newlyAssignedTasks.getOrDefault(worker, Collections.emptySet());
+        }
+
+        public Map<String, Collection<String>> newlyRevokedConnectors() {
+            return newlyRevokedConnectors;
+        }
+
+        public Collection<String> newlyRevokedConnectors(String worker) {
+            return newlyRevokedConnectors.getOrDefault(worker, Collections.emptySet());
+        }
+
+        public Map<String, Collection<ConnectorTaskId>> newlyRevokedTasks() {
+            return newlyRevokedTasks;
+        }
+
+        public Collection<ConnectorTaskId> newlyRevokedTasks(String worker) {
+            return newlyRevokedTasks.getOrDefault(worker, Collections.emptySet());
+        }
+
+        public Map<String, Collection<String>> allAssignedConnectors() {
+            return allAssignedConnectors;
+        }
+
+        public Map<String, Collection<ConnectorTaskId>> allAssignedTasks() {
+            return allAssignedTasks;
+        }
+
+        public Set<String> allWorkers() {
+            return allWorkers;
+        }
+
+        @Override
+        public String toString() {
+            return "ClusterAssignment{"
+                    + "newlyAssignedConnectors=" + newlyAssignedConnectors
+                    + ", newlyAssignedTasks=" + newlyAssignedTasks
+                    + ", newlyRevokedConnectors=" + newlyRevokedConnectors
+                    + ", newlyRevokedTasks=" + newlyRevokedTasks
+                    + ", allAssignedConnectors=" + allAssignedConnectors
+                    + ", allAssignedTasks=" + allAssignedTasks
+                    + ", allWorkers=" + allWorkers
+                    + '}';
+        }
+    }
+
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeConnectProtocol.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeConnectProtocol.java
index 6bcf9be65eb64..c32009c794fdb 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeConnectProtocol.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeConnectProtocol.java
@@ -154,7 +154,7 @@ public static ByteBuffer serializeMetadata(ExtendedWorkerState workerState, bool
                 .set(CONFIG_OFFSET_KEY_NAME, workerState.offset());
         // Not a big issue if we embed the protocol version with the assignment in the metadata
         Struct allocation = new Struct(ALLOCATION_V1)
-                .set(ALLOCATION_KEY_NAME, serializeAssignment(workerState.assignment()));
+                .set(ALLOCATION_KEY_NAME, serializeAssignment(workerState.assignment(), sessioned));
         Struct connectProtocolHeader = sessioned ? CONNECT_PROTOCOL_HEADER_V2 : CONNECT_PROTOCOL_HEADER_V1;
         ByteBuffer buffer = ByteBuffer.allocate(connectProtocolHeader.sizeOf()
                                                 + CONFIG_STATE_V1.sizeOf(configState)
@@ -230,15 +230,16 @@ public static ExtendedWorkerState deserializeMetadata(ByteBuffer buffer) {
      *   ScheduledDelay     => Int32
      * </pre>
      */
-    public static ByteBuffer serializeAssignment(ExtendedAssignment assignment) {
+    public static ByteBuffer serializeAssignment(ExtendedAssignment assignment, boolean sessioned) {
         // comparison depends on reference equality for now
         if (assignment == null || ExtendedAssignment.empty().equals(assignment)) {
             return null;
         }
         Struct struct = assignment.toStruct();
-        ByteBuffer buffer = ByteBuffer.allocate(CONNECT_PROTOCOL_HEADER_V1.sizeOf()
+        Struct protocolHeader = sessioned ? CONNECT_PROTOCOL_HEADER_V2 : CONNECT_PROTOCOL_HEADER_V1;
+        ByteBuffer buffer = ByteBuffer.allocate(protocolHeader.sizeOf()
                                                 + ASSIGNMENT_V1.sizeOf(struct));
-        CONNECT_PROTOCOL_HEADER_V1.writeTo(buffer);
+        protocolHeader.writeTo(buffer);
         ASSIGNMENT_V1.write(buffer, struct);
         buffer.flip();
         return buffer;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinator.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinator.java
index 65720e2a78782..ced67427a3f13 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinator.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinator.java
@@ -25,6 +25,7 @@
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Timer;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.slf4j.Logger;
@@ -224,7 +225,7 @@ protected Map<String, ByteBuffer> onLeaderElected(String leaderId,
     }
 
     @Override
-    protected boolean onJoinPrepare(int generation, String memberId) {
+    protected boolean onJoinPrepare(Timer timer, int generation, String memberId) {
         log.info("Rebalance started");
         leaderState(null);
         final ExtendedAssignment localAssignmentSnapshot = assignmentSnapshot;
@@ -416,6 +417,29 @@ private String ownerUrl(String connector) {
             return allMembers.get(ownerId).url();
         }
 
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (!(o instanceof LeaderState)) return false;
+            LeaderState that = (LeaderState) o;
+            return Objects.equals(allMembers, that.allMembers)
+                    && Objects.equals(connectorOwners, that.connectorOwners)
+                    && Objects.equals(taskOwners, that.taskOwners);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(allMembers, connectorOwners, taskOwners);
+        }
+
+        @Override
+        public String toString() {
+            return "LeaderState{"
+                    + "allMembers=" + allMembers
+                    + ", connectorOwners=" + connectorOwners
+                    + ", taskOwners=" + taskOwners
+                    + '}';
+        }
     }
 
     public static class ConnectorsAndTasks {
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/LoaderSwap.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/LoaderSwap.java
new file mode 100644
index 0000000000000..47e8c12d54b25
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/LoaderSwap.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime.isolation;
+
+/**
+ * Helper for having {@code Plugins} use a given classloader within a try-with-resources statement.
+ * See {@link Plugins#withClassLoader(ClassLoader)}.
+ */
+public class LoaderSwap implements AutoCloseable {
+
+    private final ClassLoader savedLoader;
+
+    public LoaderSwap(ClassLoader savedLoader) {
+        this.savedLoader = savedLoader;
+    }
+
+    @Override
+    public void close() {
+        Plugins.compareAndSwapLoaders(savedLoader);
+    }
+
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/Plugins.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/Plugins.java
index 7ec73ba78b83d..6d961272399e8 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/Plugins.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/isolation/Plugins.java
@@ -145,6 +145,16 @@ public ClassLoader compareAndSwapLoaders(Connector connector) {
         return compareAndSwapLoaders(connectorLoader);
     }
 
+    public LoaderSwap withClassLoader(ClassLoader loader) {
+        ClassLoader savedLoader = compareAndSwapLoaders(loader);
+        try {
+            return new LoaderSwap(savedLoader);
+        } catch (Throwable t) {
+            compareAndSwapLoaders(savedLoader);
+            throw t;
+        }
+    }
+
     public DelegatingClassLoader delegatingLoader() {
         return delegatingLoader;
     }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestClient.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestClient.java
index 03325526cd0cb..48e2d42ebf60a 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestClient.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestClient.java
@@ -19,10 +19,6 @@
 
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
-
-import javax.crypto.SecretKey;
-import javax.ws.rs.core.HttpHeaders;
-
 import org.apache.kafka.connect.runtime.WorkerConfig;
 import org.apache.kafka.connect.runtime.rest.entities.ErrorMessage;
 import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
@@ -37,6 +33,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.crypto.SecretKey;
+import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.Response;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -100,6 +98,21 @@ public static <T> HttpResponse<T> httpRequest(String url, String method, HttpHea
             throw new ConnectRestException(Response.Status.INTERNAL_SERVER_ERROR, "Failed to start RestClient: " + e.getMessage(), e);
         }
 
+        try {
+            return httpRequest(client, url, method, headers, requestBodyData, responseFormat, sessionKey, requestSignatureAlgorithm);
+        } finally {
+            try {
+                client.stop();
+            } catch (Exception e) {
+                log.error("Failed to stop HTTP client", e);
+            }
+        }
+    }
+
+    static <T> HttpResponse<T> httpRequest(HttpClient client, String url, String method,
+                                           HttpHeaders headers, Object requestBodyData,
+                                           TypeReference<T> responseFormat, SecretKey sessionKey,
+                                           String requestSignatureAlgorithm) {
         try {
             String serializedBody = requestBodyData == null ? null : JSON_SERDE.writeValueAsString(requestBodyData);
             log.trace("Sending {} with input {} to {}", method, serializedBody, url);
@@ -112,14 +125,15 @@ public static <T> HttpResponse<T> httpRequest(String url, String method, HttpHea
 
             if (serializedBody != null) {
                 req.content(new StringContentProvider(serializedBody, StandardCharsets.UTF_8), "application/json");
-                if (sessionKey != null && requestSignatureAlgorithm != null) {
-                    InternalRequestSignature.addToRequest(
-                        sessionKey,
-                        serializedBody.getBytes(StandardCharsets.UTF_8),
-                        requestSignatureAlgorithm,
-                        req
-                    );
-                }
+            }
+
+            if (sessionKey != null && requestSignatureAlgorithm != null) {
+                InternalRequestSignature.addToRequest(
+                    sessionKey,
+                    serializedBody != null ? serializedBody.getBytes(StandardCharsets.UTF_8) : null,
+                    requestSignatureAlgorithm,
+                    req
+                );
             }
 
             ContentResponse res = req.send();
@@ -142,15 +156,14 @@ public static <T> HttpResponse<T> httpRequest(String url, String method, HttpHea
         } catch (IOException | InterruptedException | TimeoutException | ExecutionException e) {
             log.error("IO error forwarding REST request: ", e);
             throw new ConnectRestException(Response.Status.INTERNAL_SERVER_ERROR, "IO Error trying to forward REST request: " + e.getMessage(), e);
+        } catch (ConnectRestException e) {
+            // catching any explicitly thrown ConnectRestException-s to preserve its status code
+            // and to avoid getting it overridden by the more generic catch (Throwable) clause down below
+            log.error("Error forwarding REST request", e);
+            throw e;
         } catch (Throwable t) {
             log.error("Error forwarding REST request", t);
             throw new ConnectRestException(Response.Status.INTERNAL_SERVER_ERROR, "Error trying to forward REST request: " + t.getMessage(), t);
-        } finally {
-            try {
-                client.stop();
-            } catch (Exception e) {
-                log.error("Failed to stop HTTP client", e);
-            }
         }
     }
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java
index ab18419efc7b8..3c89ddb55fc11 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/RestServer.java
@@ -28,6 +28,7 @@
 import org.apache.kafka.connect.runtime.health.ConnectClusterDetailsImpl;
 import org.apache.kafka.connect.runtime.health.ConnectClusterStateImpl;
 import org.apache.kafka.connect.runtime.rest.errors.ConnectExceptionMapper;
+import org.apache.kafka.connect.runtime.rest.resources.ConnectResource;
 import org.apache.kafka.connect.runtime.rest.resources.ConnectorPluginsResource;
 import org.apache.kafka.connect.runtime.rest.resources.ConnectorsResource;
 import org.apache.kafka.connect.runtime.rest.resources.LoggingResource;
@@ -60,6 +61,7 @@
 import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
@@ -88,6 +90,7 @@ public class RestServer {
     private final ContextHandlerCollection handlers;
     private final Server jettyServer;
 
+    private Collection<ConnectResource> resources;
     private List<ConnectRestExtension> connectRestExtensions = Collections.emptyList();
 
     /**
@@ -210,9 +213,11 @@ public void initializeResources(Herder herder) {
         ResourceConfig resourceConfig = new ResourceConfig();
         resourceConfig.register(new JacksonJsonProvider());
 
-        resourceConfig.register(new RootResource(herder));
-        resourceConfig.register(new ConnectorsResource(herder, config));
-        resourceConfig.register(new ConnectorPluginsResource(herder));
+        this.resources = new ArrayList<>();
+        resources.add(new RootResource(herder));
+        resources.add(new ConnectorsResource(herder, config));
+        resources.add(new ConnectorPluginsResource(herder));
+        resources.forEach(resourceConfig::register);
 
         resourceConfig.register(ConnectExceptionMapper.class);
         resourceConfig.property(ServerProperties.WADL_FEATURE_DISABLE, true);
@@ -224,14 +229,18 @@ public void initializeResources(Herder herder) {
         if (adminListeners == null) {
             log.info("Adding admin resources to main listener");
             adminResourceConfig = resourceConfig;
-            adminResourceConfig.register(new LoggingResource());
+            LoggingResource loggingResource = new LoggingResource();
+            this.resources.add(loggingResource);
+            adminResourceConfig.register(loggingResource);
         } else if (adminListeners.size() > 0) {
             // TODO: we need to check if these listeners are same as 'listeners'
             // TODO: the following code assumes that they are different
             log.info("Adding admin resources to admin listener");
             adminResourceConfig = new ResourceConfig();
             adminResourceConfig.register(new JacksonJsonProvider());
-            adminResourceConfig.register(new LoggingResource());
+            LoggingResource loggingResource = new LoggingResource();
+            this.resources.add(loggingResource);
+            adminResourceConfig.register(loggingResource);
             adminResourceConfig.register(ConnectExceptionMapper.class);
         } else {
             log.info("Skipping adding admin resources");
@@ -385,6 +394,11 @@ public URI adminUrl() {
         return builder.build();
     }
 
+    // For testing only
+    public void requestTimeout(long requestTimeoutMs) {
+        this.resources.forEach(resource -> resource.requestTimeout(requestTimeoutMs));
+    }
+
     String determineAdvertisedProtocol() {
         String advertisedSecurityProtocol = config.getString(WorkerConfig.REST_ADVERTISED_LISTENER_CONFIG);
         if (advertisedSecurityProtocol == null) {
@@ -432,7 +446,7 @@ void registerRestExtensions(Herder herder, ResourceConfig resourceConfig) {
             config.getList(WorkerConfig.REST_EXTENSION_CLASSES_CONFIG),
             config, ConnectRestExtension.class);
 
-        long herderRequestTimeoutMs = ConnectorsResource.REQUEST_TIMEOUT_MS;
+        long herderRequestTimeoutMs = ConnectResource.DEFAULT_REST_REQUEST_TIMEOUT_MS;
 
         Integer rebalanceTimeoutMs = config.getRebalanceTimeout();
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/errors/ConnectExceptionMapper.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/errors/ConnectExceptionMapper.java
index 8678fbf16cd58..2bb90e21470bf 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/errors/ConnectExceptionMapper.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/errors/ConnectExceptionMapper.java
@@ -45,7 +45,7 @@ public Response toResponse(Exception exception) {
                     .build();
         }
 
-        if (exception instanceof NotFoundException) {
+        if (exception instanceof NotFoundException || exception instanceof javax.ws.rs.NotFoundException) {
             return Response.status(Response.Status.NOT_FOUND)
                     .entity(new ErrorMessage(Response.Status.NOT_FOUND.getStatusCode(), exception.getMessage()))
                     .build();
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectResource.java
new file mode 100644
index 0000000000000..49d61a727a955
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectResource.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime.rest.resources;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This interface defines shared logic for all Connect REST resources.
+ */
+public interface ConnectResource {
+
+    // TODO: This should not be so long. However, due to potentially long rebalances that may have to wait a full
+    // session timeout to complete, during which we cannot serve some requests. Ideally we could reduce this, but
+    // we need to consider all possible scenarios this could fail. It might be ok to fail with a timeout in rare cases,
+    // but currently a worker simply leaving the group can take this long as well.
+    long DEFAULT_REST_REQUEST_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(90);
+
+    /**
+     * Set how long the resource will await the completion of each request before returning a 500 error.
+     * If the resource does not perform any operations that can be expected to block under reasonable
+     * circumstances, this can be implemented as a no-op.
+     * @param requestTimeoutMs the new timeout in milliseconds; must be positive
+     */
+    void requestTimeout(long requestTimeoutMs);
+
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java
index 2beda9fb8a142..05b8375183c91 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorPluginsResource.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.connect.runtime.rest.resources;
 
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
 import org.apache.kafka.connect.runtime.ConnectorConfig;
 import org.apache.kafka.connect.runtime.Herder;
 import org.apache.kafka.connect.runtime.PredicatedTransformation;
@@ -59,11 +61,12 @@
 @Path("/connector-plugins")
 @Produces(MediaType.APPLICATION_JSON)
 @Consumes(MediaType.APPLICATION_JSON)
-public class ConnectorPluginsResource {
+public class ConnectorPluginsResource implements ConnectResource {
 
     private static final String ALIAS_SUFFIX = "Connector";
     private final Herder herder;
     private final List<PluginInfo> connectorPlugins;
+    private long requestTimeoutMs;
 
     static final List<Class<? extends SinkConnector>> SINK_CONNECTOR_EXCLUDES = Arrays.asList(
             VerifiableSinkConnector.class,
@@ -84,6 +87,7 @@ public class ConnectorPluginsResource {
     public ConnectorPluginsResource(Herder herder) {
         this.herder = herder;
         this.connectorPlugins = new ArrayList<>();
+        this.requestTimeoutMs = DEFAULT_REST_REQUEST_TIMEOUT_MS;
 
         // TODO: improve once plugins are allowed to be added/removed during runtime.
         addConnectorPlugins(herder.plugins().sinkConnectors(), SINK_CONNECTOR_EXCLUDES);
@@ -101,18 +105,24 @@ private <T> void addConnectorPlugins(Collection<PluginDesc<T>> plugins, Collecti
                 .forEach(connectorPlugins::add);
     }
 
+    @Override
+    public void requestTimeout(long requestTimeoutMs) {
+        this.requestTimeoutMs = requestTimeoutMs;
+    }
+
     @PUT
-    @Path("/{connectorType}/config/validate")
+    @Path("/{pluginName}/config/validate")
+    @Operation(summary = "Validate the provided configuration against the configuration definition for the specified pluginName")
     public ConfigInfos validateConfigs(
-        final @PathParam("connectorType") String connType,
+        final @PathParam("pluginName") String pluginName,
         final Map<String, String> connectorConfig
     ) throws Throwable {
         String includedConnType = connectorConfig.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG);
         if (includedConnType != null
-            && !normalizedPluginName(includedConnType).endsWith(normalizedPluginName(connType))) {
+            && !normalizedPluginName(includedConnType).endsWith(normalizedPluginName(pluginName))) {
             throw new BadRequestException(
                 "Included connector type " + includedConnType + " does not match request type "
-                    + connType
+                    + pluginName
             );
         }
 
@@ -121,7 +131,7 @@ public ConfigInfos validateConfigs(
         herder.validateConnectorConfig(connectorConfig, validationCallback, false);
 
         try {
-            return validationCallback.get(ConnectorsResource.REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS);
+            return validationCallback.get(requestTimeoutMs, TimeUnit.MILLISECONDS);
         } catch (TimeoutException e) {
             // This timeout is for the operation itself. None of the timeout error codes are relevant, so internal server
             // error is the best option
@@ -133,7 +143,10 @@ public ConfigInfos validateConfigs(
 
     @GET
     @Path("/")
-    public List<PluginInfo> listConnectorPlugins(@DefaultValue("true") @QueryParam("connectorsOnly") boolean connectorsOnly) {
+    @Operation(summary = "List all connector plugins installed")
+    public List<PluginInfo> listConnectorPlugins(
+            @DefaultValue("true") @QueryParam("connectorsOnly") @Parameter(description = "Whether to list only connectors instead of all plugins") boolean connectorsOnly
+    ) {
         synchronized (this) {
             if (connectorsOnly) {
                 return Collections.unmodifiableList(connectorPlugins.stream()
@@ -146,8 +159,9 @@ public List<PluginInfo> listConnectorPlugins(@DefaultValue("true") @QueryParam("
     }
 
     @GET
-    @Path("/{name}/config")
-    public List<ConfigKeyInfo> getConnectorConfigDef(final @PathParam("name") String pluginName) {
+    @Path("/{pluginName}/config")
+    @Operation(summary = "Get the configuration definition for the specified pluginName")
+    public List<ConfigKeyInfo> getConnectorConfigDef(final @PathParam("pluginName") String pluginName) {
         synchronized (this) {
             return herder.connectorPluginConfig(pluginName);
         }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResource.java
index dbf246f00ef1c..92a7d543fff21 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResource.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResource.java
@@ -22,6 +22,8 @@
 import javax.ws.rs.core.HttpHeaders;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
 import org.apache.kafka.connect.errors.NotFoundException;
 import org.apache.kafka.connect.runtime.ConnectorConfig;
 import org.apache.kafka.connect.runtime.Herder;
@@ -74,22 +76,14 @@
 @Path("/connectors")
 @Produces(MediaType.APPLICATION_JSON)
 @Consumes(MediaType.APPLICATION_JSON)
-public class ConnectorsResource {
+public class ConnectorsResource implements ConnectResource {
     private static final Logger log = LoggerFactory.getLogger(ConnectorsResource.class);
     private static final TypeReference<List<Map<String, String>>> TASK_CONFIGS_TYPE =
         new TypeReference<List<Map<String, String>>>() { };
 
-    // TODO: This should not be so long. However, due to potentially long rebalances that may have to wait a full
-    // session timeout to complete, during which we cannot serve some requests. Ideally we could reduce this, but
-    // we need to consider all possible scenarios this could fail. It might be ok to fail with a timeout in rare cases,
-    // but currently a worker simply leaving the group can take this long as well.
-    public static final long REQUEST_TIMEOUT_MS = 90 * 1000;
-    // Mutable for integration testing; otherwise, some tests would take at least REQUEST_TIMEOUT_MS
-    // to run
-    private static long requestTimeoutMs = REQUEST_TIMEOUT_MS;
-
     private final Herder herder;
     private final WorkerConfig config;
+    private long requestTimeoutMs;
     @javax.ws.rs.core.Context
     private ServletContext context;
     private final boolean isTopicTrackingDisabled;
@@ -100,19 +94,20 @@ public ConnectorsResource(Herder herder, WorkerConfig config) {
         this.config = config;
         isTopicTrackingDisabled = !config.getBoolean(TOPIC_TRACKING_ENABLE_CONFIG);
         isTopicTrackingResetDisabled = !config.getBoolean(TOPIC_TRACKING_ALLOW_RESET_CONFIG);
+        this.requestTimeoutMs = DEFAULT_REST_REQUEST_TIMEOUT_MS;
     }
 
-    // For testing purposes only
-    public static void setRequestTimeout(long requestTimeoutMs) {
-        ConnectorsResource.requestTimeoutMs = requestTimeoutMs;
-    }
-
-    public static void resetRequestTimeout() {
-        ConnectorsResource.requestTimeoutMs = REQUEST_TIMEOUT_MS;
+    @Override
+    public void requestTimeout(long requestTimeoutMs) {
+        if (requestTimeoutMs < 1) {
+            throw new IllegalArgumentException("REST request timeout must be positive");
+        }
+        this.requestTimeoutMs = requestTimeoutMs;
     }
 
     @GET
     @Path("/")
+    @Operation(summary = "List all active connectors")
     public Response listConnectors(
         final @Context UriInfo uriInfo,
         final @Context HttpHeaders headers
@@ -150,7 +145,8 @@ public Response listConnectors(
 
     @POST
     @Path("/")
-    public Response createConnector(final @QueryParam("forward") Boolean forward,
+    @Operation(summary = "Create a new connector")
+    public Response createConnector(final @Parameter(hidden = true) @QueryParam("forward") Boolean forward,
                                     final @Context HttpHeaders headers,
                                     final CreateConnectorRequest createRequest) throws Throwable {
         // Trim leading and trailing whitespaces from the connector name, replace null with empty string
@@ -172,9 +168,10 @@ public Response createConnector(final @QueryParam("forward") Boolean forward,
 
     @GET
     @Path("/{connector}")
+    @Operation(summary = "Get the details for the specified connector")
     public ConnectorInfo getConnector(final @PathParam("connector") String connector,
                                       final @Context HttpHeaders headers,
-                                      final @QueryParam("forward") Boolean forward) throws Throwable {
+                                      final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<ConnectorInfo> cb = new FutureCallback<>();
         herder.connectorInfo(connector, cb);
         return completeOrForwardRequest(cb, "/connectors/" + connector, "GET", headers, null, forward);
@@ -182,9 +179,10 @@ public ConnectorInfo getConnector(final @PathParam("connector") String connector
 
     @GET
     @Path("/{connector}/config")
+    @Operation(summary = "Get the configuration for the specified connector")
     public Map<String, String> getConnectorConfig(final @PathParam("connector") String connector,
                                                   final @Context HttpHeaders headers,
-                                                  final @QueryParam("forward") Boolean forward) throws Throwable {
+                                                  final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<Map<String, String>> cb = new FutureCallback<>();
         herder.connectorConfig(connector, cb);
         return completeOrForwardRequest(cb, "/connectors/" + connector + "/config", "GET", headers, null, forward);
@@ -192,10 +190,11 @@ public Map<String, String> getConnectorConfig(final @PathParam("connector") Stri
 
     @GET
     @Path("/{connector}/tasks-config")
+    @Operation(summary = "Get the configuration of all tasks for the specified connector")
     public Map<ConnectorTaskId, Map<String, String>> getTasksConfig(
             final @PathParam("connector") String connector,
             final @Context HttpHeaders headers,
-            final @QueryParam("forward") Boolean forward) throws Throwable {
+            final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<Map<ConnectorTaskId, Map<String, String>>> cb = new FutureCallback<>();
         herder.tasksConfig(connector, cb);
         return completeOrForwardRequest(cb, "/connectors/" + connector + "/tasks-config", "GET", headers, null, forward);
@@ -203,12 +202,14 @@ public Map<ConnectorTaskId, Map<String, String>> getTasksConfig(
 
     @GET
     @Path("/{connector}/status")
+    @Operation(summary = "Get the status for the specified connector")
     public ConnectorStateInfo getConnectorStatus(final @PathParam("connector") String connector) {
         return herder.connectorStatus(connector);
     }
 
     @GET
     @Path("/{connector}/topics")
+    @Operation(summary = "Get the list of topics actively used by the specified connector")
     public Response getConnectorActiveTopics(final @PathParam("connector") String connector) {
         if (isTopicTrackingDisabled) {
             throw new ConnectRestException(Response.Status.FORBIDDEN.getStatusCode(),
@@ -220,6 +221,7 @@ public Response getConnectorActiveTopics(final @PathParam("connector") String co
 
     @PUT
     @Path("/{connector}/topics/reset")
+    @Operation(summary = "Reset the list of topics actively used by the specified connector")
     public Response resetConnectorActiveTopics(final @PathParam("connector") String connector, final @Context HttpHeaders headers) {
         if (isTopicTrackingDisabled) {
             throw new ConnectRestException(Response.Status.FORBIDDEN.getStatusCode(),
@@ -235,9 +237,10 @@ public Response resetConnectorActiveTopics(final @PathParam("connector") String
 
     @PUT
     @Path("/{connector}/config")
+    @Operation(summary = "Create or reconfigure the specified connector")
     public Response putConnectorConfig(final @PathParam("connector") String connector,
                                        final @Context HttpHeaders headers,
-                                       final @QueryParam("forward") Boolean forward,
+                                       final @Parameter(hidden = true) @QueryParam("forward") Boolean forward,
                                        final Map<String, String> connectorConfig) throws Throwable {
         FutureCallback<Herder.Created<ConnectorInfo>> cb = new FutureCallback<>();
         checkAndPutConnectorConfigName(connector, connectorConfig);
@@ -257,11 +260,12 @@ public Response putConnectorConfig(final @PathParam("connector") String connecto
 
     @POST
     @Path("/{connector}/restart")
+    @Operation(summary = "Restart the specified connector")
     public Response restartConnector(final @PathParam("connector") String connector,
                                  final @Context HttpHeaders headers,
-                                 final @DefaultValue("false") @QueryParam("includeTasks") Boolean includeTasks,
-                                 final @DefaultValue("false") @QueryParam("onlyFailed") Boolean onlyFailed,
-                                 final @QueryParam("forward") Boolean forward) throws Throwable {
+                                 final @DefaultValue("false") @QueryParam("includeTasks") @Parameter(description = "Whether to also restart tasks") Boolean includeTasks,
+                                 final @DefaultValue("false") @QueryParam("onlyFailed") @Parameter(description = "Whether to only restart failed tasks/connectors")Boolean onlyFailed,
+                                 final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         RestartRequest restartRequest = new RestartRequest(connector, onlyFailed, includeTasks);
         String forwardingPath = "/connectors/" + connector + "/restart";
         if (restartRequest.forceRestartConnectorOnly()) {
@@ -285,6 +289,8 @@ public Response restartConnector(final @PathParam("connector") String connector,
 
     @PUT
     @Path("/{connector}/pause")
+    @Operation(summary = "Pause the specified connector",
+               description = "This operation is idempotent and has no effects if the connector is already paused")
     public Response pauseConnector(@PathParam("connector") String connector, final @Context HttpHeaders headers) {
         herder.pauseConnector(connector);
         return Response.accepted().build();
@@ -292,6 +298,8 @@ public Response pauseConnector(@PathParam("connector") String connector, final @
 
     @PUT
     @Path("/{connector}/resume")
+    @Operation(summary = "Resume the specified connector",
+               description = "This operation is idempotent and has no effects if the connector is already running")
     public Response resumeConnector(@PathParam("connector") String connector) {
         herder.resumeConnector(connector);
         return Response.accepted().build();
@@ -299,9 +307,10 @@ public Response resumeConnector(@PathParam("connector") String connector) {
 
     @GET
     @Path("/{connector}/tasks")
+    @Operation(summary = "List all tasks for the specified connector")
     public List<TaskInfo> getTaskConfigs(final @PathParam("connector") String connector,
                                          final @Context HttpHeaders headers,
-                                         final @QueryParam("forward") Boolean forward) throws Throwable {
+                                         final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<List<TaskInfo>> cb = new FutureCallback<>();
         herder.taskConfigs(connector, cb);
         return completeOrForwardRequest(cb, "/connectors/" + connector + "/tasks", "GET", headers, null, new TypeReference<List<TaskInfo>>() {
@@ -310,6 +319,7 @@ public List<TaskInfo> getTaskConfigs(final @PathParam("connector") String connec
 
     @POST
     @Path("/{connector}/tasks")
+    @Operation(hidden = true, summary = "This operation is only for inter-worker communications")
     public void putTaskConfigs(final @PathParam("connector") String connector,
                                final @Context HttpHeaders headers,
                                final @QueryParam("forward") Boolean forward,
@@ -320,8 +330,21 @@ public void putTaskConfigs(final @PathParam("connector") String connector,
         completeOrForwardRequest(cb, "/connectors/" + connector + "/tasks", "POST", headers, taskConfigs, forward);
     }
 
+    @PUT
+    @Path("/{connector}/fence")
+    @Operation(hidden = true, summary = "This operation is only for inter-worker communications")
+    public void fenceZombies(final @PathParam("connector") String connector,
+                             final @Context HttpHeaders headers,
+                             final @QueryParam("forward") Boolean forward,
+                             final byte[] requestBody) throws Throwable {
+        FutureCallback<Void> cb = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(connector, cb, InternalRequestSignature.fromHeaders(requestBody, headers));
+        completeOrForwardRequest(cb, "/connectors/" + connector + "/fence", "PUT", headers, requestBody, forward);
+    }
+
     @GET
     @Path("/{connector}/tasks/{task}/status")
+    @Operation(summary = "Get the state of the specified task for the specified connector")
     public ConnectorStateInfo.TaskState getTaskStatus(final @PathParam("connector") String connector,
                                                       final @Context HttpHeaders headers,
                                                       final @PathParam("task") Integer task) {
@@ -330,10 +353,11 @@ public ConnectorStateInfo.TaskState getTaskStatus(final @PathParam("connector")
 
     @POST
     @Path("/{connector}/tasks/{task}/restart")
+    @Operation(summary = "Restart the specified task for the specified connector")
     public void restartTask(final @PathParam("connector") String connector,
                             final @PathParam("task") Integer task,
                             final @Context HttpHeaders headers,
-                            final @QueryParam("forward") Boolean forward) throws Throwable {
+                            final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<Void> cb = new FutureCallback<>();
         ConnectorTaskId taskId = new ConnectorTaskId(connector, task);
         herder.restartTask(taskId, cb);
@@ -342,9 +366,10 @@ public void restartTask(final @PathParam("connector") String connector,
 
     @DELETE
     @Path("/{connector}")
+    @Operation(summary = "Delete the specified connector")
     public void destroyConnector(final @PathParam("connector") String connector,
                                  final @Context HttpHeaders headers,
-                                 final @QueryParam("forward") Boolean forward) throws Throwable {
+                                 final @Parameter(hidden = true) @QueryParam("forward") Boolean forward) throws Throwable {
         FutureCallback<Herder.Created<ConnectorInfo>> cb = new FutureCallback<>();
         herder.deleteConnectorConfig(connector, cb);
         completeOrForwardRequest(cb, "/connectors/" + connector, "DELETE", headers, null, forward);
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/LoggingResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/LoggingResource.java
index ce9ce14e97488..008842b5721f4 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/LoggingResource.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/LoggingResource.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.connect.runtime.rest.resources;
 
+import io.swagger.v3.oas.annotations.Operation;
 import org.apache.kafka.connect.errors.NotFoundException;
 import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
 import org.apache.log4j.Level;
@@ -45,13 +46,18 @@
 @Path("/admin/loggers")
 @Produces(MediaType.APPLICATION_JSON)
 @Consumes(MediaType.APPLICATION_JSON)
-public class LoggingResource {
+public class LoggingResource implements ConnectResource {
 
     /**
      * Log4j uses "root" (case insensitive) as name of the root logger.
      */
     private static final String ROOT_LOGGER_NAME = "root";
 
+    @Override
+    public void requestTimeout(long requestTimeoutMs) {
+        // No-op
+    }
+
     /**
      * List the current loggers that have their levels explicitly set and their log levels.
      *
@@ -59,6 +65,7 @@ public class LoggingResource {
      */
     @GET
     @Path("/")
+    @Operation(summary = "List the current loggers that have their levels explicitly set and their log levels")
     public Response listLoggers() {
         Map<String, Map<String, String>> loggers = new TreeMap<>();
         Enumeration<Logger> enumeration = currentLoggers();
@@ -83,6 +90,7 @@ public Response listLoggers() {
      */
     @GET
     @Path("/{logger}")
+    @Operation(summary = "Get the log level for the specified logger")
     public Response getLogger(final @PathParam("logger") String namedLogger) {
         Objects.requireNonNull(namedLogger, "require non-null name");
 
@@ -120,6 +128,7 @@ public Response getLogger(final @PathParam("logger") String namedLogger) {
      */
     @PUT
     @Path("/{logger}")
+    @Operation(summary = "Set the level for the specified logger")
     public Response setLevel(final @PathParam("logger") String namedLogger,
                              final Map<String, String> levelMap) {
         String desiredLevelStr = levelMap.get("level");
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/RootResource.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/RootResource.java
index 9666bf15954f9..fe09e26903924 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/RootResource.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/rest/resources/RootResource.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.connect.runtime.rest.resources;
 
+import io.swagger.v3.oas.annotations.Operation;
 import org.apache.kafka.connect.runtime.Herder;
 import org.apache.kafka.connect.runtime.rest.entities.ServerInfo;
 
@@ -26,7 +27,7 @@
 
 @Path("/")
 @Produces(MediaType.APPLICATION_JSON)
-public class RootResource {
+public class RootResource implements ConnectResource {
 
     private final Herder herder;
 
@@ -34,8 +35,14 @@ public RootResource(Herder herder) {
         this.herder = herder;
     }
 
+    @Override
+    public void requestTimeout(long requestTimeoutMs) {
+        // No-op
+    }
+
     @GET
     @Path("/")
+    @Operation(summary = "Get details about this Connect worker and the id of the Kafka cluster it is connected to")
     public ServerInfo serverInfo() {
         return new ServerInfo(herder.kafkaClusterId());
     }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java
index dac389ba0e346..8afe3c7b3deb2 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerder.java
@@ -31,7 +31,7 @@
 import org.apache.kafka.connect.runtime.SourceConnectorConfig;
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.Worker;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.rest.InternalRequestSignature;
 import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
@@ -265,6 +265,11 @@ public void putTaskConfigs(String connName, List<Map<String, String>> configs, C
         throw new UnsupportedOperationException("Kafka Connect in standalone mode does not support externally setting task configurations.");
     }
 
+    @Override
+    public void fenceZombieSourceTasks(String connName, Callback<Void> callback, InternalRequestSignature requestSignature) {
+        throw new UnsupportedOperationException("Kafka Connect in standalone mode does not support exactly-once source connectors.");
+    }
+
     @Override
     public synchronized void restartTask(ConnectorTaskId taskId, Callback<Void> cb) {
         if (!configState.contains(taskId.connector()))
@@ -275,9 +280,8 @@ public synchronized void restartTask(ConnectorTaskId taskId, Callback<Void> cb)
             cb.onCompletion(new NotFoundException("Task " + taskId + " not found", null), null);
         Map<String, String> connConfigProps = configState.connectorConfig(taskId.connector());
 
-        TargetState targetState = configState.targetState(taskId.connector());
         worker.stopAndAwaitTask(taskId);
-        if (worker.startTask(taskId, configState, connConfigProps, taskConfigProps, this, targetState))
+        if (startTask(taskId, connConfigProps))
             cb.onCompletion(null, null);
         else
             cb.onCompletion(new ConnectException("Failed to start task: " + taskId), null);
@@ -372,11 +376,34 @@ private void createConnectorTasks(String connName) {
     }
 
     private void createConnectorTasks(String connName, Collection<ConnectorTaskId> taskIds) {
-        TargetState initialState = configState.targetState(connName);
         Map<String, String> connConfigs = configState.connectorConfig(connName);
         for (ConnectorTaskId taskId : taskIds) {
-            Map<String, String> taskConfigMap = configState.taskConfig(taskId);
-            worker.startTask(taskId, configState, connConfigs, taskConfigMap, this, initialState);
+            startTask(taskId, connConfigs);
+        }
+    }
+
+    private boolean startTask(ConnectorTaskId taskId, Map<String, String> connProps) {
+        switch (connectorTypeForClass(connProps.get(ConnectorConfig.CONNECTOR_CLASS_CONFIG))) {
+            case SINK:
+                return worker.startSinkTask(
+                        taskId,
+                        configState,
+                        connProps,
+                        configState.taskConfig(taskId),
+                        this,
+                        configState.targetState(taskId.connector())
+                );
+            case SOURCE:
+                return worker.startSourceTask(
+                        taskId,
+                        configState,
+                        connProps,
+                        configState.taskConfig(taskId),
+                        this,
+                        configState.targetState(taskId.connector())
+                );
+            default:
+                throw new ConnectException("Failed to start task " + taskId + " since it is not a recognizable type (source or sink)");
         }
     }
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ClusterConfigState.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ClusterConfigState.java
similarity index 81%
rename from connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ClusterConfigState.java
rename to connect/runtime/src/main/java/org/apache/kafka/connect/storage/ClusterConfigState.java
index 717120d8508ee..99000f7a8f48f 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/runtime/distributed/ClusterConfigState.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ClusterConfigState.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.kafka.connect.runtime.distributed;
+package org.apache.kafka.connect.storage;
 
 import org.apache.kafka.common.config.provider.ConfigProvider;
 import org.apache.kafka.connect.runtime.SessionKey;
@@ -42,16 +42,22 @@ public class ClusterConfigState {
             Collections.emptyMap(),
             Collections.emptyMap(),
             Collections.emptyMap(),
+            Collections.emptyMap(),
+            Collections.emptyMap(),
+            Collections.emptySet(),
             Collections.emptySet());
 
     private final long offset;
     private final SessionKey sessionKey;
-    private final Map<String, Integer> connectorTaskCounts;
-    private final Map<String, Map<String, String>> connectorConfigs;
-    private final Map<String, TargetState> connectorTargetStates;
-    private final Map<ConnectorTaskId, Map<String, String>> taskConfigs;
-    private final Set<String> inconsistentConnectors;
     private final WorkerConfigTransformer configTransformer;
+    final Map<String, Integer> connectorTaskCounts;
+    final Map<String, Map<String, String>> connectorConfigs;
+    final Map<String, TargetState> connectorTargetStates;
+    final Map<ConnectorTaskId, Map<String, String>> taskConfigs;
+    final Map<String, Integer> connectorTaskCountRecords;
+    final Map<String, Integer> connectorTaskConfigGenerations;
+    final Set<String> connectorsPendingFencing;
+    final Set<String> inconsistentConnectors;
 
     public ClusterConfigState(long offset,
                               SessionKey sessionKey,
@@ -59,6 +65,9 @@ public ClusterConfigState(long offset,
                               Map<String, Map<String, String>> connectorConfigs,
                               Map<String, TargetState> connectorTargetStates,
                               Map<ConnectorTaskId, Map<String, String>> taskConfigs,
+                              Map<String, Integer> connectorTaskCountRecords,
+                              Map<String, Integer> connectorTaskConfigGenerations,
+                              Set<String> connectorsPendingFencing,
                               Set<String> inconsistentConnectors) {
         this(offset,
                 sessionKey,
@@ -66,6 +75,9 @@ public ClusterConfigState(long offset,
                 connectorConfigs,
                 connectorTargetStates,
                 taskConfigs,
+                connectorTaskCountRecords,
+                connectorTaskConfigGenerations,
+                connectorsPendingFencing,
                 inconsistentConnectors,
                 null);
     }
@@ -76,6 +88,9 @@ public ClusterConfigState(long offset,
                               Map<String, Map<String, String>> connectorConfigs,
                               Map<String, TargetState> connectorTargetStates,
                               Map<ConnectorTaskId, Map<String, String>> taskConfigs,
+                              Map<String, Integer> connectorTaskCountRecords,
+                              Map<String, Integer> connectorTaskConfigGenerations,
+                              Set<String> connectorsPendingFencing,
                               Set<String> inconsistentConnectors,
                               WorkerConfigTransformer configTransformer) {
         this.offset = offset;
@@ -84,6 +99,9 @@ public ClusterConfigState(long offset,
         this.connectorConfigs = connectorConfigs;
         this.connectorTargetStates = connectorTargetStates;
         this.taskConfigs = taskConfigs;
+        this.connectorTaskCountRecords = connectorTaskCountRecords;
+        this.connectorTaskConfigGenerations = connectorTaskConfigGenerations;
+        this.connectorsPendingFencing = connectorsPendingFencing;
         this.inconsistentConnectors = inconsistentConnectors;
         this.configTransformer = configTransformer;
     }
@@ -202,6 +220,15 @@ public int taskCount(String connectorName) {
         return count == null ? 0 : count;
     }
 
+    /**
+     * Get whether the connector requires a round of zombie fencing before
+     * a new generation of tasks can be brought up for it.
+     * @param connectorName name of the connector
+     */
+    public boolean pendingFencing(String connectorName) {
+        return connectorsPendingFencing.contains(connectorName);
+    }
+
     /**
      * Get the current set of task IDs for the specified connector.
      * @param connectorName the name of the connector to look up task configs for
@@ -225,6 +252,25 @@ public List<ConnectorTaskId> tasks(String connectorName) {
         return Collections.unmodifiableList(taskIds);
     }
 
+    /**
+     * Get the task count record for the connector, if one exists
+     * @param connector name of the connector
+     * @return the latest task count record for the connector, or {@code null} if none exists
+     */
+    public Integer taskCountRecord(String connector) {
+        return connectorTaskCountRecords.get(connector);
+    }
+
+    /**
+     * Get the generation number for the connector's task configurations, if one exists.
+     * Generation numbers increase monotonically each time a new set of task configurations is detected for the connector
+     * @param connector name of the connector
+     * @return the latest task config generation number for the connector, or {@code null} if none exists
+     */
+    public Integer taskConfigGeneration(String connector) {
+        return connectorTaskConfigGenerations.get(connector);
+    }
+
     /**
      * Get the set of connectors which have inconsistent data in this snapshot. These inconsistencies can occur due to
      * partially completed writes combined with log compaction.
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConfigBackingStore.java
index 826f934ffd550..490cfdafa3f35 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConfigBackingStore.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConfigBackingStore.java
@@ -19,7 +19,6 @@
 import org.apache.kafka.connect.runtime.RestartRequest;
 import org.apache.kafka.connect.runtime.SessionKey;
 import org.apache.kafka.connect.runtime.TargetState;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 
 import java.util.Collection;
@@ -90,6 +89,10 @@ public interface ConfigBackingStore {
      */
     void putTargetState(String connector, TargetState state);
 
+    /**
+     * Store a new {@link SessionKey} that can be used to validate internal (i.e., non-user-triggered) inter-worker communication.
+     * @param sessionKey the session key to store
+     */
     void putSessionKey(SessionKey sessionKey);
 
     /**
@@ -98,6 +101,22 @@ public interface ConfigBackingStore {
      */
     void putRestartRequest(RestartRequest restartRequest);
 
+    /**
+     * Record the number of tasks for the connector after a successful round of zombie fencing.
+     * @param connector name of the connector
+     * @param taskCount number of tasks used by the connector
+     */
+    void putTaskCountRecord(String connector, int taskCount);
+
+    /**
+     * Prepare to write to the backing config store. May be required by some implementations (such as those that only permit a single
+     * writer at a time across a cluster of workers) before performing mutating operations like writing configurations, target states, etc.
+     * The default implementation is a no-op; it is the responsibility of the implementing class to override this and document any expectations for
+     * when it must be invoked.
+     */
+    default void claimWritePrivileges() {
+    }
+
     /**
      * Set an update listener to get notifications when there are config/target state
      * changes.
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConnectorOffsetBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConnectorOffsetBackingStore.java
new file mode 100644
index 0000000000000..b33315b9f3cad
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/ConnectorOffsetBackingStore.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.storage;
+
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.connect.runtime.WorkerConfig;
+import org.apache.kafka.connect.util.Callback;
+import org.apache.kafka.connect.util.LoggingContext;
+import org.apache.kafka.connect.util.TopicAdmin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.function.Supplier;
+
+/**
+ * An {@link OffsetBackingStore} with support for reading from and writing to a worker-global
+ * offset backing store and/or a connector-specific offset backing store.
+ */
+public class ConnectorOffsetBackingStore implements OffsetBackingStore {
+
+    private static final Logger log = LoggerFactory.getLogger(ConnectorOffsetBackingStore.class);
+
+    /**
+     * Builds an offset store that uses a connector-specific offset topic as the primary store and
+     * the worker-global offset store as the secondary store.
+     *
+     * @param loggingContext a {@link Supplier} for the {@link LoggingContext} that should be used
+     *                       for messages logged by this offset store; may not be null, and may never return null
+     * @param workerStore the worker-global offset store; may not be null
+     * @param connectorStore the connector-specific offset store; may not be null
+     * @param connectorOffsetsTopic the name of the connector-specific offset topic; may not be null
+     * @param connectorStoreAdmin the topic admin to use for the connector-specific offset topic; may not be null
+     * @return an offset store backed primarily by the connector-specific offset topic and secondarily
+     * by the worker-global offset store; never null
+     */
+    public static ConnectorOffsetBackingStore withConnectorAndWorkerStores(
+            Supplier<LoggingContext> loggingContext,
+            OffsetBackingStore workerStore,
+            KafkaOffsetBackingStore connectorStore,
+            String connectorOffsetsTopic,
+            TopicAdmin connectorStoreAdmin
+    ) {
+        Objects.requireNonNull(loggingContext);
+        Objects.requireNonNull(workerStore);
+        Objects.requireNonNull(connectorStore);
+        Objects.requireNonNull(connectorOffsetsTopic);
+        Objects.requireNonNull(connectorStoreAdmin);
+        return new ConnectorOffsetBackingStore(
+                Time.SYSTEM,
+                loggingContext,
+                connectorOffsetsTopic,
+                workerStore,
+                connectorStore,
+                connectorStoreAdmin
+        );
+    }
+
+    /**
+     * Builds an offset store that uses the worker-global offset store as the primary store, and no secondary store.
+     *
+     * @param loggingContext a {@link Supplier} for the {@link LoggingContext} that should be used
+     *                       for messages logged by this offset store; may not be null, and may never return null
+     * @param workerStore the worker-global offset store; may not be null
+     * @param workerOffsetsTopic the name of the worker-global offset topic; may be null if the worker
+     *                           does not use an offset topic for its offset store
+     * @return an offset store for the connector backed solely by the worker-global offset store; never null
+     */
+    public static ConnectorOffsetBackingStore withOnlyWorkerStore(
+            Supplier<LoggingContext> loggingContext,
+            OffsetBackingStore workerStore,
+            String workerOffsetsTopic
+    ) {
+        Objects.requireNonNull(loggingContext);
+        Objects.requireNonNull(workerStore);
+        return new ConnectorOffsetBackingStore(Time.SYSTEM, loggingContext, workerOffsetsTopic, workerStore, null, null);
+    }
+
+    /**
+     * Builds an offset store that uses a connector-specific offset topic as the primary store, and no secondary store.
+     *
+     * @param loggingContext a {@link Supplier} for the {@link LoggingContext} that should be used
+     *                       for messages logged by this offset store; may not be null, and may never return null
+     * @param connectorStore the connector-specific offset store; may not be null
+     * @param connectorOffsetsTopic the name of the connector-specific offset topic; may not be null
+     * @param connectorStoreAdmin the topic admin to use for the connector-specific offset topic; may not be null
+     * @return an offset store for the connector backed solely by the connector-specific offset topic; never null
+     */
+    public static ConnectorOffsetBackingStore withOnlyConnectorStore(
+            Supplier<LoggingContext> loggingContext,
+            KafkaOffsetBackingStore connectorStore,
+            String connectorOffsetsTopic,
+            TopicAdmin connectorStoreAdmin
+    ) {
+        Objects.requireNonNull(loggingContext);
+        Objects.requireNonNull(connectorOffsetsTopic);
+        Objects.requireNonNull(connectorStoreAdmin);
+        return new ConnectorOffsetBackingStore(
+                Time.SYSTEM,
+                loggingContext,
+                connectorOffsetsTopic,
+                null,
+                connectorStore,
+                connectorStoreAdmin
+        );
+    }
+
+    private final Time time;
+    private final Supplier<LoggingContext> loggingContext;
+    private final String primaryOffsetsTopic;
+    private final Optional<OffsetBackingStore> workerStore;
+    private final Optional<KafkaOffsetBackingStore> connectorStore;
+    private final Optional<TopicAdmin> connectorStoreAdmin;
+
+    ConnectorOffsetBackingStore(
+            Time time,
+            Supplier<LoggingContext> loggingContext,
+            String primaryOffsetsTopic,
+            OffsetBackingStore workerStore,
+            KafkaOffsetBackingStore connectorStore,
+            TopicAdmin connectorStoreAdmin
+    ) {
+        if (workerStore == null && connectorStore == null) {
+            throw new IllegalArgumentException("At least one non-null offset store must be provided");
+        }
+        this.time = time;
+        this.loggingContext = loggingContext;
+        this.primaryOffsetsTopic = primaryOffsetsTopic;
+        this.workerStore = Optional.ofNullable(workerStore);
+        this.connectorStore = Optional.ofNullable(connectorStore);
+        this.connectorStoreAdmin = Optional.ofNullable(connectorStoreAdmin);
+    }
+
+    public String primaryOffsetsTopic() {
+        return primaryOffsetsTopic;
+    }
+
+    /**
+     * If configured to use a connector-specific offset store, {@link OffsetBackingStore#start() start} that store.
+     *
+     * <p>The worker-global offset store is not modified; it is the caller's responsibility to ensure that it is started
+     * before calls to {@link #get(Collection)} and {@link #set(Map, Callback)} take place.
+     */
+    @Override
+    public void start() {
+        // Worker offset store should already be started
+        connectorStore.ifPresent(OffsetBackingStore::start);
+    }
+
+    /**
+     * If configured to use a connector-specific offset store, {@link OffsetBackingStore#stop() stop} that store,
+     * and {@link TopicAdmin#close(Duration) close} the topic admin used by that store.
+     *
+     * <p>The worker-global offset store is not modified as it may be used for other connectors that either already exist,
+     * or will be created, on this worker.
+     */
+    @Override
+    public void stop() {
+        // Worker offset store should not be stopped as it may be used for multiple connectors
+        connectorStore.ifPresent(OffsetBackingStore::stop);
+        connectorStoreAdmin.ifPresent(TopicAdmin::close);
+    }
+
+    /**
+     * Get the offset values for the specified keys.
+     *
+     * <p>If configured to use a connector-specific offset store, priority is given to the values contained in that store,
+     * and the values in the worker-global offset store (if one is provided) are used as a fallback for keys that are not
+     * present in the connector-specific store.
+     *
+     * <p>If not configured to use a connector-specific offset store, only the values contained in the worker-global
+     * offset store are returned.
+
+     * @param keys list of keys to look up
+     * @return future for the resulting map from key to value
+     */
+    @Override
+    public Future<Map<ByteBuffer, ByteBuffer>> get(Collection<ByteBuffer> keys) {
+        Future<Map<ByteBuffer, ByteBuffer>> workerGetFuture = getFromStore(workerStore, keys);
+        Future<Map<ByteBuffer, ByteBuffer>> connectorGetFuture = getFromStore(connectorStore, keys);
+
+        return new Future<Map<ByteBuffer, ByteBuffer>>() {
+            @Override
+            public boolean cancel(boolean mayInterruptIfRunning) {
+                // Note the use of | instead of || here; this causes cancel to be invoked on both futures,
+                // even if the first call to cancel returns true
+                return workerGetFuture.cancel(mayInterruptIfRunning)
+                        | connectorGetFuture.cancel(mayInterruptIfRunning);
+            }
+
+            @Override
+            public boolean isCancelled() {
+                return workerGetFuture.isCancelled()
+                        || connectorGetFuture.isCancelled();
+            }
+
+            @Override
+            public boolean isDone() {
+                return workerGetFuture.isDone()
+                        && connectorGetFuture.isDone();
+            }
+
+            @Override
+            public Map<ByteBuffer, ByteBuffer> get() throws InterruptedException, ExecutionException {
+                Map<ByteBuffer, ByteBuffer> result = new HashMap<>(workerGetFuture.get());
+                result.putAll(connectorGetFuture.get());
+                return result;
+            }
+
+            @Override
+            public Map<ByteBuffer, ByteBuffer> get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+                long timeoutMs = unit.toMillis(timeout);
+                long endTime = time.milliseconds() + timeoutMs;
+                Map<ByteBuffer, ByteBuffer> result = new HashMap<>(workerGetFuture.get(timeoutMs, unit));
+                timeoutMs = Math.max(1, endTime - time.milliseconds());
+                result.putAll(connectorGetFuture.get(timeoutMs, TimeUnit.MILLISECONDS));
+                return result;
+            }
+        };
+    }
+
+    /**
+     * Store the specified offset key/value pairs.
+     *
+     * <p>If configured to use a connector-specific offset store, the returned {@link Future} corresponds to a
+     * write to that store, and the passed-in {@link Callback} is invoked once that write completes. If a worker-global
+     * store is provided, a secondary write is made to that store if the write to the connector-specific store
+     * succeeds. Errors with this secondary write are not reflected in the returned {@link Future} or the passed-in
+     * {@link Callback}; they are only logged as a warning to users.
+     *
+     * <p>If not configured to use a connector-specific offset store, the returned {@link Future} corresponds to a
+     * write to the worker-global offset store, and the passed-in {@link Callback} is invoked once that write completes.
+
+     * @param values map from key to value
+     * @param callback callback to invoke on completion of the primary write
+     * @return void future for the primary write
+     */
+    @Override
+    public Future<Void> set(Map<ByteBuffer, ByteBuffer> values, Callback<Void> callback) {
+        final OffsetBackingStore primaryStore;
+        final OffsetBackingStore secondaryStore;
+        if (connectorStore.isPresent()) {
+            primaryStore = connectorStore.get();
+            secondaryStore = workerStore.orElse(null);
+        } else if (workerStore.isPresent()) {
+            primaryStore = workerStore.get();
+            secondaryStore = null;
+        } else {
+            // Should never happen since we check for this case in the constructor, but just in case, this should
+            // be more informative than the NPE that would otherwise be thrown
+            throw new IllegalStateException("At least one non-null offset store must be provided");
+        }
+
+        return primaryStore.set(values, (primaryWriteError, ignored) -> {
+            if (secondaryStore != null) {
+                if (primaryWriteError != null) {
+                    log.trace("Skipping offsets write to secondary store because primary write has failed", primaryWriteError);
+                } else {
+                    try {
+                        // Invoke OffsetBackingStore::set but ignore the resulting future; we don't block on writes to this
+                        // backing store.
+                        secondaryStore.set(values, (secondaryWriteError, ignored2) -> {
+                            try (LoggingContext context = loggingContext()) {
+                                if (secondaryWriteError != null) {
+                                    log.warn("Failed to write offsets to secondary backing store", secondaryWriteError);
+                                } else {
+                                    log.debug("Successfully flushed offsets to secondary backing store");
+                                }
+                            }
+                        });
+                    } catch (Exception e) {
+                        log.warn("Failed to write offsets to secondary backing store", e);
+                    }
+                }
+            }
+            try (LoggingContext context = loggingContext()) {
+                callback.onCompletion(primaryWriteError, ignored);
+            }
+        });
+    }
+
+    /**
+     * If configured to use a connector-specific offset store,
+     * {@link OffsetBackingStore#configure(WorkerConfig) configure} that store.
+     *
+     * <p>The worker-global offset store is not modified; it is the caller's responsibility to ensure that it is configured
+     * before calls to {@link #start()}, {@link #get(Collection)} and {@link #set(Map, Callback)} take place.
+     */
+    @Override
+    public void configure(WorkerConfig config) {
+        // Worker offset store should already be configured
+        connectorStore.ifPresent(store -> store.configure(config));
+    }
+
+    // For testing
+    public boolean hasConnectorSpecificStore() {
+        return connectorStore.isPresent();
+    }
+
+    // For testing
+    public boolean hasWorkerGlobalStore() {
+        return workerStore.isPresent();
+    }
+
+    private LoggingContext loggingContext() {
+        LoggingContext result = loggingContext.get();
+        Objects.requireNonNull(result);
+        return result;
+    }
+
+    private static Future<Map<ByteBuffer, ByteBuffer>> getFromStore(Optional<? extends OffsetBackingStore> store, Collection<ByteBuffer> keys) {
+        return store.map(s -> s.get(keys)).orElseGet(() -> CompletableFuture.completedFuture(Collections.emptyMap()));
+    }
+
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java
index 669c72b224df8..76c626964e6d2 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaConfigBackingStore.java
@@ -19,9 +19,14 @@
 import org.apache.kafka.clients.admin.NewTopic;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.TopicConfig;
+import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.serialization.ByteArrayDeserializer;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.StringDeserializer;
@@ -39,7 +44,6 @@
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.WorkerConfig;
 import org.apache.kafka.connect.runtime.WorkerConfigTransformer;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
 import org.apache.kafka.connect.util.Callback;
 import org.apache.kafka.connect.util.ConnectUtils;
@@ -51,6 +55,7 @@
 import org.slf4j.LoggerFactory;
 
 import javax.crypto.spec.SecretKeySpec;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Base64;
@@ -58,6 +63,7 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
@@ -185,6 +191,12 @@ public static String COMMIT_TASKS_KEY(String connectorName) {
         return COMMIT_TASKS_PREFIX + connectorName;
     }
 
+    public static final String TASK_COUNT_RECORD_PREFIX = "tasks-fencing-";
+
+    public static String TASK_COUNT_RECORD_KEY(String connectorName) {
+        return TASK_COUNT_RECORD_PREFIX + connectorName;
+    }
+
     public static final String SESSION_KEY_KEY = "session-key";
 
     // Note that while using real serialization for values as we have here, but ad hoc string serialization for keys,
@@ -201,6 +213,9 @@ public static String COMMIT_TASKS_KEY(String connectorName) {
     public static final Schema TARGET_STATE_V0 = SchemaBuilder.struct()
             .field("state", Schema.STRING_SCHEMA)
             .build();
+    public static final Schema TASK_COUNT_RECORD_V0 = SchemaBuilder.struct()
+            .field("task-count", Schema.INT32_SCHEMA)
+            .build();
     // The key is logically a byte array, but we can't use the JSON converter to (de-)serialize that without a schema.
     // So instead, we base 64-encode it before serializing and decode it after deserializing.
     public static final Schema SESSION_KEY_V0 = SchemaBuilder.struct()
@@ -231,23 +246,25 @@ public static String RESTART_KEY(String connectorName) {
     private volatile boolean started;
     // Although updateListener is not final, it's guaranteed to be visible to any thread after its
     // initialization as long as we always read the volatile variable "started" before we access the listener.
-    private UpdateListener updateListener;
+    private ConfigBackingStore.UpdateListener updateListener;
+
+    private final Map<String, Object> baseProducerProps;
 
     private final String topic;
     // Data is passed to the log already serialized. We use a converter to handle translating to/from generic Connect
     // format to serialized form
     private final KafkaBasedLog<String, byte[]> configLog;
     // Connector -> # of tasks
-    private final Map<String, Integer> connectorTaskCounts = new HashMap<>();
+    final Map<String, Integer> connectorTaskCounts = new HashMap<>();
     // Connector and task configs: name or id -> config map
-    private final Map<String, Map<String, String>> connectorConfigs = new HashMap<>();
-    private final Map<ConnectorTaskId, Map<String, String>> taskConfigs = new HashMap<>();
+    final Map<String, Map<String, String>> connectorConfigs = new HashMap<>();
+    final Map<ConnectorTaskId, Map<String, String>> taskConfigs = new HashMap<>();
     private final Supplier<TopicAdmin> topicAdminSupplier;
     private SharedTopicAdmin ownTopicAdmin;
 
     // Set of connectors where we saw a task commit with an incomplete set of task config updates, indicating the data
     // is in an inconsistent state and we cannot safely use them until they have been refreshed.
-    private final Set<String> inconsistent = new HashSet<>();
+    final Set<String> inconsistent = new HashSet<>();
     // The most recently read offset. This does not take into account deferred task updates/commits, so we may have
     // outstanding data to be applied.
     private volatile long offset;
@@ -257,22 +274,41 @@ public static String RESTART_KEY(String connectorName) {
     // Connector -> Map[ConnectorTaskId -> Configs]
     private final Map<String, Map<ConnectorTaskId, Map<String, String>>> deferredTaskUpdates = new HashMap<>();
 
-    private final Map<String, TargetState> connectorTargetStates = new HashMap<>();
+    final Map<String, TargetState> connectorTargetStates = new HashMap<>();
+
+    final Map<String, Integer> connectorTaskCountRecords = new HashMap<>();
+    final Map<String, Integer> connectorTaskConfigGenerations = new HashMap<>();
+    final Set<String> connectorsPendingFencing = new HashSet<>();
 
     private final WorkerConfigTransformer configTransformer;
 
+    private final boolean usesFencableWriter;
+    private volatile Producer<String, byte[]> fencableProducer;
+    private final Map<String, Object> fencableProducerProps;
+
     @Deprecated
-    public KafkaConfigBackingStore(Converter converter, WorkerConfig config, WorkerConfigTransformer configTransformer) {
+    public KafkaConfigBackingStore(Converter converter, DistributedConfig config, WorkerConfigTransformer configTransformer) {
         this(converter, config, configTransformer, null);
     }
 
-    public KafkaConfigBackingStore(Converter converter, WorkerConfig config, WorkerConfigTransformer configTransformer, Supplier<TopicAdmin> adminSupplier) {
+    public KafkaConfigBackingStore(Converter converter, DistributedConfig config, WorkerConfigTransformer configTransformer, Supplier<TopicAdmin> adminSupplier) {
         this.lock = new Object();
         this.started = false;
         this.converter = converter;
         this.offset = -1;
         this.topicAdminSupplier = adminSupplier;
 
+        this.baseProducerProps = baseProducerProps(config);
+        // By default, Connect disables idempotent behavior for all producers, even though idempotence became
+        // default for Kafka producers. This is to ensure Connect continues to work with many Kafka broker versions, including older brokers that do not support
+        // idempotent producers or require explicit steps to enable them (e.g. adding the IDEMPOTENT_WRITE ACL to brokers older than 2.8).
+        // These settings might change when https://cwiki.apache.org/confluence/display/KAFKA/KIP-318%3A+Make+Kafka+Connect+Source+idempotent
+        // gets approved and scheduled for release.
+        baseProducerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
+
+        this.fencableProducerProps = fencableProducerProps(config);
+
+        this.usesFencableWriter = config.transactionalLeaderEnabled();
         this.topic = config.getString(DistributedConfig.CONFIG_TOPIC_CONFIG);
         if (this.topic == null || this.topic.trim().length() == 0)
             throw new ConfigException("Must specify topic for connector configuration.");
@@ -282,7 +318,7 @@ public KafkaConfigBackingStore(Converter converter, WorkerConfig config, WorkerC
     }
 
     @Override
-    public void setUpdateListener(UpdateListener listener) {
+    public void setUpdateListener(ConfigBackingStore.UpdateListener listener) {
         this.updateListener = listener;
     }
 
@@ -291,7 +327,16 @@ public void start() {
         log.info("Starting KafkaConfigBackingStore");
         // Before startup, callbacks are *not* invoked. You can grab a snapshot after starting -- just take care that
         // updates can continue to occur in the background
-        configLog.start();
+        try {
+            configLog.start();
+        } catch (UnsupportedVersionException e) {
+            throw new ConnectException(
+                    "Enabling exactly-once support for source connectors requires a Kafka broker version that allows "
+                            + "admin clients to read consumer offsets. Please either disable the worker's exactly-once "
+                            + "support for source connectors, or use a newer Kafka broker version.",
+                    e
+            );
+        }
 
         int partitionCount = configLog.partitionCount();
         if (partitionCount > 1) {
@@ -309,14 +354,63 @@ public void start() {
     @Override
     public void stop() {
         log.info("Closing KafkaConfigBackingStore");
-        try {
-            configLog.stop();
-        } finally {
-            if (ownTopicAdmin != null) {
-                ownTopicAdmin.close();
+
+        relinquishWritePrivileges();
+        Utils.closeQuietly(ownTopicAdmin, "admin for config topic");
+        Utils.closeQuietly(configLog::stop, "KafkaBasedLog for config topic");
+
+        log.info("Closed KafkaConfigBackingStore");
+    }
+
+    @Override
+    public void claimWritePrivileges() {
+        if (usesFencableWriter && fencableProducer == null) {
+            try {
+                fencableProducer = createFencableProducer();
+                fencableProducer.initTransactions();
+            } catch (Exception e) {
+                relinquishWritePrivileges();
+                throw new ConnectException("Failed to create and initialize fencable producer for config topic", e);
             }
         }
-        log.info("Closed KafkaConfigBackingStore");
+    }
+
+    private Map<String, Object> baseProducerProps(WorkerConfig workerConfig) {
+        Map<String, Object> producerProps = new HashMap<>(workerConfig.originals());
+        String kafkaClusterId = ConnectUtils.lookupKafkaClusterId(workerConfig);
+        producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
+        producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
+        producerProps.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, Integer.MAX_VALUE);
+        ConnectUtils.addMetricsContextProperties(producerProps, workerConfig, kafkaClusterId);
+        return producerProps;
+    }
+
+    // Visible for testing
+    Map<String, Object> fencableProducerProps(DistributedConfig workerConfig) {
+        Map<String, Object> result = new HashMap<>(baseProducerProps(workerConfig));
+
+        // Always require producer acks to all to ensure durable writes
+        result.put(ProducerConfig.ACKS_CONFIG, "all");
+        // We can set this to 5 instead of 1 without risking reordering because we are using an idempotent producer
+        result.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 5);
+
+        ConnectUtils.ensureProperty(
+                result, ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true",
+                "for the worker's config topic producer when exactly-once source support is enabled or in preparation to be enabled",
+                false
+        );
+        ConnectUtils.ensureProperty(
+                result, ProducerConfig.TRANSACTIONAL_ID_CONFIG, workerConfig.transactionalProducerId(),
+                "for the worker's config topic producer when exactly-once source support is enabled or in preparation to be enabled",
+                true
+        );
+
+        return result;
+    }
+
+    // Visible in order to be mocked during testing
+    Producer<String, byte[]> createFencableProducer() {
+        return new KafkaProducer<>(fencableProducerProps);
     }
 
     /**
@@ -334,6 +428,9 @@ public ClusterConfigState snapshot() {
                     new HashMap<>(connectorConfigs),
                     new HashMap<>(connectorTargetStates),
                     new HashMap<>(taskConfigs),
+                    new HashMap<>(connectorTaskCountRecords),
+                    new HashMap<>(connectorTaskConfigGenerations),
+                    new HashSet<>(connectorsPendingFencing),
                     new HashSet<>(inconsistent),
                     configTransformer
             );
@@ -349,10 +446,15 @@ public boolean contains(String connector) {
 
     /**
      * Write this connector configuration to persistent storage and wait until it has been acknowledged and read back by
-     * tailing the Kafka log with a consumer.
+     * tailing the Kafka log with a consumer. {@link #claimWritePrivileges()} must be successfully invoked before calling
+     * this method if the worker is configured to use a fencable producer for writes to the config topic.
      *
      * @param connector  name of the connector to write data for
      * @param properties the configuration to write
+     * @throws IllegalStateException if {@link #claimWritePrivileges()} is required, but was not successfully invoked before
+     * this method was called
+     * @throws PrivilegedWriteException if the worker is configured to use a fencable producer for writes to the config topic
+     * and the write fails
      */
     @Override
     public void putConnectorConfig(String connector, Map<String, String> properties) {
@@ -360,19 +462,30 @@ public void putConnectorConfig(String connector, Map<String, String> properties)
         Struct connectConfig = new Struct(CONNECTOR_CONFIGURATION_V0);
         connectConfig.put("properties", properties);
         byte[] serializedConfig = converter.fromConnectData(topic, CONNECTOR_CONFIGURATION_V0, connectConfig);
-        updateConnectorConfig(connector, serializedConfig);
+        try {
+            sendPrivileged(CONNECTOR_KEY(connector), serializedConfig);
+            configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException | ExecutionException | TimeoutException e) {
+            log.error("Failed to write connector configuration to Kafka: ", e);
+            throw new ConnectException("Error writing connector configuration to Kafka", e);
+        }
     }
 
     /**
-     * Remove configuration for a given connector.
+     * Remove configuration for a given connector. {@link #claimWritePrivileges()} must be successfully invoked before calling
+     * this method if the worker is configured to use a fencable producer for writes to the config topic.
      * @param connector name of the connector to remove
+     * @throws IllegalStateException if {@link #claimWritePrivileges()} is required, but was not successfully invoked before
+     * this method was called
+     * @throws PrivilegedWriteException if the worker is configured to use a fencable producer for writes to the config topic
+     * and the write fails
      */
     @Override
     public void removeConnectorConfig(String connector) {
         log.debug("Removing connector configuration for connector '{}'", connector);
         try {
-            configLog.send(CONNECTOR_KEY(connector), null);
-            configLog.send(TARGET_STATE_KEY(connector), null);
+            sendPrivileged(CONNECTOR_KEY(connector), null);
+            sendPrivileged(TARGET_STATE_KEY(connector), null);
             configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
         } catch (InterruptedException | ExecutionException | TimeoutException e) {
             log.error("Failed to remove connector configuration from Kafka: ", e);
@@ -385,24 +498,20 @@ public void removeTaskConfigs(String connector) {
         throw new UnsupportedOperationException("Removal of tasks is not currently supported");
     }
 
-    private void updateConnectorConfig(String connector, byte[] serializedConfig) {
-        try {
-            configLog.send(CONNECTOR_KEY(connector), serializedConfig);
-            configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
-        } catch (InterruptedException | ExecutionException | TimeoutException e) {
-            log.error("Failed to write connector configuration to Kafka: ", e);
-            throw new ConnectException("Error writing connector configuration to Kafka", e);
-        }
-    }
-
     /**
      * Write these task configurations and associated commit messages, unless an inconsistency is found that indicates
-     * that we would be leaving one of the referenced connectors with an inconsistent state.
+     * that we would be leaving one of the referenced connectors with an inconsistent state. {@link #claimWritePrivileges()}
+     * must be successfully invoked before calling this method if the worker is configured to use a fencable producer for
+     * writes to the config topic.
      *
      * @param connector the connector to write task configuration
      * @param configs list of task configurations for the connector
      * @throws ConnectException if the task configurations do not resolve inconsistencies found in the existing root
      *                          and task configurations.
+     * @throws IllegalStateException if {@link #claimWritePrivileges()} is required, but was not successfully invoked before
+     * this method was called
+     * @throws PrivilegedWriteException if the worker is configured to use a fencable producer for writes to the config topic
+     * and the write fails
      */
     @Override
     public void putTaskConfigs(String connector, List<Map<String, String>> configs) {
@@ -425,7 +534,7 @@ public void putTaskConfigs(String connector, List<Map<String, String>> configs)
             byte[] serializedConfig = converter.fromConnectData(topic, TASK_CONFIGURATION_V0, connectConfig);
             log.debug("Writing configuration for connector '{}' task {}", connector, index);
             ConnectorTaskId connectorTaskId = new ConnectorTaskId(connector, index);
-            configLog.send(TASK_KEY(connectorTaskId), serializedConfig);
+            sendPrivileged(TASK_KEY(connectorTaskId), serializedConfig);
             index++;
         }
 
@@ -441,7 +550,7 @@ public void putTaskConfigs(String connector, List<Map<String, String>> configs)
             connectConfig.put("tasks", taskCount);
             byte[] serializedConfig = converter.fromConnectData(topic, CONNECTOR_TASKS_COMMIT_V0, connectConfig);
             log.debug("Writing commit for connector '{}' with {} tasks.", connector, taskCount);
-            configLog.send(COMMIT_TASKS_KEY(connector), serializedConfig);
+            sendPrivileged(COMMIT_TASKS_KEY(connector), serializedConfig);
 
             // Read to end to ensure all the commit messages have been written
             configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
@@ -460,6 +569,12 @@ public void refresh(long timeout, TimeUnit unit) throws TimeoutException {
         }
     }
 
+    /**
+     * Write a new {@link TargetState} for the connector. Note that {@link #claimWritePrivileges()} does not need to be
+     * invoked before invoking this method.
+     * @param connector the name of the connector
+     * @param state the desired target state for the connector
+     */
     @Override
     public void putTargetState(String connector, TargetState state) {
         Struct connectTargetState = new Struct(TARGET_STATE_V0);
@@ -469,6 +584,42 @@ public void putTargetState(String connector, TargetState state) {
         configLog.send(TARGET_STATE_KEY(connector), serializedTargetState);
     }
 
+    /**
+     * Write a task count record for a connector to persistent storage and wait until it has been acknowledged and read back by
+     * tailing the Kafka log with a consumer. {@link #claimWritePrivileges()} must be successfully invoked before calling this method
+     * if the worker is configured to use a fencable producer for writes to the config topic.
+     * @param connector name of the connector
+     * @param taskCount number of tasks used by the connector
+     * @throws IllegalStateException if {@link #claimWritePrivileges()} is required, but was not successfully invoked before
+     * this method was called
+     * @throws PrivilegedWriteException if the worker is configured to use a fencable producer for writes to the config topic
+     * and the write fails
+     */
+    @Override
+    public void putTaskCountRecord(String connector, int taskCount) {
+        Struct taskCountRecord = new Struct(TASK_COUNT_RECORD_V0);
+        taskCountRecord.put("task-count", taskCount);
+        byte[] serializedTaskCountRecord = converter.fromConnectData(topic, TASK_COUNT_RECORD_V0, taskCountRecord);
+        log.debug("Writing task count record {} for connector {}", taskCount, connector);
+        try {
+            sendPrivileged(TASK_COUNT_RECORD_KEY(connector), serializedTaskCountRecord);
+            configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException | ExecutionException | TimeoutException e) {
+            log.error("Failed to write task count record with {} tasks for connector {} to Kafka: ", taskCount, connector, e);
+            throw new ConnectException("Error writing task count record to Kafka", e);
+        }
+    }
+
+    /**
+     * Write a session key to persistent storage and wait until it has been acknowledged and read back by tailing the Kafka log
+     * with a consumer. {@link #claimWritePrivileges()} must be successfully invoked before calling this method if the worker
+     * is configured to use a fencable producer for writes to the config topic.
+     * @param sessionKey the session key to distributed
+     * @throws IllegalStateException if {@link #claimWritePrivileges()} is required, but was not successfully invoked before
+     * this method was called
+     * @throws PrivilegedWriteException if the worker is configured to use a fencable producer for writes to the config topic
+     * and the write fails
+     */
     @Override
     public void putSessionKey(SessionKey sessionKey) {
         log.debug("Distributing new session key");
@@ -478,7 +629,7 @@ public void putSessionKey(SessionKey sessionKey) {
         sessionKeyStruct.put("creation-timestamp", sessionKey.creationTimestamp());
         byte[] serializedSessionKey = converter.fromConnectData(topic, SESSION_KEY_V0, sessionKeyStruct);
         try {
-            configLog.send(SESSION_KEY_KEY, serializedSessionKey);
+            sendPrivileged(SESSION_KEY_KEY, serializedSessionKey);
             configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
         } catch (InterruptedException | ExecutionException | TimeoutException e) {
             log.error("Failed to write session key to Kafka: ", e);
@@ -486,6 +637,12 @@ public void putSessionKey(SessionKey sessionKey) {
         }
     }
 
+    /**
+     * Write a restart request for the connector and optionally its tasks to persistent storage and wait until it has been
+     * acknowledged and read back by tailing the Kafka log with a consumer. {@link #claimWritePrivileges()} must be successfully
+     * invoked before calling this method if the worker is configured to use a fencable producer for writes to the config topic.
+     * @param restartRequest the restart request details
+     */
     @Override
     public void putRestartRequest(RestartRequest restartRequest) {
         log.debug("Writing {} to Kafka", restartRequest);
@@ -495,7 +652,7 @@ public void putRestartRequest(RestartRequest restartRequest) {
         value.put(ONLY_FAILED_FIELD_NAME, restartRequest.onlyFailed());
         byte[] serializedValue = converter.fromConnectData(topic, value.schema(), value);
         try {
-            configLog.send(key, serializedValue);
+            sendPrivileged(key, serializedValue);
             configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
         } catch (InterruptedException | ExecutionException | TimeoutException e) {
             log.error("Failed to write {} to Kafka: ", restartRequest, e);
@@ -505,18 +662,22 @@ public void putRestartRequest(RestartRequest restartRequest) {
 
     // package private for testing
     KafkaBasedLog<String, byte[]> setupAndCreateKafkaBasedLog(String topic, final WorkerConfig config) {
+        Map<String, Object> producerProps = new HashMap<>(baseProducerProps);
+
         String clusterId = ConnectUtils.lookupKafkaClusterId(config);
         Map<String, Object> originals = config.originals();
-        Map<String, Object> producerProps = new HashMap<>(originals);
-        producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
-        producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
-        producerProps.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, Integer.MAX_VALUE);
-        ConnectUtils.addMetricsContextProperties(producerProps, config, clusterId);
 
         Map<String, Object> consumerProps = new HashMap<>(originals);
         consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
         consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
         ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
+        if (config.exactlyOnceSourceEnabled()) {
+            ConnectUtils.ensureProperty(
+                    consumerProps, ConsumerConfig.ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                    "for the worker's config topic consumer when exactly-once source support is enabled",
+                    true
+            );
+        }
 
         Map<String, Object> adminProps = new HashMap<>(originals);
         ConnectUtils.addMetricsContextProperties(adminProps, config, clusterId);
@@ -541,6 +702,34 @@ KafkaBasedLog<String, byte[]> setupAndCreateKafkaBasedLog(String topic, final Wo
         return createKafkaBasedLog(topic, producerProps, consumerProps, new ConsumeCallback(), topicDescription, adminSupplier);
     }
 
+    private void sendPrivileged(String key, byte[] value) {
+        if (!usesFencableWriter) {
+            configLog.send(key, value);
+            return;
+        }
+
+        if (fencableProducer == null) {
+            throw new IllegalStateException("Cannot produce to config topic without claiming write privileges first");
+        }
+
+        try {
+            fencableProducer.beginTransaction();
+            fencableProducer.send(new ProducerRecord<>(topic, key, value));
+            fencableProducer.commitTransaction();
+        } catch (Exception e) {
+            log.warn("Failed to perform fencable send to config topic", e);
+            relinquishWritePrivileges();
+            throw new PrivilegedWriteException("Failed to perform fencable send to config topic", e);
+        }
+    }
+
+    private void relinquishWritePrivileges() {
+        if (fencableProducer != null) {
+            Utils.closeQuietly(() -> fencableProducer.close(Duration.ZERO), "fencable producer for config topic");
+            fencableProducer = null;
+        }
+    }
+
     private KafkaBasedLog<String, byte[]> createKafkaBasedLog(String topic, Map<String, Object> producerProps,
                                                               Map<String, Object> consumerProps,
                                                               Callback<ConsumerRecord<String, byte[]>> consumedCallback,
@@ -559,7 +748,6 @@ private KafkaBasedLog<String, byte[]> createKafkaBasedLog(String topic, Map<Stri
         return new KafkaBasedLog<>(topic, producerProps, consumerProps, adminSupplier, consumedCallback, Time.SYSTEM, createTopics);
     }
 
-    @SuppressWarnings("unchecked")
     private class ConsumeCallback implements Callback<ConsumerRecord<String, byte[]>> {
         @Override
         public void onCompletion(Throwable error, ConsumerRecord<String, byte[]> record) {
@@ -581,226 +769,221 @@ public void onCompletion(Throwable error, ConsumerRecord<String, byte[]> record)
 
             if (record.key().startsWith(TARGET_STATE_PREFIX)) {
                 String connectorName = record.key().substring(TARGET_STATE_PREFIX.length());
-                boolean removed = false;
-                synchronized (lock) {
-                    if (value.value() == null) {
-                        // When connector configs are removed, we also write tombstones for the target state.
-                        log.debug("Removed target state for connector {} due to null value in topic.", connectorName);
-                        connectorTargetStates.remove(connectorName);
-                        removed = true;
-
-                        // If for some reason we still have configs for the connector, add back the default
-                        // STARTED state to ensure each connector always has a valid target state.
-                        if (connectorConfigs.containsKey(connectorName))
-                            connectorTargetStates.put(connectorName, TargetState.STARTED);
-                    } else {
-                        if (!(value.value() instanceof Map)) {
-                            log.error("Found target state ({}) in wrong format: {}",  record.key(), value.value().getClass());
-                            return;
-                        }
-                        Object targetState = ((Map<String, Object>) value.value()).get("state");
-                        if (!(targetState instanceof String)) {
-                            log.error("Invalid data for target state for connector '{}': 'state' field should be a Map but is {}",
-                                    connectorName, targetState == null ? null : targetState.getClass());
-                            return;
-                        }
-
-                        try {
-                            TargetState state = TargetState.valueOf((String) targetState);
-                            log.debug("Setting target state for connector '{}' to {}", connectorName, targetState);
-                            connectorTargetStates.put(connectorName, state);
-                        } catch (IllegalArgumentException e) {
-                            log.error("Invalid target state for connector '{}': {}", connectorName, targetState);
-                            return;
-                        }
-                    }
-                }
-
-                // Note that we do not notify the update listener if the target state has been removed.
-                // Instead we depend on the removal callback of the connector config itself to notify the worker.
-                if (started && !removed)
-                    updateListener.onConnectorTargetStateChange(connectorName);
-
+                processTargetStateRecord(connectorName, value);
             } else if (record.key().startsWith(CONNECTOR_PREFIX)) {
                 String connectorName = record.key().substring(CONNECTOR_PREFIX.length());
-                boolean removed = false;
-                synchronized (lock) {
-                    if (value.value() == null) {
-                        // Connector deletion will be written as a null value
-                        log.info("Successfully processed removal of connector '{}'", connectorName);
-                        connectorConfigs.remove(connectorName);
-                        connectorTaskCounts.remove(connectorName);
-                        taskConfigs.keySet().removeIf(taskId -> taskId.connector().equals(connectorName));
-                        removed = true;
-                    } else {
-                        // Connector configs can be applied and callbacks invoked immediately
-                        if (!(value.value() instanceof Map)) {
-                            log.error("Found configuration for connector '{}' in wrong format: {}", record.key(), value.value().getClass());
-                            return;
-                        }
-                        Object newConnectorConfig = ((Map<String, Object>) value.value()).get("properties");
-                        if (!(newConnectorConfig instanceof Map)) {
-                            log.error("Invalid data for config for connector '{}': 'properties' field should be a Map but is {}",
-                                      connectorName, newConnectorConfig == null ? null : newConnectorConfig.getClass());
-                            return;
-                        }
-                        log.debug("Updating configuration for connector '{}'", connectorName);
-                        connectorConfigs.put(connectorName, (Map<String, String>) newConnectorConfig);
-
-                        // Set the initial state of the connector to STARTED, which ensures that any connectors
-                        // which were created with 0.9 Connect will be initialized in the STARTED state.
-                        if (!connectorTargetStates.containsKey(connectorName))
-                            connectorTargetStates.put(connectorName, TargetState.STARTED);
-                    }
-                }
-                if (started) {
-                    if (removed)
-                        updateListener.onConnectorConfigRemove(connectorName);
-                    else
-                        updateListener.onConnectorConfigUpdate(connectorName);
-                }
+                processConnectorConfigRecord(connectorName, value);
             } else if (record.key().startsWith(TASK_PREFIX)) {
-                synchronized (lock) {
-                    ConnectorTaskId taskId = parseTaskId(record.key());
-                    if (taskId == null) {
-                        log.error("Ignoring task configuration because {} couldn't be parsed as a task config key", record.key());
-                        return;
-                    }
-                    if (value.value() == null) {
-                        log.error("Ignoring task configuration for task {} because it is unexpectedly null", taskId);
-                        return;
-                    }
-                    if (!(value.value() instanceof Map)) {
-                        log.error("Ignoring task configuration for task {} because the value is not a Map but is {}", taskId, value.value().getClass());
-                        return;
-                    }
-
-                    Object newTaskConfig = ((Map<String, Object>) value.value()).get("properties");
-                    if (!(newTaskConfig instanceof Map)) {
-                        log.error("Invalid data for config of task {} 'properties' field should be a Map but is {}", taskId, newTaskConfig.getClass());
-                        return;
-                    }
-
-                    Map<ConnectorTaskId, Map<String, String>> deferred = deferredTaskUpdates.computeIfAbsent(taskId.connector(), k -> new HashMap<>());
-                    log.debug("Storing new config for task {}; this will wait for a commit message before the new config will take effect.", taskId);
-                    deferred.put(taskId, (Map<String, String>) newTaskConfig);
+                ConnectorTaskId taskId = parseTaskId(record.key());
+                if (taskId == null) {
+                    log.error("Ignoring task configuration because {} couldn't be parsed as a task config key", record.key());
+                    return;
                 }
+                processTaskConfigRecord(taskId, value);
             } else if (record.key().startsWith(COMMIT_TASKS_PREFIX)) {
                 String connectorName = record.key().substring(COMMIT_TASKS_PREFIX.length());
-                List<ConnectorTaskId> updatedTasks = new ArrayList<>();
-                synchronized (lock) {
-                    // Apply any outstanding deferred task updates for the given connector. Note that just because we
-                    // encounter a commit message does not mean it will result in consistent output. In particular due to
-                    // compaction, there may be cases where . For example if we have the following sequence of writes:
-                    //
-                    // 1. Write connector "foo"'s config
-                    // 2. Write connector "foo", task 1's config <-- compacted
-                    // 3. Write connector "foo", task 2's config
-                    // 4. Write connector "foo" task commit message
-                    // 5. Write connector "foo", task 1's config
-                    // 6. Write connector "foo", task 2's config
-                    // 7. Write connector "foo" task commit message
-                    //
-                    // then when a new worker starts up, if message 2 had been compacted, then when message 4 is applied
-                    // "foo" will not have a complete set of configs. Only when message 7 is applied will the complete
-                    // configuration be available. Worse, if the leader died while writing messages 5, 6, and 7 such that
-                    // only 5 was written, then there may be nothing that will finish writing the configs and get the
-                    // log back into a consistent state.
-                    //
-                    // It is expected that the user of this class (i.e., the Herder) will take the necessary action to
-                    // resolve this (i.e., get the connector to recommit its configuration). This inconsistent state is
-                    // exposed in the snapshots provided via ClusterConfigState so they are easy to handle.
-                    if (!(value.value() instanceof Map)) { // Schema-less, so we get maps instead of structs
-                        log.error("Ignoring connector tasks configuration commit for connector '{}' because it is in the wrong format: {}", connectorName, value.value());
-                        return;
-                    }
-                    Map<ConnectorTaskId, Map<String, String>> deferred = deferredTaskUpdates.get(connectorName);
-
-                    int newTaskCount = intValue(((Map<String, Object>) value.value()).get("tasks"));
-
-                    // Validate the configs we're supposed to update to ensure we're getting a complete configuration
-                    // update of all tasks that are expected based on the number of tasks in the commit message.
-                    Set<Integer> taskIdSet = taskIds(connectorName, deferred);
-                    if (!completeTaskIdSet(taskIdSet, newTaskCount)) {
-                        // Given the logic for writing commit messages, we should only hit this condition due to compacted
-                        // historical data, in which case we would not have applied any updates yet and there will be no
-                        // task config data already committed for the connector, so we shouldn't have to clear any data
-                        // out. All we need to do is add the flag marking it inconsistent.
-                        log.debug("We have an incomplete set of task configs for connector '{}' probably due to compaction. So we are not doing anything with the new configuration.", connectorName);
-                        inconsistent.add(connectorName);
-                    } else {
-                        if (deferred != null) {
-                            taskConfigs.putAll(deferred);
-                            updatedTasks.addAll(deferred.keySet());
-                        }
-                        inconsistent.remove(connectorName);
-                    }
-                    // Always clear the deferred entries, even if we didn't apply them. If they represented an inconsistent
-                    // update, then we need to see a completely fresh set of configs after this commit message, so we don't
-                    // want any of these outdated configs
-                    if (deferred != null)
-                        deferred.clear();
-
-                    connectorTaskCounts.put(connectorName, newTaskCount);
-                }
-
-                if (started)
-                    updateListener.onTaskConfigUpdate(updatedTasks);
+                processTasksCommitRecord(connectorName, value);
             } else if (record.key().startsWith(RESTART_PREFIX)) {
                 RestartRequest request = recordToRestartRequest(record, value);
                 // Only notify the listener if this backing store is already successfully started (having caught up the first time)
                 if (request != null && started) {
                     updateListener.onRestartRequest(request);
                 }
+            } else if (record.key().startsWith(TASK_COUNT_RECORD_PREFIX)) {
+                String connectorName = record.key().substring(TASK_COUNT_RECORD_PREFIX.length());
+                processTaskCountRecord(connectorName, value);
             } else if (record.key().equals(SESSION_KEY_KEY)) {
-                if (value.value() == null) {
-                    log.error("Ignoring session key because it is unexpectedly null");
+                processSessionKeyRecord(value);
+            } else {
+                log.error("Discarding config update record with invalid key: {}", record.key());
+            }
+        }
+
+    }
+
+    private void processTargetStateRecord(String connectorName, SchemaAndValue value) {
+        boolean removed = false;
+        synchronized (lock) {
+            if (value.value() == null) {
+                // When connector configs are removed, we also write tombstones for the target state.
+                log.debug("Removed target state for connector {} due to null value in topic.", connectorName);
+                connectorTargetStates.remove(connectorName);
+                removed = true;
+
+                // If for some reason we still have configs for the connector, add back the default
+                // STARTED state to ensure each connector always has a valid target state.
+                if (connectorConfigs.containsKey(connectorName))
+                    connectorTargetStates.put(connectorName, TargetState.STARTED);
+            } else {
+                if (!(value.value() instanceof Map)) {
+                    log.error("Ignoring target state for connector '{}' because it is in the wrong format: {}", connectorName, className(value.value()));
                     return;
                 }
-                if (!(value.value() instanceof Map)) {
-                    log.error("Ignoring session key because the value is not a Map but is {}", value.value().getClass());
+                @SuppressWarnings("unchecked")
+                Object targetState = ((Map<String, Object>) value.value()).get("state");
+                if (!(targetState instanceof String)) {
+                    log.error("Invalid data for target state for connector '{}': 'state' field should be a String but is {}",
+                            connectorName, className(targetState));
                     return;
                 }
 
-                Map<String, Object> valueAsMap = (Map<String, Object>) value.value();
-
-                Object sessionKey = valueAsMap.get("key");
-                if (!(sessionKey instanceof String)) {
-                    log.error("Invalid data for session key 'key' field should be a String but is {}", sessionKey.getClass());
+                try {
+                    TargetState state = TargetState.valueOf((String) targetState);
+                    log.debug("Setting target state for connector '{}' to {}", connectorName, targetState);
+                    connectorTargetStates.put(connectorName, state);
+                } catch (IllegalArgumentException e) {
+                    log.error("Invalid target state for connector '{}': {}", connectorName, targetState);
                     return;
                 }
-                byte[] key = Base64.getDecoder().decode((String) sessionKey);
+            }
+        }
+
+        // Note that we do not notify the update listener if the target state has been removed.
+        // Instead we depend on the removal callback of the connector config itself to notify the worker.
+        if (started && !removed)
+            updateListener.onConnectorTargetStateChange(connectorName);
+    }
 
-                Object keyAlgorithm = valueAsMap.get("algorithm");
-                if (!(keyAlgorithm instanceof String)) {
-                    log.error("Invalid data for session key 'algorithm' field should be a String but it is {}", keyAlgorithm.getClass());
+    private void processConnectorConfigRecord(String connectorName, SchemaAndValue value) {
+        boolean removed = false;
+        synchronized (lock) {
+            if (value.value() == null) {
+                // Connector deletion will be written as a null value
+                log.info("Successfully processed removal of connector '{}'", connectorName);
+                connectorConfigs.remove(connectorName);
+                connectorTaskCounts.remove(connectorName);
+                taskConfigs.keySet().removeIf(taskId -> taskId.connector().equals(connectorName));
+                removed = true;
+            } else {
+                // Connector configs can be applied and callbacks invoked immediately
+                if (!(value.value() instanceof Map)) {
+                    log.error("Ignoring configuration for connector '{}' because it is in the wrong format: {}", connectorName, className(value.value()));
                     return;
                 }
-
-                Object creationTimestamp = valueAsMap.get("creation-timestamp");
-                if (!(creationTimestamp instanceof Long)) {
-                    log.error("Invalid data for session key 'creation-timestamp' field should be a long but it is {}", creationTimestamp.getClass());
+                @SuppressWarnings("unchecked")
+                Object newConnectorConfig = ((Map<String, Object>) value.value()).get("properties");
+                if (!(newConnectorConfig instanceof Map)) {
+                    log.error("Invalid data for config for connector '{}': 'properties' field should be a Map but is {}",
+                            connectorName, className(newConnectorConfig));
                     return;
                 }
-                KafkaConfigBackingStore.this.sessionKey = new SessionKey(
-                        new SecretKeySpec(key, (String) keyAlgorithm),
-                        (long) creationTimestamp
-                );
+                log.debug("Updating configuration for connector '{}'", connectorName);
+                @SuppressWarnings("unchecked")
+                Map<String, String> stringsConnectorConfig = (Map<String, String>) newConnectorConfig;
+                connectorConfigs.put(connectorName, stringsConnectorConfig);
+
+                // Set the initial state of the connector to STARTED, which ensures that any connectors
+                // which were created with 0.9 Connect will be initialized in the STARTED state.
+                if (!connectorTargetStates.containsKey(connectorName))
+                    connectorTargetStates.put(connectorName, TargetState.STARTED);
+            }
+        }
+        if (started) {
+            if (removed)
+                updateListener.onConnectorConfigRemove(connectorName);
+            else
+                updateListener.onConnectorConfigUpdate(connectorName);
+        }
+    }
 
-                if (started)
-                    updateListener.onSessionKeyUpdate(KafkaConfigBackingStore.this.sessionKey);
+    private void processTaskConfigRecord(ConnectorTaskId taskId, SchemaAndValue value) {
+        synchronized (lock) {
+            if (value.value() == null) {
+                log.error("Ignoring task configuration for task {} because it is unexpectedly null", taskId);
+                return;
+            }
+            if (!(value.value() instanceof Map)) {
+                log.error("Ignoring task configuration for task {} because the value is not a Map but is {}", taskId, className(value.value()));
+                return;
+            }
+
+            @SuppressWarnings("unchecked")
+            Object newTaskConfig = ((Map<String, Object>) value.value()).get("properties");
+            if (!(newTaskConfig instanceof Map)) {
+                log.error("Invalid data for config of task {} 'properties' field should be a Map but is {}", taskId, className(newTaskConfig));
+                return;
+            }
+
+            Map<ConnectorTaskId, Map<String, String>> deferred = deferredTaskUpdates.computeIfAbsent(taskId.connector(), k -> new HashMap<>());
+            log.debug("Storing new config for task {}; this will wait for a commit message before the new config will take effect.", taskId);
+            @SuppressWarnings("unchecked")
+            Map<String, String> stringsTaskConfig = (Map<String, String>) newTaskConfig;
+            deferred.put(taskId, stringsTaskConfig);
+        }
+    }
+
+    private void processTasksCommitRecord(String connectorName, SchemaAndValue value) {
+        List<ConnectorTaskId> updatedTasks = new ArrayList<>();
+        synchronized (lock) {
+            // Apply any outstanding deferred task updates for the given connector. Note that just because we
+            // encounter a commit message does not mean it will result in consistent output. In particular due to
+            // compaction, there may be cases where . For example if we have the following sequence of writes:
+            //
+            // 1. Write connector "foo"'s config
+            // 2. Write connector "foo", task 1's config <-- compacted
+            // 3. Write connector "foo", task 2's config
+            // 4. Write connector "foo" task commit message
+            // 5. Write connector "foo", task 1's config
+            // 6. Write connector "foo", task 2's config
+            // 7. Write connector "foo" task commit message
+            //
+            // then when a new worker starts up, if message 2 had been compacted, then when message 4 is applied
+            // "foo" will not have a complete set of configs. Only when message 7 is applied will the complete
+            // configuration be available. Worse, if the leader died while writing messages 5, 6, and 7 such that
+            // only 5 was written, then there may be nothing that will finish writing the configs and get the
+            // log back into a consistent state.
+            //
+            // It is expected that the user of this class (i.e., the Herder) will take the necessary action to
+            // resolve this (i.e., get the connector to recommit its configuration). This inconsistent state is
+            // exposed in the snapshots provided via ClusterConfigState so they are easy to handle.
+            if (!(value.value() instanceof Map)) { // Schema-less, so we get maps instead of structs
+                log.error("Ignoring connector tasks configuration commit for connector '{}' because it is in the wrong format: {}", connectorName, className(value.value()));
+                return;
+            }
+            Map<ConnectorTaskId, Map<String, String>> deferred = deferredTaskUpdates.get(connectorName);
+
+            @SuppressWarnings("unchecked")
+            int newTaskCount = intValue(((Map<String, Object>) value.value()).get("tasks"));
+
+            // Validate the configs we're supposed to update to ensure we're getting a complete configuration
+            // update of all tasks that are expected based on the number of tasks in the commit message.
+            Set<Integer> taskIdSet = taskIds(connectorName, deferred);
+            if (!completeTaskIdSet(taskIdSet, newTaskCount)) {
+                // Given the logic for writing commit messages, we should only hit this condition due to compacted
+                // historical data, in which case we would not have applied any updates yet and there will be no
+                // task config data already committed for the connector, so we shouldn't have to clear any data
+                // out. All we need to do is add the flag marking it inconsistent.
+                log.debug("We have an incomplete set of task configs for connector '{}' probably due to compaction. So we are not doing anything with the new configuration.", connectorName);
+                inconsistent.add(connectorName);
             } else {
-                log.error("Discarding config update record with invalid key: {}", record.key());
+                if (deferred != null) {
+                    taskConfigs.putAll(deferred);
+                    updatedTasks.addAll(deferred.keySet());
+                    connectorTaskConfigGenerations.compute(connectorName, (ignored, generation) -> generation != null ? generation + 1 : 0);
+                }
+                inconsistent.remove(connectorName);
             }
+            // Always clear the deferred entries, even if we didn't apply them. If they represented an inconsistent
+            // update, then we need to see a completely fresh set of configs after this commit message, so we don't
+            // want any of these outdated configs
+            if (deferred != null)
+                deferred.clear();
+
+            connectorTaskCounts.put(connectorName, newTaskCount);
         }
 
+        // If task configs appear after the latest task count record, the connector needs a new round of zombie fencing
+        // before it can start tasks with these configs
+        connectorsPendingFencing.add(connectorName);
+        if (started)
+            updateListener.onTaskConfigUpdate(updatedTasks);
     }
 
     @SuppressWarnings("unchecked")
     RestartRequest recordToRestartRequest(ConsumerRecord<String, byte[]> record, SchemaAndValue value) {
         String connectorName = record.key().substring(RESTART_PREFIX.length());
         if (!(value.value() instanceof Map)) {
-            log.error("Ignoring restart request because the value is not a Map but is {}", value.value() == null ? "null" : value.value().getClass());
+            log.error("Ignoring restart request because the value is not a Map but is {}", className(value.value()));
             return null;
         }
 
@@ -809,7 +992,7 @@ RestartRequest recordToRestartRequest(ConsumerRecord<String, byte[]> record, Sch
         Object failed = valueAsMap.get(ONLY_FAILED_FIELD_NAME);
         boolean onlyFailed;
         if (!(failed instanceof Boolean)) {
-            log.warn("Invalid data for restart request '{}' field should be a Boolean but is {}, defaulting to {}", ONLY_FAILED_FIELD_NAME, failed == null ? "null" : failed.getClass(), ONLY_FAILED_DEFAULT);
+            log.warn("Invalid data for restart request '{}' field should be a Boolean but is {}, defaulting to {}", ONLY_FAILED_FIELD_NAME, className(failed), ONLY_FAILED_DEFAULT);
             onlyFailed = ONLY_FAILED_DEFAULT;
         } else {
             onlyFailed = (Boolean) failed;
@@ -818,7 +1001,7 @@ RestartRequest recordToRestartRequest(ConsumerRecord<String, byte[]> record, Sch
         Object withTasks = valueAsMap.get(INCLUDE_TASKS_FIELD_NAME);
         boolean includeTasks;
         if (!(withTasks instanceof Boolean)) {
-            log.warn("Invalid data for restart request '{}' field should be a Boolean but is {}, defaulting to {}", INCLUDE_TASKS_FIELD_NAME, withTasks == null ? "null" : withTasks.getClass(), INCLUDE_TASKS_DEFAULT);
+            log.warn("Invalid data for restart request '{}' field should be a Boolean but is {}, defaulting to {}", INCLUDE_TASKS_FIELD_NAME, className(withTasks), INCLUDE_TASKS_DEFAULT);
             includeTasks = INCLUDE_TASKS_DEFAULT;
         } else {
             includeTasks = (Boolean) withTasks;
@@ -826,6 +1009,61 @@ RestartRequest recordToRestartRequest(ConsumerRecord<String, byte[]> record, Sch
         return new RestartRequest(connectorName, onlyFailed, includeTasks);
     }
 
+    private void processTaskCountRecord(String connectorName, SchemaAndValue value) {
+        if (!(value.value() instanceof Map)) {
+            log.error("Ignoring task count record for connector '{}' because it is in the wrong format: {}",  connectorName, className(value.value()));
+            return;
+        }
+        @SuppressWarnings("unchecked")
+        int taskCount = intValue(((Map<String, Object>) value.value()).get("task-count"));
+
+        log.debug("Setting task count record for connector '{}' to {}", connectorName, taskCount);
+        connectorTaskCountRecords.put(connectorName, taskCount);
+        // If a task count record appears after the latest task configs, the connectors doesn't need a round of zombie
+        // fencing before it can start tasks with the latest configs
+        connectorsPendingFencing.remove(connectorName);
+    }
+
+    private void processSessionKeyRecord(SchemaAndValue value) {
+        if (value.value() == null) {
+            log.error("Ignoring session key because it is unexpectedly null");
+            return;
+        }
+        if (!(value.value() instanceof Map)) {
+            log.error("Ignoring session key because the value is not a Map but is {}", className(value.value()));
+            return;
+        }
+
+        @SuppressWarnings("unchecked")
+        Map<String, Object> valueAsMap = (Map<String, Object>) value.value();
+
+        Object sessionKey = valueAsMap.get("key");
+        if (!(sessionKey instanceof String)) {
+            log.error("Invalid data for session key 'key' field should be a String but is {}", className(sessionKey));
+            return;
+        }
+        byte[] key = Base64.getDecoder().decode((String) sessionKey);
+
+        Object keyAlgorithm = valueAsMap.get("algorithm");
+        if (!(keyAlgorithm instanceof String)) {
+            log.error("Invalid data for session key 'algorithm' field should be a String but it is {}", className(keyAlgorithm));
+            return;
+        }
+
+        Object creationTimestamp = valueAsMap.get("creation-timestamp");
+        if (!(creationTimestamp instanceof Long)) {
+            log.error("Invalid data for session key 'creation-timestamp' field should be a long but it is {}", className(creationTimestamp));
+            return;
+        }
+        KafkaConfigBackingStore.this.sessionKey = new SessionKey(
+                new SecretKeySpec(key, (String) keyAlgorithm),
+                (long) creationTimestamp
+        );
+
+        if (started)
+            updateListener.onSessionKeyUpdate(KafkaConfigBackingStore.this.sessionKey);
+    }
+
     private ConnectorTaskId parseTaskId(String key) {
         String[] parts = key.split("-");
         if (parts.length < 3) return null;
@@ -896,5 +1134,9 @@ else if (value instanceof Long)
         else
             throw new ConnectException("Expected integer value to be either Integer or Long");
     }
+
+    private String className(Object o) {
+        return o != null ? o.getClass().getName() : "null";
+    }
 }
 
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
index 313baf72c58c0..6693572002616 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStore.java
@@ -17,15 +17,20 @@
 package org.apache.kafka.connect.storage;
 
 import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
 import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.TopicConfig;
+import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.serialization.ByteArrayDeserializer;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.connect.runtime.WorkerConfig;
 import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
 import org.apache.kafka.connect.util.Callback;
@@ -38,9 +43,11 @@
 import org.slf4j.LoggerFactory;
 
 import java.nio.ByteBuffer;
+import java.time.Duration;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
@@ -63,40 +70,134 @@
 public class KafkaOffsetBackingStore implements OffsetBackingStore {
     private static final Logger log = LoggerFactory.getLogger(KafkaOffsetBackingStore.class);
 
-    private KafkaBasedLog<byte[], byte[]> offsetLog;
-    private HashMap<ByteBuffer, ByteBuffer> data;
+    /**
+     * Build a connector-specific offset store with read and write support. The producer will be {@link Producer#close(Duration) closed}
+     * and the consumer will be {@link Consumer#close(Duration) closed} when this store is {@link #stop() stopped}, but the topic admin
+     * must be {@link TopicAdmin#close(Duration) closed} by the caller.
+     * @param topic the name of the offsets topic to use
+     * @param producer the producer to use for writing to the offsets topic
+     * @param consumer the consumer to use for reading from the offsets topic
+     * @param topicAdmin the topic admin to use for creating and querying metadata for the offsets topic
+     * @return an offset store backed by the given topic and Kafka clients
+     */
+    public static KafkaOffsetBackingStore forTask(
+            String topic,
+            Producer<byte[], byte[]> producer,
+            Consumer<byte[], byte[]> consumer,
+            TopicAdmin topicAdmin
+    ) {
+        return new KafkaOffsetBackingStore(() -> topicAdmin) {
+            @Override
+            public void configure(final WorkerConfig config) {
+                this.exactlyOnce = config.exactlyOnceSourceEnabled();
+                this.offsetLog = KafkaBasedLog.withExistingClients(
+                        topic,
+                        consumer,
+                        producer,
+                        topicAdmin,
+                        consumedCallback,
+                        Time.SYSTEM,
+                        initialize(topic, newTopicDescription(topic, config))
+                );
+            }
+        };
+    }
+
+    /**
+     * Build a connector-specific offset store with read-only support. The consumer will be {@link Consumer#close(Duration) closed}
+     * when this store is {@link #stop() stopped}, but the topic admin must be {@link TopicAdmin#close(Duration) closed} by the caller.
+     * @param topic the name of the offsets topic to use
+     * @param consumer the consumer to use for reading from the offsets topic
+     * @param topicAdmin the topic admin to use for creating and querying metadata for the offsets topic
+     * @return a read-only offset store backed by the given topic and Kafka clients
+     */
+    public static KafkaOffsetBackingStore forConnector(
+            String topic,
+            Consumer<byte[], byte[]> consumer,
+            TopicAdmin topicAdmin
+    ) {
+        return new KafkaOffsetBackingStore(() -> topicAdmin) {
+            @Override
+            public void configure(final WorkerConfig config) {
+                this.exactlyOnce = config.exactlyOnceSourceEnabled();
+                this.offsetLog = KafkaBasedLog.withExistingClients(
+                        topic,
+                        consumer,
+                        null,
+                        topicAdmin,
+                        consumedCallback,
+                        Time.SYSTEM,
+                        initialize(topic, newTopicDescription(topic, config))
+                );
+            }
+        };
+    }
+
+    protected KafkaBasedLog<byte[], byte[]> offsetLog;
+    private final HashMap<ByteBuffer, ByteBuffer> data = new HashMap<>();
     private final Supplier<TopicAdmin> topicAdminSupplier;
     private SharedTopicAdmin ownTopicAdmin;
+    protected boolean exactlyOnce;
 
+    /**
+     * Create an {@link OffsetBackingStore} backed by a Kafka topic. This constructor will cause the
+     * store to instantiate and close its own {@link TopicAdmin} during {@link #configure(WorkerConfig)}
+     * and {@link #stop()}, respectively.
+     *
+     * @deprecated use {@link #KafkaOffsetBackingStore(Supplier)} instead
+     */
     @Deprecated
     public KafkaOffsetBackingStore() {
         this.topicAdminSupplier = null;
     }
 
+    /**
+     * Create an {@link OffsetBackingStore} backed by a Kafka topic. This constructor will use the given
+     * {@link Supplier} to acquire a {@link TopicAdmin} that will be used for interactions with the backing
+     * Kafka topic. The caller is expected to manage the lifecycle of that object, including
+     * {@link TopicAdmin#close(Duration) closing} it when it is no longer needed.
+     * @param topicAdmin a {@link Supplier} for the {@link TopicAdmin} to use for this backing store;
+     *                   may not be null, and may not return null
+     */
     public KafkaOffsetBackingStore(Supplier<TopicAdmin> topicAdmin) {
         this.topicAdminSupplier = Objects.requireNonNull(topicAdmin);
     }
 
+
     @Override
     public void configure(final WorkerConfig config) {
         String topic = config.getString(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG);
         if (topic == null || topic.trim().length() == 0)
             throw new ConfigException("Offset storage topic must be specified");
 
+        this.exactlyOnce = config.exactlyOnceSourceEnabled();
+
         String clusterId = ConnectUtils.lookupKafkaClusterId(config);
-        data = new HashMap<>();
 
         Map<String, Object> originals = config.originals();
         Map<String, Object> producerProps = new HashMap<>(originals);
         producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
         producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
         producerProps.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, Integer.MAX_VALUE);
+        // By default, Connect disables idempotent behavior for all producers, even though idempotence became
+        // default for Kafka producers. This is to ensure Connect continues to work with many Kafka broker versions, including older brokers that do not support
+        // idempotent producers or require explicit steps to enable them (e.g. adding the IDEMPOTENT_WRITE ACL to brokers older than 2.8).
+        // These settings might change when https://cwiki.apache.org/confluence/display/KAFKA/KIP-318%3A+Make+Kafka+Connect+Source+idempotent
+        // gets approved and scheduled for release.
+        producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
         ConnectUtils.addMetricsContextProperties(producerProps, config, clusterId);
 
         Map<String, Object> consumerProps = new HashMap<>(originals);
         consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
         consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
         ConnectUtils.addMetricsContextProperties(consumerProps, config, clusterId);
+        if (config.exactlyOnceSourceEnabled()) {
+            ConnectUtils.ensureProperty(
+                    consumerProps, ConsumerConfig.ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                    "for the worker offsets topic consumer when exactly-once source support is enabled",
+                    false
+            );
+        }
 
         Map<String, Object> adminProps = new HashMap<>(originals);
         ConnectUtils.addMetricsContextProperties(adminProps, config, clusterId);
@@ -105,27 +206,36 @@ public void configure(final WorkerConfig config) {
             adminSupplier = topicAdminSupplier;
         } else {
             // Create our own topic admin supplier that we'll close when we're stopped
-            ownTopicAdmin = new SharedTopicAdmin(adminProps);
+            this.ownTopicAdmin = new SharedTopicAdmin(adminProps);
             adminSupplier = ownTopicAdmin;
         }
+        NewTopic topicDescription = newTopicDescription(topic, config);
+
+        this.offsetLog = createKafkaBasedLog(topic, producerProps, consumerProps, consumedCallback, topicDescription, adminSupplier);
+    }
+
+    private KafkaBasedLog<byte[], byte[]> createKafkaBasedLog(String topic, Map<String, Object> producerProps,
+                                                              Map<String, Object> consumerProps,
+                                                              Callback<ConsumerRecord<byte[], byte[]>> consumedCallback,
+                                                              final NewTopic topicDescription, Supplier<TopicAdmin> adminSupplier) {
+        java.util.function.Consumer<TopicAdmin> createTopics = initialize(topic, topicDescription);
+        return new KafkaBasedLog<>(topic, producerProps, consumerProps, adminSupplier, consumedCallback, Time.SYSTEM, createTopics);
+    }
+
+    protected NewTopic newTopicDescription(final String topic, final WorkerConfig config) {
         Map<String, Object> topicSettings = config instanceof DistributedConfig
-                                            ? ((DistributedConfig) config).offsetStorageTopicSettings()
-                                            : Collections.emptyMap();
-        NewTopic topicDescription = TopicAdmin.defineTopic(topic)
+                ? ((DistributedConfig) config).offsetStorageTopicSettings()
+                : Collections.emptyMap();
+        return TopicAdmin.defineTopic(topic)
                 .config(topicSettings) // first so that we override user-supplied settings as needed
                 .compacted()
                 .partitions(config.getInt(DistributedConfig.OFFSET_STORAGE_PARTITIONS_CONFIG))
                 .replicationFactor(config.getShort(DistributedConfig.OFFSET_STORAGE_REPLICATION_FACTOR_CONFIG))
                 .build();
-
-        offsetLog = createKafkaBasedLog(topic, producerProps, consumerProps, consumedCallback, topicDescription, adminSupplier);
     }
 
-    private KafkaBasedLog<byte[], byte[]> createKafkaBasedLog(String topic, Map<String, Object> producerProps,
-                                                              Map<String, Object> consumerProps,
-                                                              Callback<ConsumerRecord<byte[], byte[]>> consumedCallback,
-                                                              final NewTopic topicDescription, Supplier<TopicAdmin> adminSupplier) {
-        java.util.function.Consumer<TopicAdmin> createTopics = admin -> {
+    protected java.util.function.Consumer<TopicAdmin> initialize(final String topic, final NewTopic topicDescription) {
+        return admin -> {
             log.debug("Creating admin client to manage Connect internal offset topic");
             // Create the topic if it doesn't exist
             Set<String> newTopics = admin.createTopics(topicDescription);
@@ -136,16 +246,40 @@ private KafkaBasedLog<byte[], byte[]> createKafkaBasedLog(String topic, Map<Stri
                         DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, "source connector offsets");
             }
         };
-        return new KafkaBasedLog<>(topic, producerProps, consumerProps, adminSupplier, consumedCallback, Time.SYSTEM, createTopics);
     }
 
     @Override
     public void start() {
         log.info("Starting KafkaOffsetBackingStore");
-        offsetLog.start();
+        try {
+            offsetLog.start();
+        } catch (UnsupportedVersionException e) {
+            String message;
+            if (exactlyOnce) {
+                message = "Enabling exactly-once support for source connectors requires a Kafka broker version that allows "
+                        + "admin clients to read consumer offsets. Please either disable the worker's exactly-once "
+                        + "support for source connectors, or upgrade to a newer Kafka broker version.";
+            } else {
+                message = "When " + ConsumerConfig.ISOLATION_LEVEL_CONFIG + "is set to "
+                        + IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)
+                        + ", a Kafka broker version that allows admin clients to read consumer offsets is required. "
+                        + "Please either reconfigure the worker or connector, or upgrade to a newer Kafka broker version.";
+            }
+            throw new ConnectException(message, e);
+        }
         log.info("Finished reading offsets topic and starting KafkaOffsetBackingStore");
     }
 
+    /**
+     * Stop reading from and writing to the offsets topic, and relinquish resources allocated for interacting
+     * with it, including Kafka clients.
+     * <p>
+     * <b>Note:</b> if the now-deprecated {@link #KafkaOffsetBackingStore()} constructor was used to create
+     * this store, the underlying admin client allocated for interacting with the offsets topic will be closed.
+     * On the other hand, if the recommended {@link #KafkaOffsetBackingStore(Supplier)} constructor was used to
+     * create this store, the admin client derived from the given {@link Supplier} will not be closed and it is the
+     * caller's responsibility to manage its lifecycle accordingly.
+     */
     @Override
     public void stop() {
         log.info("Stopping KafkaOffsetBackingStore");
@@ -191,7 +325,7 @@ public Future<Void> set(final Map<ByteBuffer, ByteBuffer> values, final Callback
         return producerCallback;
     }
 
-    private final Callback<ConsumerRecord<byte[], byte[]>> consumedCallback = new Callback<ConsumerRecord<byte[], byte[]>>() {
+    protected final Callback<ConsumerRecord<byte[], byte[]>> consumedCallback = new Callback<ConsumerRecord<byte[], byte[]>>() {
         @Override
         public void onCompletion(Throwable error, ConsumerRecord<byte[], byte[]> record) {
             ByteBuffer key = record.key() != null ? ByteBuffer.wrap(record.key()) : null;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaStatusBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaStatusBackingStore.java
index c2aeba808012c..3ba6996da8ab7 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaStatusBackingStore.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/KafkaStatusBackingStore.java
@@ -170,7 +170,12 @@ public void configure(final WorkerConfig config) {
         producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
         producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
         producerProps.put(ProducerConfig.RETRIES_CONFIG, 0); // we handle retries in this class
-        producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, false); // disable idempotence since retries is force to 0
+        // By default, Connect disables idempotent behavior for all producers, even though idempotence became
+        // default for Kafka producers. This is to ensure Connect continues to work with many Kafka broker versions, including older brokers that do not support
+        // idempotent producers or require explicit steps to enable them (e.g. adding the IDEMPOTENT_WRITE ACL to brokers older than 2.8).
+        // These settings might change when https://cwiki.apache.org/confluence/display/KAFKA/KIP-318%3A+Make+Kafka+Connect+Source+idempotent
+        // gets approved and scheduled for release.
+        producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false"); // disable idempotence since retries is force to 0
         ConnectUtils.addMetricsContextProperties(producerProps, config, clusterId);
 
         Map<String, Object> consumerProps = new HashMap<>(originals);
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/MemoryConfigBackingStore.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/MemoryConfigBackingStore.java
index a8b2820404b71..1e483ec731920 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/MemoryConfigBackingStore.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/MemoryConfigBackingStore.java
@@ -20,7 +20,6 @@
 import org.apache.kafka.connect.runtime.SessionKey;
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.WorkerConfigTransformer;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 
 import java.util.Collections;
@@ -75,8 +74,12 @@ public synchronized ClusterConfigState snapshot() {
                 connectorConfigs,
                 connectorTargetStates,
                 taskConfigs,
+                Collections.emptyMap(),
+                Collections.emptyMap(),
                 Collections.emptySet(),
-                configTransformer);
+                Collections.emptySet(),
+                configTransformer
+        );
     }
 
     @Override
@@ -156,6 +159,11 @@ public void putRestartRequest(RestartRequest restartRequest) {
         // no-op
     }
 
+    @Override
+    public void putTaskCountRecord(String connector, int taskCount) {
+        // no-op
+    }
+
     @Override
     public synchronized void setUpdateListener(UpdateListener listener) {
         this.updateListener = listener;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageReaderImpl.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageReaderImpl.java
index a1eea43103a39..49b9d69d8ebc3 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageReaderImpl.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageReaderImpl.java
@@ -141,6 +141,7 @@ public <T> Map<Map<String, T>, Map<String, Object>> offsets(Collection<Map<Strin
         return result;
     }
 
+    @Override
     public void close() {
         if (!closed.getAndSet(true)) {
             synchronized (offsetReadFutures) {
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageWriter.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageWriter.java
index 7766e2cf5d1dc..b67e3d7b1b434 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageWriter.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/OffsetStorageWriter.java
@@ -89,8 +89,9 @@ public OffsetStorageWriter(OffsetBackingStore backingStore,
      * @param partition the partition to store an offset for
      * @param offset the offset
      */
-    public synchronized void offset(Map<String, Object> partition, Map<String, Object> offset) {
-        data.put(partition, offset);
+    @SuppressWarnings("unchecked")
+    public synchronized void offset(Map<String, ?> partition, Map<String, ?> offset) {
+        data.put((Map<String, Object>) partition, (Map<String, Object>) offset);
     }
 
     private boolean flushing() {
@@ -113,12 +114,18 @@ public synchronized boolean beginFlush() {
         if (data.isEmpty())
             return false;
 
-        assert !flushing();
         toFlush = data;
         data = new HashMap<>();
         return true;
     }
 
+    /**
+     * @return whether there's anything to flush right now.
+     */
+    public synchronized boolean willFlush() {
+        return !data.isEmpty();
+    }
+
     /**
      * Flush the current offsets and clear them from this writer. This is non-blocking: it
      * moves the current set of offsets out of the way, serializes the data, and asynchronously
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/storage/PrivilegedWriteException.java b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/PrivilegedWriteException.java
new file mode 100644
index 0000000000000..e4900fa9b0e59
--- /dev/null
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/storage/PrivilegedWriteException.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.storage;
+
+import org.apache.kafka.connect.errors.ConnectException;
+
+/**
+ * Used when a write that requires {@link ConfigBackingStore#claimWritePrivileges() special privileges} fails
+ */
+public class PrivilegedWriteException extends ConnectException {
+    public PrivilegedWriteException(String message) {
+        super(message);
+    }
+
+    public PrivilegedWriteException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/util/ConnectUtils.java b/connect/runtime/src/main/java/org/apache/kafka/connect/util/ConnectUtils.java
index 7adbd8f92dfd8..0af14cc7f30ec 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/util/ConnectUtils.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/util/ConnectUtils.java
@@ -30,10 +30,15 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Collection;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.concurrent.ExecutionException;
+import java.util.function.Function;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
 
 public final class ConnectUtils {
     private static final Logger log = LoggerFactory.getLogger(ConnectUtils.class);
@@ -160,4 +165,40 @@ public static boolean isSourceConnector(Connector connector) {
         return SourceConnector.class.isAssignableFrom(connector.getClass());
     }
 
+    public static <K, I, O> Map<K, O> transformValues(Map<K, I> map, Function<I, O> transformation) {
+        return map.entrySet().stream().collect(Collectors.toMap(
+                Map.Entry::getKey,
+                transformation.compose(Map.Entry::getValue)
+        ));
+    }
+
+    public static <I> List<I> combineCollections(Collection<Collection<I>> collections) {
+        return combineCollections(collections, Function.identity());
+    }
+
+    public static <I, T> List<T> combineCollections(Collection<I> collection, Function<I, Collection<T>> extractCollection) {
+        return combineCollections(collection, extractCollection, Collectors.toList());
+    }
+
+    public static <I, T, C> C combineCollections(
+            Collection<I> collection,
+            Function<I, Collection<T>> extractCollection,
+            Collector<T, ?, C> collector
+    ) {
+        return collection.stream()
+                .map(extractCollection)
+                .flatMap(Collection::stream)
+                .collect(collector);
+    }
+
+    public static ConnectException maybeWrap(Throwable t, String message) {
+        if (t == null) {
+            return null;
+        }
+        if (t instanceof ConnectException) {
+            return (ConnectException) t;
+        }
+        return new ConnectException(message, t);
+    }
+
 }
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/util/KafkaBasedLog.java b/connect/runtime/src/main/java/org/apache/kafka/connect/util/KafkaBasedLog.java
index 735c61919dc36..5da5be04a51f5 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/util/KafkaBasedLog.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/util/KafkaBasedLog.java
@@ -25,6 +25,7 @@
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
 import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.PartitionInfo;
 import org.apache.kafka.common.TopicPartition;
@@ -33,6 +34,7 @@
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.errors.WakeupException;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.errors.ConnectException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -40,10 +42,13 @@
 import java.time.Duration;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Queue;
 import java.util.Set;
 import java.util.concurrent.Future;
@@ -88,8 +93,9 @@ public class KafkaBasedLog<K, V> {
     private final Map<String, Object> consumerConfigs;
     private final Callback<ConsumerRecord<K, V>> consumedCallback;
     private final Supplier<TopicAdmin> topicAdminSupplier;
+    private final boolean requireAdminForOffsets;
     private Consumer<K, V> consumer;
-    private Producer<K, V> producer;
+    private Optional<Producer<K, V>> producer;
     private TopicAdmin admin;
 
     private Thread thread;
@@ -160,6 +166,56 @@ public KafkaBasedLog(String topic,
         this.readLogEndOffsetCallbacks = new ArrayDeque<>();
         this.time = time;
         this.initializer = initializer != null ? initializer : admin -> { };
+
+        // If the consumer is configured with isolation.level = read_committed, then its end offsets method cannot be relied on
+        // as it will not take records from currently-open transactions into account. We want to err on the side of caution in that
+        // case: when users request a read to the end of the log, we will read up to the point where the latest offsets visible to the
+        // consumer are at least as high as the (possibly-part-of-a-transaction) end offsets of the topic.
+        this.requireAdminForOffsets = IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)
+                .equals(consumerConfigs.get(ConsumerConfig.ISOLATION_LEVEL_CONFIG));
+    }
+
+    /**
+     * Create a new KafkaBasedLog object using pre-existing Kafka clients. This does not start reading the log and writing
+     * is not permitted until {@link #start()} is invoked. Note that the consumer and (if not null) producer given to this log
+     * will be closed when this log is {@link #stop() stopped}.
+     *
+     * @param topic the topic to treat as a log
+     * @param consumer the consumer to use for reading from the log; may not be null
+     * @param producer the producer to use for writing to the log; may be null, which will create a read-only log
+     * @param topicAdmin an admin client, the lifecycle of which is expected to be controlled by the calling component;
+     *                   may not be null
+     * @param consumedCallback   callback to invoke for each {@link ConsumerRecord} consumed when tailing the log
+     * @param time               Time interface
+     * @param initializer        the function that should be run when this log is {@link #start() started}; may be null
+     * @return a {@link KafkaBasedLog} using the given clients
+     */
+    public static <K, V> KafkaBasedLog<K, V> withExistingClients(String topic,
+                                                                 Consumer<K, V> consumer,
+                                                                 Producer<K, V> producer,
+                                                                 TopicAdmin topicAdmin,
+                                                                 Callback<ConsumerRecord<K, V>> consumedCallback,
+                                                                 Time time,
+                                                                 java.util.function.Consumer<TopicAdmin> initializer) {
+        Objects.requireNonNull(topicAdmin);
+        return new KafkaBasedLog<K, V>(topic,
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                () -> topicAdmin,
+                consumedCallback,
+                time,
+                initializer) {
+
+            @Override
+            protected Producer<K, V> createProducer() {
+                return producer;
+            }
+
+            @Override
+            protected Consumer<K, V> createConsumer() {
+                return consumer;
+            }
+        };
     }
 
     public void start() {
@@ -167,10 +223,19 @@ public void start() {
 
         // Create the topic admin client and initialize the topic ...
         admin = topicAdminSupplier.get();   // may be null
+        if (admin == null && requireAdminForOffsets) {
+            throw new ConnectException(
+                    "Must provide a TopicAdmin to KafkaBasedLog when consumer is configured with "
+                            + ConsumerConfig.ISOLATION_LEVEL_CONFIG + " set to "
+                            + IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)
+            );
+        }
         initializer.accept(admin);
 
         // Then create the producer and consumer
-        producer = createProducer();
+        producer = Optional.ofNullable(createProducer());
+        if (!producer.isPresent())
+            log.trace("Creating read-only KafkaBasedLog for topic " + topic);
         consumer = createConsumer();
 
         List<TopicPartition> partitions = new ArrayList<>();
@@ -214,26 +279,21 @@ public void stop() {
         synchronized (this) {
             stopRequested = true;
         }
-        consumer.wakeup();
-
-        try {
-            thread.join();
-        } catch (InterruptedException e) {
-            throw new ConnectException("Failed to stop KafkaBasedLog. Exiting without cleanly shutting " +
-                    "down it's producer and consumer.", e);
+        if (consumer != null) {
+            consumer.wakeup();
         }
 
-        try {
-            producer.close();
-        } catch (KafkaException e) {
-            log.error("Failed to stop KafkaBasedLog producer", e);
+        if (thread != null) {
+            try {
+                thread.join();
+            } catch (InterruptedException e) {
+                throw new ConnectException("Failed to stop KafkaBasedLog. Exiting without cleanly shutting " +
+                        "down it's producer and consumer.", e);
+            }
         }
 
-        try {
-            consumer.close();
-        } catch (KafkaException e) {
-            log.error("Failed to stop KafkaBasedLog consumer", e);
-        }
+        producer.ifPresent(p -> Utils.closeQuietly(p, "KafkaBasedLog producer for topic " + topic));
+        Utils.closeQuietly(consumer, "KafkaBasedLog consumer for topic " + topic);
 
         // do not close the admin client, since we don't own it
         admin = null;
@@ -243,7 +303,7 @@ public void stop() {
 
     /**
      * Flushes any outstanding writes and then reads to the current end of the log and invokes the specified callback.
-     * Note that this checks the current, offsets, reads to them, and invokes the callback regardless of whether
+     * Note that this checks the current offsets, reads to them, and invokes the callback regardless of whether
      * additional records have been written to the log. If the caller needs to ensure they have truly reached the end
      * of the log, they must ensure there are no other writers during this period.
      *
@@ -256,7 +316,7 @@ public void stop() {
      */
     public void readToEnd(Callback<Void> callback) {
         log.trace("Starting read to end log for topic {}", topic);
-        producer.flush();
+        flush();
         synchronized (this) {
             readLogEndOffsetCallbacks.add(callback);
         }
@@ -267,7 +327,7 @@ public void readToEnd(Callback<Void> callback) {
      * Flush the underlying producer to ensure that all pending writes have been sent.
      */
     public void flush() {
-        producer.flush();
+        producer.ifPresent(Producer::flush);
     }
 
     /**
@@ -285,14 +345,16 @@ public void send(K key, V value) {
     }
 
     public void send(K key, V value, org.apache.kafka.clients.producer.Callback callback) {
-        producer.send(new ProducerRecord<>(topic, key, value), callback);
+        producer.orElseThrow(() ->
+                new IllegalStateException("This KafkaBasedLog was created in read-only mode and does not support write operations")
+        ).send(new ProducerRecord<>(topic, key, value), callback);
     }
 
     public int partitionCount() {
         return partitionCount;
     }
 
-    private Producer<K, V> createProducer() {
+    protected Producer<K, V> createProducer() {
         // Always require producer acks to all to ensure durable writes
         producerConfigs.put(ProducerConfig.ACKS_CONFIG, "all");
 
@@ -301,7 +363,7 @@ private Producer<K, V> createProducer() {
         return new KafkaProducer<>(producerConfigs);
     }
 
-    private Consumer<K, V> createConsumer() {
+    protected Consumer<K, V> createConsumer() {
         // Always force reset to the beginning of the log since this class wants to consume all available log data
         consumerConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
 
@@ -356,7 +418,15 @@ private void readToLogEnd(boolean shouldRetry) {
     }
 
     // Visible for testing
-    Map<TopicPartition, Long> readEndOffsets(Set<TopicPartition> assignment, boolean shouldRetry) {
+    /**
+     * Read to the end of the given list of topic partitions
+     * @param assignment the topic partitions to read to the end of
+     * @param shouldRetry boolean flag to enable retry for the admin client {@code listOffsets()} call.
+     * @throws UnsupportedVersionException if the log's consumer is using the "read_committed" isolation level (and
+     * therefore a separate admin client is required to read end offsets for the topic), but the broker does not support
+     * reading end offsets using an admin client
+     */
+    Map<TopicPartition, Long> readEndOffsets(Set<TopicPartition> assignment, boolean shouldRetry) throws UnsupportedVersionException {
         log.trace("Reading to end of offset log");
 
         // Note that we'd prefer to not use the consumer to find the end offsets for the assigned topic partitions.
@@ -381,6 +451,10 @@ Map<TopicPartition, Long> readEndOffsets(Set<TopicPartition> assignment, boolean
             } catch (UnsupportedVersionException e) {
                 // This may happen with really old brokers that don't support the auto topic creation
                 // field in metadata requests
+                if (requireAdminForOffsets) {
+                    // Should be handled by the caller during log startup
+                    throw e;
+                }
                 log.debug("Reading to end of log offsets with consumer since admin client is unsupported: {}", e.getMessage());
                 // Forget the reference to the admin so that we won't even try to use the admin the next time this method is called
                 admin = null;
diff --git a/connect/runtime/src/main/java/org/apache/kafka/connect/util/TopicAdmin.java b/connect/runtime/src/main/java/org/apache/kafka/connect/util/TopicAdmin.java
index faf7b372bedca..f9defc77ca202 100644
--- a/connect/runtime/src/main/java/org/apache/kafka/connect/util/TopicAdmin.java
+++ b/connect/runtime/src/main/java/org/apache/kafka/connect/util/TopicAdmin.java
@@ -23,11 +23,13 @@
 import org.apache.kafka.clients.admin.CreateTopicsOptions;
 import org.apache.kafka.clients.admin.DescribeConfigsOptions;
 import org.apache.kafka.clients.admin.DescribeTopicsOptions;
+import org.apache.kafka.clients.admin.ListOffsetsOptions;
 import org.apache.kafka.clients.admin.ListOffsetsResult;
 import org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo;
 import org.apache.kafka.clients.admin.NewTopic;
 import org.apache.kafka.clients.admin.OffsetSpec;
 import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.config.ConfigException;
@@ -264,39 +266,34 @@ public static NewTopicBuilder defineTopic(String topicName) {
     }
 
     private static final Logger log = LoggerFactory.getLogger(TopicAdmin.class);
-    private final Map<String, Object> adminConfig;
+    private final String bootstrapServers;
     private final Admin admin;
     private final boolean logCreation;
 
     /**
      * Create a new topic admin component with the given configuration.
+     * <p>
+     * Note that this will create an underlying {@link Admin} instance which must be freed when this
+     * topic admin is no longer needed by calling {@link #close()} or {@link #close(Duration)}.
      *
      * @param adminConfig the configuration for the {@link Admin}
      */
     public TopicAdmin(Map<String, Object> adminConfig) {
-        this(adminConfig, Admin.create(adminConfig));
+        this(adminConfig.get(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG), Admin.create(adminConfig));
     }
 
     // visible for testing
-    TopicAdmin(Map<String, Object> adminConfig, Admin adminClient) {
-        this(adminConfig, adminClient, true);
+    TopicAdmin(Object bootstrapServers, Admin adminClient) {
+        this(bootstrapServers, adminClient, true);
     }
 
     // visible for testing
-    TopicAdmin(Map<String, Object> adminConfig, Admin adminClient, boolean logCreation) {
+    TopicAdmin(Object bootstrapServers, Admin adminClient, boolean logCreation) {
         this.admin = adminClient;
-        this.adminConfig = adminConfig != null ? adminConfig : Collections.emptyMap();
+        this.bootstrapServers = bootstrapServers != null ? bootstrapServers.toString() : "<unknown>";
         this.logCreation = logCreation;
     }
 
-    /**
-     * Get the {@link Admin} client used by this topic admin object.
-     * @return the Kafka admin instance; never null
-     */
-    public Admin admin() {
-        return admin;
-    }
-
    /**
      * Attempt to create the topic described by the given definition, returning true if the topic was created or false
      * if the topic already existed.
@@ -371,7 +368,6 @@ public TopicCreationResponse createOrFindTopics(NewTopic... topics) {
             }
         }
         if (topicsByName.isEmpty()) return EMPTY_CREATION;
-        String bootstrapServers = bootstrapServers();
         String topicNameList = Utils.join(topicsByName.keySet(), "', '");
 
         // Attempt to create any missing topics
@@ -448,7 +444,6 @@ public Map<String, TopicDescription> describeTopics(String... topics) {
         if (topics == null) {
             return Collections.emptyMap();
         }
-        String bootstrapServers = bootstrapServers();
         String topicNameList = String.join(", ", topics);
 
         Map<String, KafkaFuture<TopicDescription>> newResults =
@@ -604,7 +599,6 @@ public Map<String, Config> describeTopicConfigs(String... topicNames) {
         if (topics.isEmpty()) {
             return Collections.emptyMap();
         }
-        String bootstrapServers = bootstrapServers();
         String topicNameList = String.join(", ", topics);
         Collection<ConfigResource> resources = topics.stream()
                                                      .map(t -> new ConfigResource(ConfigResource.Type.TOPIC, t))
@@ -664,7 +658,7 @@ public Map<TopicPartition, Long> endOffsets(Set<TopicPartition> partitions) {
             return Collections.emptyMap();
         }
         Map<TopicPartition, OffsetSpec> offsetSpecMap = partitions.stream().collect(Collectors.toMap(Function.identity(), tp -> OffsetSpec.latest()));
-        ListOffsetsResult resultFuture = admin.listOffsets(offsetSpecMap);
+        ListOffsetsResult resultFuture = admin.listOffsets(offsetSpecMap, new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED));
         // Get the individual result for each topic partition so we have better error messages
         Map<TopicPartition, Long> result = new HashMap<>();
         for (TopicPartition partition : partitions) {
@@ -675,28 +669,28 @@ public Map<TopicPartition, Long> endOffsets(Set<TopicPartition> partitions) {
                 Throwable cause = e.getCause();
                 String topic = partition.topic();
                 if (cause instanceof AuthorizationException) {
-                    String msg = String.format("Not authorized to get the end offsets for topic '%s' on brokers at %s", topic, bootstrapServers());
+                    String msg = String.format("Not authorized to get the end offsets for topic '%s' on brokers at %s", topic, bootstrapServers);
                     throw new ConnectException(msg, e);
                 } else if (cause instanceof UnsupportedVersionException) {
                     // Should theoretically never happen, because this method is the same as what the consumer uses and therefore
                     // should exist in the broker since before the admin client was added
-                    String msg = String.format("API to get the get the end offsets for topic '%s' is unsupported on brokers at %s", topic, bootstrapServers());
+                    String msg = String.format("API to get the get the end offsets for topic '%s' is unsupported on brokers at %s", topic, bootstrapServers);
                     throw new UnsupportedVersionException(msg, e);
                 } else if (cause instanceof TimeoutException) {
-                    String msg = String.format("Timed out while waiting to get end offsets for topic '%s' on brokers at %s", topic, bootstrapServers());
+                    String msg = String.format("Timed out while waiting to get end offsets for topic '%s' on brokers at %s", topic, bootstrapServers);
                     throw new TimeoutException(msg, e);
                 } else if (cause instanceof LeaderNotAvailableException) {
-                    String msg = String.format("Unable to get end offsets during leader election for topic '%s' on brokers at %s", topic, bootstrapServers());
+                    String msg = String.format("Unable to get end offsets during leader election for topic '%s' on brokers at %s", topic, bootstrapServers);
                     throw new LeaderNotAvailableException(msg, e);
                 } else if (cause instanceof org.apache.kafka.common.errors.RetriableException) {
                     throw (org.apache.kafka.common.errors.RetriableException) cause;
                 } else {
-                    String msg = String.format("Error while getting end offsets for topic '%s' on brokers at %s", topic, bootstrapServers());
+                    String msg = String.format("Error while getting end offsets for topic '%s' on brokers at %s", topic, bootstrapServers);
                     throw new ConnectException(msg, e);
                 }
             } catch (InterruptedException e) {
                 Thread.interrupted();
-                String msg = String.format("Interrupted while attempting to read end offsets for topic '%s' on brokers at %s", partition.topic(), bootstrapServers());
+                String msg = String.format("Interrupted while attempting to read end offsets for topic '%s' on brokers at %s", partition.topic(), bootstrapServers);
                 throw new RetriableException(msg, e);
             }
         }
@@ -714,6 +708,7 @@ public Map<TopicPartition, Long> endOffsets(Set<TopicPartition> partitions) {
      *                          must be 0 or more
      * @return                  the map of offset for each topic partition, or an empty map if the supplied partitions
      *                          are null or empty
+     * @throws UnsupportedVersionException if the broker is too old to support the admin client API to read end offsets
      * @throws ConnectException if {@code timeoutDuration} is exhausted
      * @see TopicAdmin#endOffsets(Set)
      */
@@ -725,6 +720,9 @@ public Map<TopicPartition, Long> retryEndOffsets(Set<TopicPartition> partitions,
                     () -> "list offsets for topic partitions",
                     timeoutDuration,
                     retryBackoffMs);
+        } catch (UnsupportedVersionException e) {
+            // Older brokers don't support this admin method, so rethrow it without wrapping it
+            throw e;
         } catch (Exception e) {
             throw new ConnectException("Failed to list offsets for topic partitions.", e);
         }
@@ -738,9 +736,4 @@ public void close() {
     public void close(Duration timeout) {
         admin.close(timeout);
     }
-
-    private String bootstrapServers() {
-        Object servers = adminConfig.get(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG);
-        return servers != null ? servers.toString() : "<unknown>";
-    }
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/connector/policy/BaseConnectorClientConfigOverridePolicyTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/connector/policy/BaseConnectorClientConfigOverridePolicyTest.java
index 28fee73a93966..719de7ed7b700 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/connector/policy/BaseConnectorClientConfigOverridePolicyTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/connector/policy/BaseConnectorClientConfigOverridePolicyTest.java
@@ -19,7 +19,7 @@
 
 import org.apache.kafka.common.config.ConfigValue;
 import org.apache.kafka.connect.health.ConnectorType;
-import org.apache.kafka.connect.runtime.WorkerTest;
+import org.apache.kafka.connect.runtime.SampleSourceConnector;
 import org.junit.Assert;
 
 import java.util.List;
@@ -43,7 +43,7 @@ private List<ConfigValue> configValues(Map<String, Object> clientConfig) {
         ConnectorClientConfigRequest connectorClientConfigRequest = new ConnectorClientConfigRequest(
             "test",
             ConnectorType.SOURCE,
-            WorkerTest.WorkerTestConnector.class,
+            SampleSourceConnector.class,
             clientConfig,
             ConnectorClientConfigRequest.ClientType.PRODUCER);
         return policyToTest().validate(connectorClientConfigRequest);
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/BlockingConnectorTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/BlockingConnectorTest.java
index 571cfbb6a861c..ebb604b2a505c 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/BlockingConnectorTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/BlockingConnectorTest.java
@@ -27,7 +27,7 @@
 import org.apache.kafka.connect.connector.Task;
 import org.apache.kafka.connect.runtime.Worker;
 import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
-import org.apache.kafka.connect.runtime.rest.resources.ConnectorsResource;
+import org.apache.kafka.connect.runtime.rest.resources.ConnectResource;
 import org.apache.kafka.connect.sink.SinkConnector;
 import org.apache.kafka.connect.sink.SinkRecord;
 import org.apache.kafka.connect.sink.SinkTask;
@@ -42,9 +42,11 @@
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.function.ThrowingRunnable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.ws.rs.core.Response;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -61,8 +63,9 @@
 import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
 import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG;
 import static org.apache.kafka.connect.runtime.SinkConnectorConfig.TOPICS_CONFIG;
-import static org.apache.kafka.test.TestUtils.waitForCondition;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
 
 /**
  * Tests situations during which certain connector operations, such as start, validation,
@@ -78,9 +81,9 @@ public class BlockingConnectorTest {
     private static final String NORMAL_CONNECTOR_NAME = "normal-connector";
     private static final String TEST_TOPIC = "normal-topic";
     private static final int NUM_RECORDS_PRODUCED = 100;
-    private static final long CONNECT_WORKER_STARTUP_TIMEOUT = TimeUnit.SECONDS.toMillis(60);
-    private static final long RECORD_TRANSFER_DURATION_MS = TimeUnit.SECONDS.toMillis(30);
-    private static final long REST_REQUEST_TIMEOUT = Worker.CONNECTOR_GRACEFUL_SHUTDOWN_TIMEOUT_MS * 2;
+    private static final long CONNECTOR_BLOCK_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(60);
+    private static final long RECORD_TRANSFER_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(60);
+    private static final long REDUCED_REST_REQUEST_TIMEOUT = Worker.CONNECTOR_GRACEFUL_SHUTDOWN_TIMEOUT_MS * 2;
 
     private static final String CONNECTOR_INITIALIZE = "Connector::initialize";
     private static final String CONNECTOR_INITIALIZE_WITH_TASK_CONFIGS = "Connector::initializeWithTaskConfigs";
@@ -114,8 +117,6 @@ public class BlockingConnectorTest {
 
     @Before
     public void setup() throws Exception {
-        // Artificially reduce the REST request timeout so that these don't take forever
-        ConnectorsResource.setRequestTimeout(REST_REQUEST_TIMEOUT);
         // build a Connect cluster backed by Kafka and Zk
         connect = new EmbeddedConnectCluster.Builder()
                 .name("connect-cluster")
@@ -128,13 +129,9 @@ public void setup() throws Exception {
         // start the clusters
         connect.start();
 
-        // wait for the Connect REST API to become available. necessary because of the reduced REST
-        // request timeout; otherwise, we may get an unexpected 500 with our first real REST request
-        // if the worker is still getting on its feet.
-        waitForCondition(
-            () -> connect.requestGet(connect.endpointForResource("connectors/nonexistent")).getStatus() == 404,
-            CONNECT_WORKER_STARTUP_TIMEOUT,
-            "Worker did not complete startup in time"
+        connect.assertions().assertAtLeastNumWorkersAreUp(
+                NUM_WORKERS,
+                "Initial group of workers did not start in time"
         );
     }
 
@@ -142,14 +139,13 @@ public void setup() throws Exception {
     public void close() {
         // stop all Connect, Kafka and Zk threads.
         connect.stop();
-        ConnectorsResource.resetRequestTimeout();
         Block.resetBlockLatch();
     }
 
     @Test
     public void testBlockInConnectorValidate() throws Exception {
         log.info("Starting test testBlockInConnectorValidate");
-        assertThrows(ConnectRestException.class, () -> createConnectorWithBlock(ValidateBlockingConnector.class, CONNECTOR_VALIDATE));
+        assertRequestTimesOut("create connector that blocks during validation", () -> createConnectorWithBlock(ValidateBlockingConnector.class, CONNECTOR_VALIDATE));
         // Will NOT assert that connector has failed, since the request should fail before it's even created
 
         // Connector should already be blocked so this should return immediately, but check just to
@@ -163,7 +159,7 @@ public void testBlockInConnectorValidate() throws Exception {
     @Test
     public void testBlockInConnectorConfig() throws Exception {
         log.info("Starting test testBlockInConnectorConfig");
-        assertThrows(ConnectRestException.class, () -> createConnectorWithBlock(ConfigBlockingConnector.class, CONNECTOR_CONFIG));
+        assertRequestTimesOut("create connector that blocks while getting config", () -> createConnectorWithBlock(ConfigBlockingConnector.class, CONNECTOR_CONFIG));
         // Will NOT assert that connector has failed, since the request should fail before it's even created
 
         // Connector should already be blocked so this should return immediately, but check just to
@@ -329,8 +325,28 @@ private void waitForConnectorStart(String connector) throws InterruptedException
 
     private void verifyNormalConnector() throws InterruptedException {
         waitForConnectorStart(NORMAL_CONNECTOR_NAME);
-        normalConnectorHandle.awaitRecords(RECORD_TRANSFER_DURATION_MS);
-        normalConnectorHandle.awaitCommits(RECORD_TRANSFER_DURATION_MS);
+        normalConnectorHandle.awaitRecords(RECORD_TRANSFER_TIMEOUT_MS);
+        normalConnectorHandle.awaitCommits(RECORD_TRANSFER_TIMEOUT_MS);
+    }
+
+    private void assertRequestTimesOut(String requestDescription, ThrowingRunnable request) {
+        // Artificially reduce the REST request timeout so that these don't take 90 seconds
+        connect.requestTimeout(REDUCED_REST_REQUEST_TIMEOUT);
+        ConnectRestException exception = assertThrows(
+                "Should have failed to " + requestDescription,
+                ConnectRestException.class, request
+        );
+        assertEquals(
+                "Should have gotten 500 error from trying to " + requestDescription,
+                Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), exception.statusCode()
+        );
+        assertTrue(
+                "Should have gotten timeout message from trying to " + requestDescription
+                        + "; instead, message was: " + exception.getMessage(),
+                exception.getMessage().contains("Request timed out")
+        );
+        // Reset the REST request timeout so that other requests aren't impacted
+        connect.requestTimeout(ConnectResource.DEFAULT_REST_REQUEST_TIMEOUT_MS);
     }
 
     private static class Block {
@@ -360,7 +376,7 @@ public static void waitForBlock() throws InterruptedException, TimeoutException
             }
 
             log.debug("Waiting for connector to block");
-            if (!blockLatch.await(60, TimeUnit.SECONDS)) {
+            if (!blockLatch.await(CONNECTOR_BLOCK_TIMEOUT_MS, TimeUnit.MILLISECONDS)) {
                 throw new TimeoutException("Timed out waiting for connector to block.");
             }
             log.debug("Connector should now be blocked");
@@ -393,10 +409,6 @@ public Block(String block) {
             }
         }
 
-        public Map<String, String> taskConfig() {
-            return Collections.singletonMap(BLOCK_CONFIG, block);
-        }
-
         public void maybeBlockOn(String block) {
             if (block.equals(this.block)) {
                 log.info("Will block on {}", block);
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ConnectorHandle.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ConnectorHandle.java
index b31455b248483..bed05fa21e41c 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ConnectorHandle.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ConnectorHandle.java
@@ -112,6 +112,14 @@ public void deleteTask(String taskId) {
         taskHandles.remove(taskId);
     }
 
+    /**
+     * Delete all task handles for this connector.
+     */
+    public void clearTasks() {
+        log.info("Clearing {} existing task handles for connector {}", taskHandles.size(), connectorName);
+        taskHandles.clear();
+    }
+
     /**
      * Set the number of expected records for this connector.
      *
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ErrorHandlingIntegrationTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ErrorHandlingIntegrationTest.java
index b3dd9a097eda1..5bc5fcdbd255f 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ErrorHandlingIntegrationTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ErrorHandlingIntegrationTest.java
@@ -29,8 +29,10 @@
 import org.apache.kafka.test.IntegrationTest;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,9 +67,9 @@
  */
 @Category(IntegrationTest.class)
 public class ErrorHandlingIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final Logger log = LoggerFactory.getLogger(ErrorHandlingIntegrationTest.class);
-
     private static final int NUM_WORKERS = 1;
     private static final String DLQ_TOPIC = "my-connector-errors";
     private static final String CONNECTOR_NAME = "error-conn";
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ExactlyOnceSourceIntegrationTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ExactlyOnceSourceIntegrationTest.java
new file mode 100644
index 0000000000000..bd9bceba064fd
--- /dev/null
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/ExactlyOnceSourceIntegrationTest.java
@@ -0,0 +1,1221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.integration;
+
+import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.Producer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.acl.AccessControlEntry;
+import org.apache.kafka.common.acl.AclBinding;
+import org.apache.kafka.common.acl.AclOperation;
+import org.apache.kafka.common.acl.AclPermissionType;
+import org.apache.kafka.common.config.ConfigDef;
+import org.apache.kafka.common.errors.ProducerFencedException;
+import org.apache.kafka.common.resource.PatternType;
+import org.apache.kafka.common.resource.ResourcePattern;
+import org.apache.kafka.common.resource.ResourceType;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.connect.connector.Task;
+import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.json.JsonConverter;
+import org.apache.kafka.connect.json.JsonConverterConfig;
+import org.apache.kafka.connect.runtime.Worker;
+import org.apache.kafka.connect.runtime.WorkerConfig;
+import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
+import org.apache.kafka.connect.runtime.rest.entities.ConfigInfo;
+import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos;
+import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
+import org.apache.kafka.connect.source.SourceConnector;
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.StringConverter;
+import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster;
+import org.apache.kafka.connect.util.clusters.EmbeddedConnectClusterAssertions;
+import org.apache.kafka.connect.util.clusters.EmbeddedKafkaCluster;
+import org.apache.kafka.test.IntegrationTest;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.LongStream;
+
+import static org.apache.kafka.clients.producer.ProducerConfig.BOOTSTRAP_SERVERS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.CLIENT_ID_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.TRANSACTIONAL_ID_CONFIG;
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG;
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.CUSTOM_TRANSACTION_BOUNDARIES_CONFIG;
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.MESSAGES_PER_POLL_CONFIG;
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.OFFSETS_TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TRANSACTION_BOUNDARY_INTERVAL_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.CONNECTOR;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.INTERVAL;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.POLL;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+@Category(IntegrationTest.class)
+public class ExactlyOnceSourceIntegrationTest {
+
+    private static final Logger log = LoggerFactory.getLogger(ExactlyOnceSourceIntegrationTest.class);
+    private static final String CLUSTER_GROUP_ID = "exactly-once-source-integration-test";
+    private static final String CONNECTOR_NAME = "exactlyOnceQuestionMark";
+
+    private static final int CONSUME_RECORDS_TIMEOUT_MS = 60_000;
+    private static final int SOURCE_TASK_PRODUCE_TIMEOUT_MS = 30_000;
+    private static final int DEFAULT_NUM_WORKERS = 3;
+
+    private Properties brokerProps;
+    private Map<String, String> workerProps;
+    private EmbeddedConnectCluster.Builder connectBuilder;
+    private EmbeddedConnectCluster connect;
+    private ConnectorHandle connectorHandle;
+
+    @Before
+    public void setup() {
+        workerProps = new HashMap<>();
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        workerProps.put(DistributedConfig.GROUP_ID_CONFIG, CLUSTER_GROUP_ID);
+
+        brokerProps = new Properties();
+        brokerProps.put("transaction.state.log.replication.factor", "1");
+        brokerProps.put("transaction.state.log.min.isr", "1");
+
+        // build a Connect cluster backed by Kafka and Zk
+        connectBuilder = new EmbeddedConnectCluster.Builder()
+                .numWorkers(DEFAULT_NUM_WORKERS)
+                .numBrokers(1)
+                .workerProps(workerProps)
+                .brokerProps(brokerProps);
+
+        // get a handle to the connector
+        connectorHandle = RuntimeHandles.get().connectorHandle(CONNECTOR_NAME);
+    }
+
+    private void startConnect() {
+        connect = connectBuilder.build();
+        connect.start();
+    }
+
+    @After
+    public void close() {
+        try {
+            // stop all Connect, Kafka and Zk threads.
+            connect.stop();
+        } finally {
+            // Clear the handle for the connector. Fun fact: if you don't do this, your tests become quite flaky.
+            RuntimeHandles.get().deleteConnector(CONNECTOR_NAME);
+        }
+    }
+
+    /**
+     * A simple test for the pre-flight validation API for connectors to provide their own delivery guarantees.
+     */
+    @Test
+    public void testPreflightValidation() {
+        connectBuilder.numWorkers(1);
+        startConnect();
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, "1");
+        props.put(TOPIC_CONFIG, "topic");
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+
+        // Test out the "exactly.once.support" property
+        props.put(EXACTLY_ONCE_SUPPORT_CONFIG, "required");
+
+        // Connector will return null from SourceConnector::exactlyOnceSupport
+        props.put(CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG, MonitorableSourceConnector.EXACTLY_ONCE_NULL);
+        ConfigInfos validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        ConfigInfo propertyValidation = findConfigInfo(EXACTLY_ONCE_SUPPORT_CONFIG, validation);
+        assertFalse("Preflight validation for exactly-once support property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will return UNSUPPORTED from SourceConnector::exactlyOnceSupport
+        props.put(CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG, MonitorableSourceConnector.EXACTLY_ONCE_UNSUPPORTED);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        propertyValidation = findConfigInfo(EXACTLY_ONCE_SUPPORT_CONFIG, validation);
+        assertFalse("Preflight validation for exactly-once support property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will throw an exception from SourceConnector::exactlyOnceSupport
+        props.put(CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG, MonitorableSourceConnector.EXACTLY_ONCE_FAIL);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        propertyValidation = findConfigInfo(EXACTLY_ONCE_SUPPORT_CONFIG, validation);
+        assertFalse("Preflight validation for exactly-once support property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will return SUPPORTED from SourceConnector::exactlyOnceSupport
+        props.put(CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG, MonitorableSourceConnector.EXACTLY_ONCE_SUPPORTED);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have zero errors", 0, validation.errorCount());
+
+        // Test out the transaction boundary definition property
+        props.put(TRANSACTION_BOUNDARY_CONFIG, CONNECTOR.toString());
+
+        // Connector will return null from SourceConnector::canDefineTransactionBoundaries
+        props.put(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, MonitorableSourceConnector.TRANSACTION_BOUNDARIES_NULL);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        propertyValidation = findConfigInfo(TRANSACTION_BOUNDARY_CONFIG, validation);
+        assertFalse("Preflight validation for transaction boundary property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will return UNSUPPORTED from SourceConnector::canDefineTransactionBoundaries
+        props.put(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, MonitorableSourceConnector.TRANSACTION_BOUNDARIES_UNSUPPORTED);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        propertyValidation = findConfigInfo(TRANSACTION_BOUNDARY_CONFIG, validation);
+        assertFalse("Preflight validation for transaction boundary property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will throw an exception from SourceConnector::canDefineTransactionBoundaries
+        props.put(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, MonitorableSourceConnector.TRANSACTION_BOUNDARIES_FAIL);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have exactly one error", 1, validation.errorCount());
+        propertyValidation = findConfigInfo(TRANSACTION_BOUNDARY_CONFIG, validation);
+        assertFalse("Preflight validation for transaction boundary property should have at least one error message",
+                propertyValidation.configValue().errors().isEmpty());
+
+        // Connector will return SUPPORTED from SourceConnector::canDefineTransactionBoundaries
+        props.put(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, MonitorableSourceConnector.TRANSACTION_BOUNDARIES_SUPPORTED);
+        validation = connect.validateConnectorConfig(MonitorableSourceConnector.class.getSimpleName(), props);
+        assertEquals("Preflight validation should have zero errors", 0, validation.errorCount());
+    }
+
+    /**
+     * A simple green-path test that ensures the worker can start up a source task with exactly-once support enabled
+     * and write some records to Kafka that will be visible to a downstream consumer using the "READ_COMMITTED"
+     * isolation level. The "poll" transaction boundary is used.
+     */
+    @Test
+    public void testPollBoundary() throws Exception {
+        // Much slower offset commit interval; should never be triggered during this test
+        workerProps.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "600000");
+        connectBuilder.numWorkers(1);
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int numTasks = 1;
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, Integer.toString(numTasks));
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TRANSACTION_BOUNDARY_CONFIG, POLL.toString());
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+
+        // expect all records to be consumed and committed by the connector
+        connectorHandle.expectedRecords(recordsProduced);
+        connectorHandle.expectedCommits(recordsProduced);
+
+        // start a source connector
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        log.info("Waiting for records to be provided to worker by task");
+        // wait for the connector tasks to produce enough records
+        connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+        log.info("Waiting for records to be committed to Kafka by worker");
+        // wait for the connector tasks to commit enough records
+        connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+        StartAndStopLatch connectorStop = connectorHandle.expectedStops(1, true);
+        connect.deleteConnector(CONNECTOR_NAME);
+        assertConnectorStopped(connectorStop);
+
+        // consume all records from the source topic or fail, to ensure that they were correctly produced
+        ConsumerRecords<byte[], byte[]> records = connect.kafka().consumeAll(
+                CONSUME_RECORDS_TIMEOUT_MS,
+                Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                null,
+                topic
+        );
+        assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + records.count(),
+                records.count() >= recordsProduced);
+        assertExactlyOnceSeqnos(records, numTasks);
+    }
+
+    /**
+     * A simple green-path test that ensures the worker can start up a source task with exactly-once support enabled
+     * and write some records to Kafka that will be visible to a downstream consumer using the "READ_COMMITTED"
+     * isolation level. The "interval" transaction boundary is used with a connector-specific override.
+     */
+    @Test
+    public void testIntervalBoundary() throws Exception {
+        // Much slower offset commit interval; should never be triggered during this test
+        workerProps.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "600000");
+        connectBuilder.numWorkers(1);
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int numTasks = 1;
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, Integer.toString(numTasks));
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TRANSACTION_BOUNDARY_CONFIG, INTERVAL.toString());
+        props.put(TRANSACTION_BOUNDARY_INTERVAL_CONFIG, "10000");
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+
+        // expect all records to be consumed and committed by the connector
+        connectorHandle.expectedRecords(recordsProduced);
+        connectorHandle.expectedCommits(recordsProduced);
+
+        // start a source connector
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        log.info("Waiting for records to be provided to worker by task");
+        // wait for the connector tasks to produce enough records
+        connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+        log.info("Waiting for records to be committed to Kafka by worker");
+        // wait for the connector tasks to commit enough records
+        connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+        StartAndStopLatch connectorStop = connectorHandle.expectedStops(1, true);
+        connect.deleteConnector(CONNECTOR_NAME);
+        assertConnectorStopped(connectorStop);
+
+        // consume all records from the source topic or fail, to ensure that they were correctly produced
+        ConsumerRecords<byte[], byte[]> records = connect.kafka().consumeAll(
+                CONSUME_RECORDS_TIMEOUT_MS,
+                Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                null,
+                topic
+        );
+        assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + records.count(),
+                records.count() >= recordsProduced);
+        assertExactlyOnceSeqnos(records, numTasks);
+    }
+
+    /**
+     * A simple green-path test that ensures the worker can start up a source task with exactly-once support enabled
+     * and write some records to Kafka that will be visible to a downstream consumer using the "READ_COMMITTED"
+     * isolation level. The "connector" transaction boundary is used with a connector that defines transactions whose
+     * size correspond to successive elements of the Fibonacci sequence, where transactions with an even number of
+     * records are aborted, and those with an odd number of records are committed.
+     */
+    @Test
+    public void testConnectorBoundary() throws Exception {
+        String offsetsTopic = "exactly-once-source-cluster-offsets";
+        workerProps.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, offsetsTopic);
+        connectBuilder.numWorkers(1);
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, "1");
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TRANSACTION_BOUNDARY_CONFIG, CONNECTOR.toString());
+        props.put(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, MonitorableSourceConnector.TRANSACTION_BOUNDARIES_SUPPORTED);
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+
+        // expect all records to be consumed and committed by the connector
+        connectorHandle.expectedRecords(recordsProduced);
+        connectorHandle.expectedCommits(recordsProduced);
+
+        // start a source connector
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        log.info("Waiting for records to be provided to worker by task");
+        // wait for the connector tasks to produce enough records
+        connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+        log.info("Waiting for records to be committed to Kafka by worker");
+        // wait for the connector tasks to commit enough records
+        connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+        Map<String, Object> consumerProps = new HashMap<>();
+        consumerProps.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
+        // consume all records from the source topic or fail, to ensure that they were correctly produced
+        ConsumerRecords<byte[], byte[]> sourceRecords = connect.kafka().consumeAll(
+                CONSUME_RECORDS_TIMEOUT_MS,
+                Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                null,
+                topic
+        );
+        assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + sourceRecords.count(),
+                sourceRecords.count() >= recordsProduced);
+
+        // also consume from the cluster's offsets topic to verify that the expected offsets (which should correspond to the connector's
+        // custom transaction boundaries) were committed
+        List<Long> expectedOffsetSeqnos = new ArrayList<>();
+        long lastExpectedOffsetSeqno = 1;
+        long nextExpectedOffsetSeqno = 1;
+        while (nextExpectedOffsetSeqno <= recordsProduced) {
+            expectedOffsetSeqnos.add(nextExpectedOffsetSeqno);
+            nextExpectedOffsetSeqno += lastExpectedOffsetSeqno;
+            lastExpectedOffsetSeqno = nextExpectedOffsetSeqno - lastExpectedOffsetSeqno;
+        }
+        ConsumerRecords<byte[], byte[]> offsetRecords = connect.kafka()
+                .consume(
+                        expectedOffsetSeqnos.size(),
+                        TimeUnit.MINUTES.toMillis(1),
+                        consumerProps,
+                        offsetsTopic
+                );
+
+        List<Long> actualOffsetSeqnos = parseAndAssertOffsetsForSingleTask(offsetRecords);
+
+        assertEquals("Committed offsets should match connector-defined transaction boundaries",
+                expectedOffsetSeqnos, actualOffsetSeqnos.subList(0, expectedOffsetSeqnos.size()));
+
+        List<Long> expectedRecordSeqnos = LongStream.range(1, recordsProduced + 1).boxed().collect(Collectors.toList());
+        long priorBoundary = 1;
+        long nextBoundary = 2;
+        while (priorBoundary < expectedRecordSeqnos.get(expectedRecordSeqnos.size() - 1)) {
+            if (nextBoundary % 2 == 0) {
+                for (long i = priorBoundary + 1; i < nextBoundary + 1; i++) {
+                    expectedRecordSeqnos.remove(i);
+                }
+            }
+            nextBoundary += priorBoundary;
+            priorBoundary = nextBoundary - priorBoundary;
+        }
+        List<Long> actualRecordSeqnos = parseAndAssertValuesForSingleTask(sourceRecords);
+        // Have to sort the records by seqno since we produce to multiple partitions and in-order consumption isn't guaranteed
+        Collections.sort(actualRecordSeqnos);
+        assertEquals("Committed records should exclude connector-aborted transactions",
+                expectedRecordSeqnos, actualRecordSeqnos.subList(0, expectedRecordSeqnos.size()));
+    }
+
+    /**
+     * Brings up a one-node cluster, then intentionally fences out the transactional producer used by the leader
+     * for writes to the config topic to simulate a zombie leader being active in the cluster. The leader should
+     * automatically recover, verify that it is still the leader, and then succeed to create a connector when the
+     * user resends the request.
+     */
+    @Test
+    public void testFencedLeaderRecovery() throws Exception {
+        connectBuilder.numWorkers(1);
+        // Much slower offset commit interval; should never be triggered during this test
+        workerProps.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "600000");
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int numTasks = 1;
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, Integer.toString(numTasks));
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TRANSACTION_BOUNDARY_CONFIG, POLL.toString());
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+
+        // expect all records to be consumed and committed by the connector
+        connectorHandle.expectedRecords(recordsProduced);
+        connectorHandle.expectedCommits(recordsProduced);
+
+        // make sure the worker is actually up (otherwise, it may fence out our simulated zombie leader, instead of the other way around)
+        assertEquals(404, connect.requestGet(connect.endpointForResource("connectors/nonexistent")).getStatus());
+
+        // fence out the leader of the cluster
+        Producer<?, ?> zombieLeader = transactionalProducer(
+                "simulated-zombie-leader",
+                DistributedConfig.transactionalProducerId(CLUSTER_GROUP_ID)
+        );
+        zombieLeader.initTransactions();
+        zombieLeader.close();
+
+        // start a source connector--should fail the first time
+        assertThrows(ConnectRestException.class, () -> connect.configureConnector(CONNECTOR_NAME, props));
+
+        // the second request should succeed because the leader has reclaimed write privileges for the config topic
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        log.info("Waiting for records to be provided to worker by task");
+        // wait for the connector tasks to produce enough records
+        connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+        log.info("Waiting for records to be committed to Kafka by worker");
+        // wait for the connector tasks to commit enough records
+        connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+        StartAndStopLatch connectorStop = connectorHandle.expectedStops(1, true);
+        connect.deleteConnector(CONNECTOR_NAME);
+        assertConnectorStopped(connectorStop);
+
+        // consume all records from the source topic or fail, to ensure that they were correctly produced
+        ConsumerRecords<byte[], byte[]> records = connect.kafka().consumeAll(
+                CONSUME_RECORDS_TIMEOUT_MS,
+                Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                null,
+                topic
+        );
+        assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + records.count(),
+                records.count() >= recordsProduced);
+        assertExactlyOnceSeqnos(records, numTasks);
+    }
+
+    /**
+     * A moderately-complex green-path test that ensures the worker can start up and run tasks for a source
+     * connector that gets reconfigured, and will fence out potential zombie tasks for older generations before
+     * bringing up new task instances.
+     */
+    @Test
+    public void testConnectorReconfiguration() throws Exception {
+        // Much slower offset commit interval; should never be triggered during this test
+        workerProps.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, "600000");
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+
+        // expect all records to be consumed and committed by the connector
+        connectorHandle.expectedRecords(recordsProduced);
+        connectorHandle.expectedCommits(recordsProduced);
+
+        StartAndStopLatch connectorStart = connectorAndTaskStart(3);
+        props.put(TASKS_MAX_CONFIG, "3");
+        // start a source connector
+        connect.configureConnector(CONNECTOR_NAME, props);
+        assertConnectorStarted(connectorStart);
+
+        assertProducersAreFencedOnReconfiguration(3, 5, topic, props);
+        assertProducersAreFencedOnReconfiguration(5, 1, topic, props);
+        assertProducersAreFencedOnReconfiguration(1, 5, topic, props);
+        assertProducersAreFencedOnReconfiguration(5, 3, topic, props);
+
+        // Do a final sanity check to make sure that the last generation of tasks is able to run
+        log.info("Waiting for records to be provided to worker by task");
+        // wait for the connector tasks to produce enough records
+        connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+        log.info("Waiting for records to be committed to Kafka by worker");
+        // wait for the connector tasks to commit enough records
+        connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+        StartAndStopLatch connectorStop = connectorHandle.expectedStops(1, true);
+        connect.deleteConnector(CONNECTOR_NAME);
+        assertConnectorStopped(connectorStop);
+
+        // consume all records from the source topic or fail, to ensure that they were correctly produced
+        ConsumerRecords<byte[], byte[]> records = connect.kafka().consumeAll(
+                CONSUME_RECORDS_TIMEOUT_MS,
+                Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                null,
+                topic
+        );
+        assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + records.count(),
+                records.count() >= recordsProduced);
+        // We used at most five tasks during the tests; each of them should have been able to produce records
+        assertExactlyOnceSeqnos(records, 5);
+    }
+
+    /**
+     * This test ensures that tasks are marked failed in the status API when the round of
+     * zombie fencing that takes place before they are brought up fails. In addition, once
+     * the issue with the connector config that made fencing impossible is rectified, tasks
+     * can be successfully restarted.
+     * <p>
+     * Fencing failures are induced by bringing up an ACL-secured Kafka cluster and creating
+     * a connector whose principal is not authorized to access the transactional IDs that Connect
+     * uses for its tasks.
+     * <p>
+     * When the connector is initially brought up, no fencing is necessary. However, once it is
+     * reconfigured and generates new task configs, a round of zombie fencing is triggered,
+     * and all of its tasks fail when that round of zombie fencing fails.
+     * <p>
+     * After, the connector's principal is granted access to the necessary transactional IDs,
+     * all of its tasks are restarted, and we verify that they are able to come up successfully
+     * this time.
+     */
+    @Test
+    public void testTasksFailOnInabilityToFence() throws Exception {
+        brokerProps.put("authorizer.class.name", "kafka.security.authorizer.AclAuthorizer");
+        brokerProps.put("sasl.enabled.mechanisms", "PLAIN");
+        brokerProps.put("sasl.mechanism.inter.broker.protocol", "PLAIN");
+        brokerProps.put("security.inter.broker.protocol", "SASL_PLAINTEXT");
+        brokerProps.put("listeners", "SASL_PLAINTEXT://localhost:0");
+        brokerProps.put("listener.name.sasl_plaintext.plain.sasl.jaas.config",
+                "org.apache.kafka.common.security.plain.PlainLoginModule required "
+                        + "username=\"super\" "
+                        + "password=\"super_pwd\" "
+                        + "user_connector=\"connector_pwd\" "
+                        + "user_super=\"super_pwd\";");
+        brokerProps.put("super.users", "User:super");
+
+        Map<String, String> superUserClientConfig = new HashMap<>();
+        superUserClientConfig.put("sasl.mechanism", "PLAIN");
+        superUserClientConfig.put("security.protocol", "SASL_PLAINTEXT");
+        superUserClientConfig.put("sasl.jaas.config",
+                "org.apache.kafka.common.security.plain.PlainLoginModule required "
+                        + "username=\"super\" "
+                        + "password=\"super_pwd\";");
+        // Give the worker super-user privileges
+        workerProps.putAll(superUserClientConfig);
+
+        final String globalOffsetsTopic = "connect-worker-offsets-topic";
+        workerProps.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, globalOffsetsTopic);
+
+        startConnect();
+
+        String topic = "test-topic";
+        Admin admin = connect.kafka().createAdminClient(Utils.mkProperties(superUserClientConfig));
+        admin.createTopics(Collections.singleton(new NewTopic(topic, 3, (short) 1))).all().get();
+
+        Map<String, String> props = new HashMap<>();
+        int tasksMax = 2; // Use two tasks since single-task connectors don't require zombie fencing
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TASKS_MAX_CONFIG, Integer.toString(tasksMax));
+        // Give the connectors' consumer and producer super-user privileges
+        superUserClientConfig.forEach((property, value) -> {
+            props.put(CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX + property, value);
+            props.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + property, value);
+        });
+        // But limit its admin client's privileges
+        props.put(CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX + "sasl.mechanism", "PLAIN");
+        props.put(CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX + "security.protocol", "SASL_PLAINTEXT");
+        props.put(CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX + "sasl.jaas.config",
+                "org.apache.kafka.common.security.plain.PlainLoginModule required "
+                        + "username=\"connector\" "
+                        + "password=\"connector_pwd\";");
+        // Grant the connector's admin permissions to access the topics for its records and offsets
+        // Intentionally leave out permissions required for fencing
+        admin.createAcls(Arrays.asList(
+                new AclBinding(
+                        new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL),
+                        new AccessControlEntry("User:connector", "*", AclOperation.ALL, AclPermissionType.ALLOW)
+                ),
+                new AclBinding(
+                        new ResourcePattern(ResourceType.TOPIC, globalOffsetsTopic, PatternType.LITERAL),
+                        new AccessControlEntry("User:connector", "*", AclOperation.ALL, AclPermissionType.ALLOW)
+                )
+        )).all().get();
+
+        StartAndStopLatch connectorStart = connectorAndTaskStart(tasksMax);
+
+        log.info("Bringing up connector with fresh slate; fencing should not be necessary");
+        connect.configureConnector(CONNECTOR_NAME, props);
+        assertConnectorStarted(connectorStart);
+        // Verify that the connector and its tasks have been able to start successfully
+        connect.assertions().assertConnectorAndExactlyNumTasksAreRunning(CONNECTOR_NAME, tasksMax, "Connector and task should have started successfully");
+
+        log.info("Reconfiguring connector; fencing should be necessary, and tasks should fail to start");
+        props.put("message.in.a.bottle", "19e184427ac45bd34c8588a4e771aa1a");
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        // Verify that the task has failed, and that the failure is visible to users via the REST API
+        connect.assertions().assertConnectorIsRunningAndTasksHaveFailed(CONNECTOR_NAME, tasksMax, "Task should have failed on startup");
+
+        // Now grant the necessary permissions for fencing to the connector's admin
+        admin.createAcls(Arrays.asList(
+                new AclBinding(
+                        new ResourcePattern(ResourceType.TRANSACTIONAL_ID, Worker.taskTransactionalId(CLUSTER_GROUP_ID, CONNECTOR_NAME, 0), PatternType.LITERAL),
+                        new AccessControlEntry("User:connector", "*", AclOperation.ALL, AclPermissionType.ALLOW)
+                ),
+                new AclBinding(
+                        new ResourcePattern(ResourceType.TRANSACTIONAL_ID, Worker.taskTransactionalId(CLUSTER_GROUP_ID, CONNECTOR_NAME, 1), PatternType.LITERAL),
+                        new AccessControlEntry("User:connector", "*", AclOperation.ALL, AclPermissionType.ALLOW)
+                )
+        ));
+
+        log.info("Restarting connector after tweaking its ACLs; fencing should succeed this time");
+        connect.restartConnectorAndTasks(CONNECTOR_NAME, false, true, false);
+        // Verify that the connector and its tasks have been able to restart successfully
+        connect.assertions().assertConnectorAndExactlyNumTasksAreRunning(CONNECTOR_NAME, tasksMax, "Connector and task should have restarted successfully");
+    }
+
+    /**
+     * This test focuses extensively on the per-connector offsets feature.
+     * <p>
+     * First, a connector is brought up whose producer is configured to write to a different Kafka cluster
+     * than the one the Connect cluster users for its internal topics, then the contents of the connector's
+     * dedicated offsets topic and the worker's internal offsets topic are inspected to ensure that offsets
+     * have been backed up from the dedicated topic to the global topic.
+     * <p>
+     * Then, a "soft downgrade" is simulated: the Connect cluster is shut down and reconfigured to disable
+     * exactly-once support. The cluster is brought up again, the connector is allowed to produce some data,
+     * the connector is shut down, and this time, the records the connector has produced are inspected for
+     * accuracy. Because of the downgrade, exactly-once guarantees are lost, but we check to make sure that
+     * the task has maintained exactly-once delivery <i>up to the last-committed record</i>.
+     */
+    @Test
+    public void testSeparateOffsetsTopic() throws Exception {
+        final String globalOffsetsTopic = "connect-worker-offsets-topic";
+        workerProps.put(DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG, globalOffsetsTopic);
+
+        startConnect();
+        EmbeddedKafkaCluster connectorTargetedCluster = new EmbeddedKafkaCluster(1, brokerProps);
+        try (Closeable clusterShutdown = connectorTargetedCluster::stop) {
+            connectorTargetedCluster.start();
+            String topic = "test-topic";
+            connectorTargetedCluster.createTopic(topic, 3);
+
+            int numTasks = 1;
+            int recordsProduced = 100;
+
+            Map<String, String> props = new HashMap<>();
+            props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getName());
+            props.put(TASKS_MAX_CONFIG, Integer.toString(numTasks));
+            props.put(TOPIC_CONFIG, topic);
+            props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+            props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+            props.put(NAME_CONFIG, CONNECTOR_NAME);
+            props.put(TRANSACTION_BOUNDARY_CONFIG, POLL.toString());
+            props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+            props.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, connectorTargetedCluster.bootstrapServers());
+            props.put(CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, connectorTargetedCluster.bootstrapServers());
+            props.put(CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, connectorTargetedCluster.bootstrapServers());
+            String offsetsTopic = CONNECTOR_NAME + "-offsets";
+            props.put(OFFSETS_TOPIC_CONFIG, offsetsTopic);
+
+            // expect all records to be consumed and committed by the connector
+            connectorHandle.expectedRecords(recordsProduced);
+            connectorHandle.expectedCommits(recordsProduced);
+
+            // start a source connector
+            connect.configureConnector(CONNECTOR_NAME, props);
+
+            log.info("Waiting for records to be provided to worker by task");
+            // wait for the connector tasks to produce enough records
+            connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+            log.info("Waiting for records to be committed to Kafka by worker");
+            // wait for the connector tasks to commit enough records
+            connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+            // consume at least the expected number of records from the source topic or fail, to ensure that they were correctly produced
+            int recordNum = connectorTargetedCluster
+                    .consume(
+                            recordsProduced,
+                            TimeUnit.MINUTES.toMillis(1),
+                            Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                            "test-topic")
+                    .count();
+            assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + recordNum,
+                    recordNum >= recordsProduced);
+
+            // also consume from the connector's dedicated offsets topic
+            ConsumerRecords<byte[], byte[]> offsetRecords = connectorTargetedCluster
+                    .consumeAll(
+                            TimeUnit.MINUTES.toMillis(1),
+                            Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                            null,
+                            offsetsTopic
+                    );
+            List<Long> seqnos = parseAndAssertOffsetsForSingleTask(offsetRecords);
+            seqnos.forEach(seqno ->
+                assertEquals("Offset commits should occur on connector-defined poll boundaries, which happen every " + recordsProduced + " records",
+                        0, seqno % recordsProduced)
+            );
+
+            // also consume from the cluster's global offsets topic
+            offsetRecords = connect.kafka()
+                    .consumeAll(
+                            TimeUnit.MINUTES.toMillis(1),
+                            null,
+                            null,
+                            globalOffsetsTopic
+                    );
+            seqnos = parseAndAssertOffsetsForSingleTask(offsetRecords);
+            seqnos.forEach(seqno ->
+                assertEquals("Offset commits should occur on connector-defined poll boundaries, which happen every " + recordsProduced + " records",
+                        0, seqno % recordsProduced)
+            );
+
+            // Shut down the whole cluster
+            connect.workers().forEach(connect::removeWorker);
+            // Reconfigure the cluster with exactly-once support disabled
+            workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "disabled");
+
+            // Establish new expectations for records+offsets
+            connectorHandle.expectedRecords(recordsProduced);
+            connectorHandle.expectedCommits(recordsProduced);
+
+            // Restart the whole cluster
+            for (int i = 0; i < DEFAULT_NUM_WORKERS; i++) {
+                connect.addWorker();
+            }
+
+            // And perform a basic sanity check that the cluster is able to come back up, our connector and its task are able to resume running,
+            // and the task is still able to produce source records and commit offsets
+            connect.assertions().assertAtLeastNumWorkersAreUp(DEFAULT_NUM_WORKERS, "cluster did not restart in time");
+            connect.assertions().assertConnectorAndExactlyNumTasksAreRunning(
+                    CONNECTOR_NAME,
+                    1,
+                    "connector and tasks did not resume running after cluster restart in time"
+            );
+
+            log.info("Waiting for records to be provided to worker by task");
+            // wait for the connector tasks to produce enough records
+            connectorHandle.awaitRecords(SOURCE_TASK_PRODUCE_TIMEOUT_MS);
+
+            log.info("Waiting for records to be committed to Kafka by worker");
+            // wait for the connector tasks to commit enough records
+            connectorHandle.awaitCommits(TimeUnit.MINUTES.toMillis(1));
+
+            StartAndStopLatch connectorStop = connectorHandle.expectedStops(1, true);
+            connect.deleteConnector(CONNECTOR_NAME);
+            assertConnectorStopped(connectorStop);
+
+            // consume all records from the source topic or fail, to ensure that they were correctly produced
+            ConsumerRecords<byte[], byte[]> sourceRecords = connectorTargetedCluster.consumeAll(
+                    CONSUME_RECORDS_TIMEOUT_MS,
+                    Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                    null,
+                    topic
+            );
+            assertTrue("Not enough records produced by source connector. Expected at least: " + recordsProduced + " + but got " + sourceRecords.count(),
+                    sourceRecords.count() >= recordsProduced);
+            // also have to check which offsets have actually been committed, since we no longer have exactly-once guarantees
+            offsetRecords = connectorTargetedCluster.consumeAll(
+                    CONSUME_RECORDS_TIMEOUT_MS,
+                    Collections.singletonMap(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed"),
+                    null,
+                    offsetsTopic
+            );
+            assertAtLeastOnceSeqnos(sourceRecords, offsetRecords, numTasks);
+        }
+    }
+
+    /**
+     * A simple test to ensure that source tasks fail when trying to produce to their own offsets topic.
+     * <p>
+     * We fail the tasks in order to prevent deadlock that occurs when:
+     * <ol>
+     *     <li>
+     *         A task provides a record whose topic is the task's offsets topic
+     *     </li>
+     *     <li>
+     *         That record is dispatched to the task's producer in a transaction that remains open
+     *         at least until the worker polls the task again
+     *     </li>
+     *     <li>
+     *         In the subsequent call to SourceTask::poll, the task requests offsets from the worker
+     *         (which requires a read to the end of the offsets topic, and will block until any open
+     *         transactions on the topic are either committed or aborted)
+     *     </li>
+     * </ol>
+     */
+    @Test
+    public void testPotentialDeadlockWhenProducingToOffsetsTopic() throws Exception {
+        connectBuilder.numWorkers(1);
+        startConnect();
+
+        String topic = "test-topic";
+        connect.kafka().createTopic(topic, 3);
+
+        int recordsProduced = 100;
+
+        Map<String, String> props = new HashMap<>();
+        // See below; this connector does nothing except request offsets from the worker in SourceTask::poll
+        // and then return a single record targeted at its offsets topic
+        props.put(CONNECTOR_CLASS_CONFIG, NaughtyConnector.class.getName());
+        props.put(TASKS_MAX_CONFIG, "1");
+        props.put(NAME_CONFIG, CONNECTOR_NAME);
+        props.put(TRANSACTION_BOUNDARY_CONFIG, INTERVAL.toString());
+        props.put(MESSAGES_PER_POLL_CONFIG, Integer.toString(recordsProduced));
+        props.put(OFFSETS_TOPIC_CONFIG, "whoops");
+
+        // start a source connector
+        connect.configureConnector(CONNECTOR_NAME, props);
+
+        connect.assertions().assertConnectorIsRunningAndTasksHaveFailed(
+            CONNECTOR_NAME, 1, "Task should have failed after trying to produce to its own offsets topic");
+    }
+
+    private ConfigInfo findConfigInfo(String property, ConfigInfos validationResult) {
+        return validationResult.values().stream()
+                .filter(info -> property.equals(info.configKey().name()))
+                .findAny()
+                .orElseThrow(() -> new AssertionError("Failed to find configuration validation result for property '" + property + "'"));
+    }
+
+    private List<Long> parseAndAssertOffsetsForSingleTask(ConsumerRecords<byte[], byte[]> offsetRecords) {
+        Map<Integer, List<Long>> parsedOffsets = parseOffsetForTasks(offsetRecords);
+        assertEquals("Expected records to only be produced from a single task", Collections.singleton(0), parsedOffsets.keySet());
+        return parsedOffsets.get(0);
+    }
+
+    private List<Long> parseAndAssertValuesForSingleTask(ConsumerRecords<byte[], byte[]> sourceRecords) {
+        Map<Integer, List<Long>> parsedValues = parseValuesForTasks(sourceRecords);
+        assertEquals("Expected records to only be produced from a single task", Collections.singleton(0), parsedValues.keySet());
+        return parsedValues.get(0);
+    }
+
+    private void assertExactlyOnceSeqnos(ConsumerRecords<byte[], byte[]> sourceRecords, int numTasks) {
+        Map<Integer, List<Long>> parsedValues = parseValuesForTasks(sourceRecords);
+        assertSeqnos(parsedValues, numTasks);
+    }
+
+    private void assertAtLeastOnceSeqnos(ConsumerRecords<byte[], byte[]> sourceRecords, ConsumerRecords<byte[], byte[]> offsetRecords, int numTasks) {
+        Map<Integer, List<Long>> parsedValues = parseValuesForTasks(sourceRecords);
+        Map<Integer, Long> lastCommittedValues = parseOffsetForTasks(offsetRecords)
+                .entrySet().stream().collect(Collectors.toMap(
+                        Map.Entry::getKey,
+                        e -> Collections.max(e.getValue())
+                ));
+        parsedValues.replaceAll((task, values) -> {
+            Long committedValue = lastCommittedValues.get(task);
+            assertNotNull("No committed offset found for task " + task, committedValue);
+            return values.stream().filter(v -> v <= committedValue).collect(Collectors.toList());
+        });
+        assertSeqnos(parsedValues, numTasks);
+    }
+
+    private void assertSeqnos(Map<Integer, List<Long>> parsedValues, int numTasks) {
+        Set<Integer> expectedKeys = IntStream.range(0, numTasks).boxed().collect(Collectors.toSet());
+        assertEquals("Expected records to be produced by each task", expectedKeys, parsedValues.keySet());
+
+        parsedValues.forEach((taskId, seqnos) -> {
+            // We don't check for order here because the records may have been produced to multiple topic partitions,
+            // which makes in-order consumption impossible
+            Set<Long> expectedSeqnos = LongStream.range(1, seqnos.size() + 1).boxed().collect(Collectors.toSet());
+            Set<Long> actualSeqnos = new HashSet<>(seqnos);
+
+            Set<Long> missingSeqnos = new HashSet<>(expectedSeqnos);
+            missingSeqnos.removeAll(actualSeqnos);
+            Set<Long> extraSeqnos = new HashSet<>(actualSeqnos);
+            extraSeqnos.removeAll(expectedSeqnos);
+
+            // Try to provide the most friendly error message possible if this test fails
+            assertTrue(
+                    "Seqnos for task " + taskId + " should start at 1 and increase strictly by 1 with each record, " +
+                            "but the actual seqnos did not.\n" +
+                            "Seqnos that should have been emitted but were not: " + missingSeqnos + "\n" +
+                            "seqnos that should not have been emitted but were: " + extraSeqnos,
+                    missingSeqnos.isEmpty() && extraSeqnos.isEmpty()
+            );
+        });
+    }
+
+    private Map<Integer, List<Long>> parseValuesForTasks(ConsumerRecords<byte[], byte[]> sourceRecords) {
+        Map<Integer, List<Long>> result = new HashMap<>();
+        for (ConsumerRecord<byte[], byte[]> sourceRecord : sourceRecords) {
+            assertNotNull("Record key should not be null", sourceRecord.key());
+            assertNotNull("Record value should not be null", sourceRecord.value());
+
+            String key = new String(sourceRecord.key());
+            String value = new String(sourceRecord.value());
+
+            String keyPrefix = "key-";
+            String valuePrefix = "value-";
+
+            assertTrue("Key should start with \"" + keyPrefix + "\"", key.startsWith(keyPrefix));
+            assertTrue("Value should start with \"" + valuePrefix + "\"", value.startsWith(valuePrefix));
+            assertEquals(
+                    "key and value should be identical after prefix",
+                    key.substring(keyPrefix.length()),
+                    value.substring(valuePrefix.length())
+            );
+
+            String[] split = key.substring(keyPrefix.length()).split("-");
+            assertEquals("Key should match pattern 'key-<connectorName>-<taskId>-<seqno>", 3, split.length);
+            assertEquals("Key should match pattern 'key-<connectorName>-<taskId>-<seqno>", CONNECTOR_NAME, split[0]);
+
+            int taskId;
+            try {
+                taskId = Integer.parseInt(split[1], 10);
+            } catch (NumberFormatException e) {
+                throw new AssertionError("Task ID in key should be an integer, was '" + split[1] + "'", e);
+            }
+
+            long seqno;
+            try {
+                seqno = Long.parseLong(split[2], 10);
+            } catch (NumberFormatException e) {
+                throw new AssertionError("Seqno in key should be a long, was '" + split[2] + "'", e);
+            }
+
+            result.computeIfAbsent(taskId, t -> new ArrayList<>()).add(seqno);
+        }
+        return result;
+    }
+
+    private Map<Integer, List<Long>> parseOffsetForTasks(ConsumerRecords<byte[], byte[]> offsetRecords) {
+        JsonConverter offsetsConverter = new JsonConverter();
+        // The JSON converter behaves identically for keys and values. If that ever changes, we may need to update this test to use
+        // separate converter instances.
+        offsetsConverter.configure(Collections.singletonMap(JsonConverterConfig.SCHEMAS_ENABLE_CONFIG, "false"), false);
+
+        Map<Integer, List<Long>> result = new HashMap<>();
+        for (ConsumerRecord<byte[], byte[]> offsetRecord : offsetRecords) {
+            Object keyObject = offsetsConverter.toConnectData("topic name is not used by converter", offsetRecord.key()).value();
+            Object valueObject = offsetsConverter.toConnectData("topic name is not used by converter", offsetRecord.value()).value();
+
+            assertNotNull("Offset key should not be null", keyObject);
+            assertNotNull("Offset value should not be null", valueObject);
+
+            @SuppressWarnings("unchecked")
+            List<Object> key = assertAndCast(keyObject, List.class, "Key");
+            assertEquals(
+                    "Offset topic key should be a list containing two elements: the name of the connector, and the connector-provided source partition",
+                    2,
+                    key.size()
+            );
+            assertEquals(CONNECTOR_NAME, key.get(0));
+            @SuppressWarnings("unchecked")
+            Map<String, Object> partition = assertAndCast(key.get(1), Map.class, "Key[1]");
+            Object taskIdObject = partition.get("task.id");
+            assertNotNull("Serialized source partition should contain 'task.id' field from MonitorableSourceConnector", taskIdObject);
+            String taskId = assertAndCast(taskIdObject, String.class, "task ID");
+            assertTrue("task ID should match pattern '<connectorName>-<taskId>", taskId.startsWith(CONNECTOR_NAME + "-"));
+            String taskIdRemainder = taskId.substring(CONNECTOR_NAME.length() + 1);
+            int taskNum;
+            try {
+                taskNum = Integer.parseInt(taskIdRemainder);
+            } catch (NumberFormatException e) {
+                throw new AssertionError("task ID should match pattern '<connectorName>-<taskId>', where <taskId> is an integer", e);
+            }
+
+            @SuppressWarnings("unchecked")
+            Map<String, Object> value = assertAndCast(valueObject, Map.class, "Value");
+
+            Object seqnoObject = value.get("saved");
+            assertNotNull("Serialized source offset should contain 'seqno' field from MonitorableSourceConnector", seqnoObject);
+            long seqno = assertAndCast(seqnoObject, Long.class, "Seqno offset field");
+
+            result.computeIfAbsent(taskNum, t -> new ArrayList<>()).add(seqno);
+        }
+        return result;
+    }
+
+    @SuppressWarnings("unchecked")
+    private static <T> T assertAndCast(Object o, Class<T> klass, String objectDescription) {
+        String className = o == null ? "null" : o.getClass().getName();
+        assertTrue(objectDescription + " should be " + klass.getName() + "; was " + className + " instead", klass.isInstance(o));
+        return (T) o;
+    }
+
+    /**
+     * Clear all existing task handles for the connector, then preemptively create {@code numTasks} many task handles for it,
+     * and return a {@link StartAndStopLatch} that can be used to {@link StartAndStopLatch#await(long, TimeUnit) await}
+     * the startup of that connector and the expected number of tasks.
+     * @param numTasks the number of tasks that should be started
+     * @return a {@link StartAndStopLatch} that will block until the connector and the expected number of tasks have started
+     */
+    private StartAndStopLatch connectorAndTaskStart(int numTasks) {
+        connectorHandle.clearTasks();
+        IntStream.range(0, numTasks)
+                .mapToObj(i -> MonitorableSourceConnector.taskId(CONNECTOR_NAME, i))
+                .forEach(connectorHandle::taskHandle);
+        return connectorHandle.expectedStarts(1, true);
+    }
+
+    private void assertConnectorStarted(StartAndStopLatch connectorStart) throws InterruptedException {
+        assertTrue("Connector and tasks did not finish startup in time",
+                connectorStart.await(
+                        EmbeddedConnectClusterAssertions.CONNECTOR_SETUP_DURATION_MS,
+                        TimeUnit.MILLISECONDS
+                )
+        );
+    }
+
+    private void assertConnectorStopped(StartAndStopLatch connectorStop) throws InterruptedException {
+        assertTrue(
+                "Connector and tasks did not finish shutdown in time",
+                connectorStop.await(
+                        EmbeddedConnectClusterAssertions.CONNECTOR_SHUTDOWN_DURATION_MS,
+                        TimeUnit.MILLISECONDS
+                )
+        );
+    }
+
+    private void assertProducersAreFencedOnReconfiguration(
+            int currentNumTasks,
+            int newNumTasks,
+            String topic,
+            Map<String, String> baseConnectorProps) throws InterruptedException {
+
+        // create a collection of producers that simulate the producers used for the existing tasks
+        List<KafkaProducer<byte[], byte[]>> producers = IntStream.range(0, currentNumTasks)
+                .mapToObj(i -> transactionalProducer(
+                        "simulated-task-producer-" + CONNECTOR_NAME + "-" + i,
+                        Worker.taskTransactionalId(CLUSTER_GROUP_ID, CONNECTOR_NAME, i)
+                )).collect(Collectors.toList());
+
+        producers.forEach(KafkaProducer::initTransactions);
+
+        // reconfigure the connector with a new number of tasks
+        StartAndStopLatch connectorStart = connectorAndTaskStart(newNumTasks);
+        baseConnectorProps.put(TASKS_MAX_CONFIG, Integer.toString(newNumTasks));
+        log.info("Reconfiguring connector from {} tasks to {}", currentNumTasks, newNumTasks);
+        connect.configureConnector(CONNECTOR_NAME, baseConnectorProps);
+        assertConnectorStarted(connectorStart);
+
+        // validate that the old producers were fenced out
+        producers.forEach(producer -> assertTransactionalProducerIsFenced(producer, topic));
+    }
+
+    private KafkaProducer<byte[], byte[]> transactionalProducer(String clientId, String transactionalId) {
+        Map<String, Object> transactionalProducerProps = new HashMap<>();
+        transactionalProducerProps.put(CLIENT_ID_CONFIG, clientId);
+        transactionalProducerProps.put(ENABLE_IDEMPOTENCE_CONFIG, true);
+        transactionalProducerProps.put(TRANSACTIONAL_ID_CONFIG, transactionalId);
+        return connect.kafka().createProducer(transactionalProducerProps);
+    }
+
+    private void assertTransactionalProducerIsFenced(KafkaProducer<byte[], byte[]> producer, String topic) {
+        producer.beginTransaction();
+        assertThrows("Producer should be fenced out",
+                ProducerFencedException.class,
+                () -> {
+                    producer.send(new ProducerRecord<>(topic, new byte[] {69}, new byte[] {96}));
+                    producer.commitTransaction();
+                }
+        );
+        producer.close(Duration.ZERO);
+    }
+
+    public static class NaughtyConnector extends SourceConnector {
+        private Map<String, String> props;
+
+        @Override
+        public void start(Map<String, String> props) {
+            this.props = props;
+        }
+
+        @Override
+        public Class<? extends Task> taskClass() {
+            return NaughtyTask.class;
+        }
+
+        @Override
+        public List<Map<String, String>> taskConfigs(int maxTasks) {
+            return IntStream.range(0, maxTasks).mapToObj(i -> props).collect(Collectors.toList());
+        }
+
+        @Override
+        public void stop() {
+        }
+
+        @Override
+        public ConfigDef config() {
+            return new ConfigDef();
+        }
+
+        @Override
+        public String version() {
+            return "none";
+        }
+    }
+
+    public static class NaughtyTask extends SourceTask {
+        private String topic;
+
+        @Override
+        public void start(Map<String, String> props) {
+            if (!props.containsKey(OFFSETS_TOPIC_CONFIG)) {
+                throw new ConnectException("No offsets topic");
+            }
+            this.topic = props.get(OFFSETS_TOPIC_CONFIG);
+        }
+
+        @Override
+        public List<SourceRecord> poll() {
+            // Request a read to the end of the offsets topic
+            context.offsetStorageReader().offset(Collections.singletonMap("", null));
+            // Produce a record to the offsets topic
+            return Collections.singletonList(new SourceRecord(null, null, topic, null, "", null, null));
+        }
+
+        @Override
+        public void stop() {
+        }
+
+        @Override
+        public String version() {
+            return "none";
+        }
+    }
+}
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/MonitorableSourceConnector.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/MonitorableSourceConnector.java
index 4f13ad08a2d76..33ba1588a7d04 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/MonitorableSourceConnector.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/MonitorableSourceConnector.java
@@ -20,8 +20,11 @@
 import org.apache.kafka.clients.producer.RecordMetadata;
 import org.apache.kafka.connect.connector.Task;
 import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.connect.header.ConnectHeaders;
 import org.apache.kafka.connect.runtime.SampleSourceConnector;
+import org.apache.kafka.connect.source.ConnectorTransactionBoundaries;
+import org.apache.kafka.connect.source.ExactlyOnceSupport;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.tools.ThroughputThrottler;
@@ -32,6 +35,7 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -47,6 +51,20 @@ public class MonitorableSourceConnector extends SampleSourceConnector {
     private static final Logger log = LoggerFactory.getLogger(MonitorableSourceConnector.class);
 
     public static final String TOPIC_CONFIG = "topic";
+    public static final String MESSAGES_PER_POLL_CONFIG = "messages.per.poll";
+
+    public static final String CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG = "custom.exactly.once.support";
+    public static final String EXACTLY_ONCE_SUPPORTED = "supported";
+    public static final String EXACTLY_ONCE_UNSUPPORTED = "unsupported";
+    public static final String EXACTLY_ONCE_NULL = "null";
+    public static final String EXACTLY_ONCE_FAIL = "fail";
+
+    public static final String CUSTOM_TRANSACTION_BOUNDARIES_CONFIG = "custom.transaction.boundaries";
+    public static final String TRANSACTION_BOUNDARIES_SUPPORTED = "supported";
+    public static final String TRANSACTION_BOUNDARIES_UNSUPPORTED = "unsupported";
+    public static final String TRANSACTION_BOUNDARIES_NULL = "null";
+    public static final String TRANSACTION_BOUNDARIES_FAIL = "fail";
+
     private String connectorName;
     private ConnectorHandle connectorHandle;
     private Map<String, String> commonConfigs;
@@ -74,7 +92,7 @@ public List<Map<String, String>> taskConfigs(int maxTasks) {
         for (int i = 0; i < maxTasks; i++) {
             Map<String, String> config = new HashMap<>(commonConfigs);
             config.put("connector.name", connectorName);
-            config.put("task.id", connectorName + "-" + i);
+            config.put("task.id", taskId(connectorName, i));
             configs.add(config);
         }
         return configs;
@@ -92,18 +110,55 @@ public ConfigDef config() {
         return new ConfigDef();
     }
 
+    @Override
+    public ExactlyOnceSupport exactlyOnceSupport(Map<String, String> connectorConfig) {
+        String supportLevel = connectorConfig.getOrDefault(CUSTOM_EXACTLY_ONCE_SUPPORT_CONFIG, "null").toLowerCase(Locale.ROOT);
+        switch (supportLevel) {
+            case EXACTLY_ONCE_SUPPORTED:
+                return ExactlyOnceSupport.SUPPORTED;
+            case EXACTLY_ONCE_UNSUPPORTED:
+                return ExactlyOnceSupport.UNSUPPORTED;
+            case EXACTLY_ONCE_FAIL:
+                throw new ConnectException("oops");
+            default:
+            case EXACTLY_ONCE_NULL:
+                return null;
+        }
+    }
+
+    @Override
+    public ConnectorTransactionBoundaries canDefineTransactionBoundaries(Map<String, String> connectorConfig) {
+        String supportLevel = connectorConfig.getOrDefault(CUSTOM_TRANSACTION_BOUNDARIES_CONFIG, TRANSACTION_BOUNDARIES_UNSUPPORTED).toLowerCase(Locale.ROOT);
+        switch (supportLevel) {
+            case TRANSACTION_BOUNDARIES_SUPPORTED:
+                return ConnectorTransactionBoundaries.SUPPORTED;
+            case TRANSACTION_BOUNDARIES_FAIL:
+                throw new ConnectException("oh no :(");
+            case TRANSACTION_BOUNDARIES_NULL:
+                return null;
+            default:
+            case TRANSACTION_BOUNDARIES_UNSUPPORTED:
+                return ConnectorTransactionBoundaries.UNSUPPORTED;
+        }
+    }
+
+    public static String taskId(String connectorName, int taskId) {
+        return connectorName + "-" + taskId;
+    }
+
     public static class MonitorableSourceTask extends SourceTask {
-        private String connectorName;
         private String taskId;
         private String topicName;
         private TaskHandle taskHandle;
         private volatile boolean stopped;
         private long startingSeqno;
         private long seqno;
-        private long throughput;
         private int batchSize;
         private ThroughputThrottler throttler;
 
+        private long priorTransactionBoundary;
+        private long nextTransactionBoundary;
+
         @Override
         public String version() {
             return "unknown";
@@ -112,21 +167,24 @@ public String version() {
         @Override
         public void start(Map<String, String> props) {
             taskId = props.get("task.id");
-            connectorName = props.get("connector.name");
+            String connectorName = props.get("connector.name");
             topicName = props.getOrDefault(TOPIC_CONFIG, "sequential-topic");
-            throughput = Long.parseLong(props.getOrDefault("throughput", "-1"));
-            batchSize = Integer.parseInt(props.getOrDefault("messages.per.poll", "1"));
+            batchSize = Integer.parseInt(props.getOrDefault(MESSAGES_PER_POLL_CONFIG, "1"));
             taskHandle = RuntimeHandles.get().connectorHandle(connectorName).taskHandle(taskId);
             Map<String, Object> offset = Optional.ofNullable(
-                    context.offsetStorageReader().offset(Collections.singletonMap("task.id", taskId)))
+                    context.offsetStorageReader().offset(sourcePartition(taskId)))
                     .orElse(Collections.emptyMap());
             startingSeqno = Optional.ofNullable((Long) offset.get("saved")).orElse(0L);
+            seqno = startingSeqno;
             log.info("Started {} task {} with properties {}", this.getClass().getSimpleName(), taskId, props);
-            throttler = new ThroughputThrottler(throughput, System.currentTimeMillis());
+            throttler = new ThroughputThrottler(Long.parseLong(props.getOrDefault("throughput", "-1")), System.currentTimeMillis());
             taskHandle.recordTaskStart();
+            priorTransactionBoundary = 0;
+            nextTransactionBoundary = 1;
             if (Boolean.parseBoolean(props.getOrDefault("task-" + taskId + ".start.inject.error", "false"))) {
                 throw new RuntimeException("Injecting errors during task start");
             }
+            calculateNextBoundary();
         }
 
         @Override
@@ -136,19 +194,24 @@ public List<SourceRecord> poll() {
                     throttler.throttle();
                 }
                 taskHandle.record(batchSize);
-                log.info("Returning batch of {} records", batchSize);
+                log.trace("Returning batch of {} records", batchSize);
                 return LongStream.range(0, batchSize)
-                        .mapToObj(i -> new SourceRecord(
-                                Collections.singletonMap("task.id", taskId),
-                                Collections.singletonMap("saved", ++seqno),
-                                topicName,
-                                null,
-                                Schema.STRING_SCHEMA,
-                                "key-" + taskId + "-" + seqno,
-                                Schema.STRING_SCHEMA,
-                                "value-" + taskId + "-" + seqno,
-                                null,
-                                new ConnectHeaders().addLong("header-" + seqno, seqno)))
+                        .mapToObj(i -> {
+                            seqno++;
+                            SourceRecord record = new SourceRecord(
+                                    sourcePartition(taskId),
+                                    sourceOffset(seqno),
+                                    topicName,
+                                    null,
+                                    Schema.STRING_SCHEMA,
+                                    "key-" + taskId + "-" + seqno,
+                                    Schema.STRING_SCHEMA,
+                                    "value-" + taskId + "-" + seqno,
+                                    null,
+                                    new ConnectHeaders().addLong("header-" + seqno, seqno));
+                            maybeDefineTransactionBoundary(record);
+                            return record;
+                        })
                         .collect(Collectors.toList());
             }
             return null;
@@ -172,5 +235,43 @@ public void stop() {
             stopped = true;
             taskHandle.recordTaskStop();
         }
+
+        /**
+         * Calculate the next transaction boundary, i.e., the seqno whose corresponding source record should be used to
+         * either {@link org.apache.kafka.connect.source.TransactionContext#commitTransaction(SourceRecord) commit}
+         * or {@link org.apache.kafka.connect.source.TransactionContext#abortTransaction(SourceRecord) abort} the next transaction.
+         * <p>
+         * This connector defines transactions whose size correspond to successive elements of the Fibonacci sequence,
+         * where transactions with an even number of records are aborted, and those with an odd number of records are committed.
+         */
+        private void calculateNextBoundary() {
+            while (nextTransactionBoundary <= seqno) {
+                nextTransactionBoundary += priorTransactionBoundary;
+                priorTransactionBoundary = nextTransactionBoundary - priorTransactionBoundary;
+            }
+        }
+
+        private void maybeDefineTransactionBoundary(SourceRecord record) {
+            if (context.transactionContext() == null || seqno != nextTransactionBoundary) {
+                return;
+            }
+            // If the transaction boundary ends on an even-numbered offset, abort it
+            // Otherwise, commit
+            boolean abort = nextTransactionBoundary % 2 == 0;
+            calculateNextBoundary();
+            if (abort) {
+                context.transactionContext().abortTransaction(record);
+            } else {
+                context.transactionContext().commitTransaction(record);
+            }
+        }
+    }
+
+    public static Map<String, Object> sourcePartition(String taskId) {
+        return Collections.singletonMap("task.id", taskId);
+    }
+
+    public static Map<String, Object> sourceOffset(long seqno) {
+        return Collections.singletonMap("saved", seqno);
     }
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/RebalanceSourceConnectorsIntegrationTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/RebalanceSourceConnectorsIntegrationTest.java
index ae3e20d2a8f03..855882c9e697b 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/integration/RebalanceSourceConnectorsIntegrationTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/integration/RebalanceSourceConnectorsIntegrationTest.java
@@ -188,6 +188,7 @@ public void testReconfigConnector() throws Exception {
     }
 
     @Test
+    @Ignore // TODO: To be re-enabled once we can make it less flaky (KAFKA-8391)
     public void testDeleteConnector() throws Exception {
         // create test topic
         connect.kafka().createTopic(TOPIC_NAME, NUM_TOPIC_PARTITIONS);
@@ -269,8 +270,7 @@ public void testRemovingWorker() throws Exception {
                 WORKER_SETUP_DURATION_MS, "Connect and tasks are imbalanced between the workers.");
     }
 
-    // should enable it after KAFKA-12495 fixed
-    @Ignore
+    @Ignore // TODO: To be re-enabled once we can make it less flaky (KAFKA-12495, KAFKA-12283)
     @Test
     public void testMultipleWorkersRejoining() throws Exception {
         // create test topic
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java
index 5b9e199e5a1ee..ada507f1eb35b 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractHerderTest.java
@@ -31,7 +31,6 @@
 import org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.connector.policy.PrincipalConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.errors.NotFoundException;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.runtime.isolation.PluginDesc;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
 import org.apache.kafka.connect.runtime.rest.entities.ConfigInfo;
@@ -43,6 +42,7 @@
 import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
 import org.apache.kafka.connect.source.SourceConnector;
 import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
 import org.apache.kafka.connect.storage.StatusBackingStore;
 import org.apache.kafka.connect.transforms.Transformation;
@@ -53,6 +53,7 @@
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.powermock.api.easymock.PowerMock;
+import org.powermock.api.easymock.annotation.Mock;
 import org.powermock.api.easymock.annotation.MockStrict;
 import org.powermock.core.classloader.annotations.PrepareForTest;
 import org.powermock.modules.junit4.PowerMockRunner;
@@ -72,16 +73,16 @@
 
 import static org.apache.kafka.connect.runtime.AbstractHerder.keysWithVariableValues;
 import static org.easymock.EasyMock.anyString;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertThrows;
-import static org.powermock.api.easymock.PowerMock.verifyAll;
-import static org.powermock.api.easymock.PowerMock.replayAll;
-import static org.easymock.EasyMock.strictMock;
 import static org.easymock.EasyMock.partialMockBuilder;
+import static org.easymock.EasyMock.strictMock;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
+import static org.powermock.api.easymock.PowerMock.replayAll;
+import static org.powermock.api.easymock.PowerMock.verifyAll;
 
 @RunWith(PowerMockRunner.class)
 @PrepareForTest({AbstractHerder.class})
@@ -129,10 +130,10 @@ public class AbstractHerderTest {
     }
     private static final ClusterConfigState SNAPSHOT = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
     private static final ClusterConfigState SNAPSHOT_NO_TASKS = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            Collections.emptyMap(), Collections.emptySet());
+            Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
 
     private final String workerId = "workerId";
     private final String kafkaClusterId = "I4ZmrWqfT2e-upky_4fdPA";
@@ -142,10 +143,10 @@ public class AbstractHerderTest {
 
     @MockStrict private Worker worker;
     @MockStrict private WorkerConfigTransformer transformer;
-    @MockStrict private Plugins plugins;
-    @MockStrict private ClassLoader classLoader;
     @MockStrict private ConfigBackingStore configStore;
     @MockStrict private StatusBackingStore statusStore;
+    @MockStrict private ClassLoader classLoader;
+    @Mock private Plugins plugins;
 
     @Test
     public void testConnectors() {
@@ -436,13 +437,18 @@ public void testConfigValidationMissingName() {
         // We expect there to be errors due to the missing name and .... Note that these assertions depend heavily on
         // the config fields for SourceConnectorConfig, but we expect these to change rarely.
         assertEquals(SampleSourceConnector.class.getName(), result.name());
-        assertEquals(Arrays.asList(ConnectorConfig.COMMON_GROUP, ConnectorConfig.TRANSFORMS_GROUP,
-                ConnectorConfig.PREDICATES_GROUP, ConnectorConfig.ERROR_GROUP, SourceConnectorConfig.TOPIC_CREATION_GROUP), result.groups());
+        assertEquals(
+                Arrays.asList(
+                        ConnectorConfig.COMMON_GROUP, ConnectorConfig.TRANSFORMS_GROUP,
+                        ConnectorConfig.PREDICATES_GROUP, ConnectorConfig.ERROR_GROUP,
+                        SourceConnectorConfig.TOPIC_CREATION_GROUP, SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_GROUP,
+                        SourceConnectorConfig.OFFSETS_TOPIC_GROUP),
+                result.groups());
         assertEquals(2, result.errorCount());
         Map<String, ConfigInfo> infos = result.values().stream()
                 .collect(Collectors.toMap(info -> info.configKey().name(), Function.identity()));
-        // Base connector config has 14 fields, connector's configs add 2
-        assertEquals(17, infos.size());
+        // Base connector config has 14 fields, connector's configs add 7
+        assertEquals(21, infos.size());
         // Missing name should generate an error
         assertEquals(ConnectorConfig.NAME_CONFIG,
                 infos.get(ConnectorConfig.NAME_CONFIG).configValue().name());
@@ -531,6 +537,8 @@ public void testConfigValidationTransformsExtendResults() {
                 ConnectorConfig.PREDICATES_GROUP,
                 ConnectorConfig.ERROR_GROUP,
                 SourceConnectorConfig.TOPIC_CREATION_GROUP,
+                SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_GROUP,
+                SourceConnectorConfig.OFFSETS_TOPIC_GROUP,
                 "Transforms: xformA",
                 "Transforms: xformB"
         );
@@ -538,7 +546,7 @@ public void testConfigValidationTransformsExtendResults() {
         assertEquals(2, result.errorCount());
         Map<String, ConfigInfo> infos = result.values().stream()
                 .collect(Collectors.toMap(info -> info.configKey().name(), Function.identity()));
-        assertEquals(22, infos.size());
+        assertEquals(26, infos.size());
         // Should get 2 type fields from the transforms, first adds its own config since it has a valid class
         assertEquals("transforms.xformA.type",
                 infos.get("transforms.xformA.type").configValue().name());
@@ -590,6 +598,8 @@ public void testConfigValidationPredicatesExtendResults() {
                 ConnectorConfig.PREDICATES_GROUP,
                 ConnectorConfig.ERROR_GROUP,
                 SourceConnectorConfig.TOPIC_CREATION_GROUP,
+                SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_GROUP,
+                SourceConnectorConfig.OFFSETS_TOPIC_GROUP,
                 "Transforms: xformA",
                 "Predicates: predX",
                 "Predicates: predY"
@@ -598,7 +608,7 @@ public void testConfigValidationPredicatesExtendResults() {
         assertEquals(2, result.errorCount());
         Map<String, ConfigInfo> infos = result.values().stream()
                 .collect(Collectors.toMap(info -> info.configKey().name(), Function.identity()));
-        assertEquals(24, infos.size());
+        assertEquals(28, infos.size());
         // Should get 2 type fields from the transforms, first adds its own config since it has a valid class
         assertEquals("transforms.xformA.type",
                 infos.get("transforms.xformA.type").configValue().name());
@@ -659,12 +669,14 @@ public void testConfigValidationPrincipalOnlyOverride() {
             ConnectorConfig.TRANSFORMS_GROUP,
             ConnectorConfig.PREDICATES_GROUP,
             ConnectorConfig.ERROR_GROUP,
-            SourceConnectorConfig.TOPIC_CREATION_GROUP
+            SourceConnectorConfig.TOPIC_CREATION_GROUP,
+            SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_GROUP,
+            SourceConnectorConfig.OFFSETS_TOPIC_GROUP
         );
         assertEquals(expectedGroups, result.groups());
         assertEquals(1, result.errorCount());
-        // Base connector config has 14 fields, connector's configs add 2, and 2 producer overrides
-        assertEquals(19, result.values().size());
+        // Base connector config has 14 fields, connector's configs add 7, and 2 producer overrides
+        assertEquals(23, result.values().size());
         assertTrue(result.values().stream().anyMatch(
             configInfo -> ackConfigKey.equals(configInfo.configValue().name()) && !configInfo.configValue().errors().isEmpty()));
         assertTrue(result.values().stream().anyMatch(
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTaskTest.java
new file mode 100644
index 0000000000000..d0833dbffc794
--- /dev/null
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/AbstractWorkerSourceTaskTest.java
@@ -0,0 +1,842 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.InvalidRecordException;
+import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.TopicPartitionInfo;
+import org.apache.kafka.common.errors.TopicAuthorizationException;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.data.SchemaAndValue;
+import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.errors.RetriableException;
+import org.apache.kafka.connect.header.ConnectHeaders;
+import org.apache.kafka.connect.integration.MonitorableSourceConnector;
+import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest;
+import org.apache.kafka.connect.runtime.isolation.Plugins;
+import org.apache.kafka.connect.runtime.standalone.StandaloneConfig;
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
+import org.apache.kafka.connect.storage.Converter;
+import org.apache.kafka.connect.storage.HeaderConverter;
+import org.apache.kafka.connect.storage.OffsetStorageWriter;
+import org.apache.kafka.connect.storage.StatusBackingStore;
+import org.apache.kafka.connect.storage.StringConverter;
+import org.apache.kafka.connect.util.ConnectorTaskId;
+import org.apache.kafka.connect.util.TopicAdmin;
+import org.apache.kafka.connect.util.TopicCreationGroup;
+import org.easymock.Capture;
+import org.easymock.EasyMock;
+import org.easymock.IAnswer;
+import org.easymock.IExpectationSetters;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.api.easymock.PowerMock;
+import org.powermock.api.easymock.annotation.Mock;
+import org.powermock.api.easymock.annotation.MockStrict;
+import org.powermock.core.classloader.annotations.PowerMockIgnore;
+import org.powermock.modules.junit4.PowerMockRunner;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeoutException;
+
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TOPIC_CREATION_GROUPS_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_PREFIX;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.EXCLUDE_REGEX_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.INCLUDE_REGEX_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_CONFIG;
+import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_CREATION_ENABLE_CONFIG;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+@PowerMockIgnore({"javax.management.*",
+        "org.apache.log4j.*"})
+@RunWith(PowerMockRunner.class)
+public class AbstractWorkerSourceTaskTest {
+
+    private static final String TOPIC = "topic";
+    private static final String OTHER_TOPIC = "other-topic";
+    private static final Map<String, byte[]> PARTITION = Collections.singletonMap("key", "partition".getBytes());
+    private static final Map<String, Integer> OFFSET = Collections.singletonMap("key", 12);
+
+    // Connect-format data
+    private static final Schema KEY_SCHEMA = Schema.INT32_SCHEMA;
+    private static final Integer KEY = -1;
+    private static final Schema RECORD_SCHEMA = Schema.INT64_SCHEMA;
+    private static final Long RECORD = 12L;
+    // Serialized data. The actual format of this data doesn't matter -- we just want to see that the right version
+    // is used in the right place.
+    private static final byte[] SERIALIZED_KEY = "converted-key".getBytes();
+    private static final byte[] SERIALIZED_RECORD = "converted-record".getBytes();
+
+    @Mock private SourceTask sourceTask;
+    @Mock private TopicAdmin admin;
+    @Mock private KafkaProducer<byte[], byte[]> producer;
+    @Mock private Converter keyConverter;
+    @Mock private Converter valueConverter;
+    @Mock private HeaderConverter headerConverter;
+    @Mock private TransformationChain<SourceRecord> transformationChain;
+    @Mock private CloseableOffsetStorageReader offsetReader;
+    @Mock private OffsetStorageWriter offsetWriter;
+    @Mock private ConnectorOffsetBackingStore offsetStore;
+    @Mock private StatusBackingStore statusBackingStore;
+    @Mock private WorkerSourceTaskContext sourceTaskContext;
+    @MockStrict private TaskStatus.Listener statusListener;
+
+    private final ConnectorTaskId taskId = new ConnectorTaskId("job", 0);
+    private final ConnectorTaskId taskId1 = new ConnectorTaskId("job", 1);
+
+    private Plugins plugins;
+    private WorkerConfig config;
+    private SourceConnectorConfig sourceConfig;
+    private MockConnectMetrics metrics = new MockConnectMetrics();
+    private Capture<Callback> producerCallbacks;
+
+    private AbstractWorkerSourceTask workerTask;
+
+    @Before
+    public void setup() {
+        Map<String, String> workerProps = workerProps();
+        plugins = new Plugins(workerProps);
+        config = new StandaloneConfig(workerProps);
+        sourceConfig = new SourceConnectorConfig(plugins, sourceConnectorPropsWithGroups(TOPIC), true);
+        producerCallbacks = EasyMock.newCapture();
+        metrics = new MockConnectMetrics();
+    }
+
+    private Map<String, String> workerProps() {
+        Map<String, String> props = new HashMap<>();
+        props.put("key.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("value.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("offset.storage.file.filename", "/tmp/connect.offsets");
+        props.put(TOPIC_CREATION_ENABLE_CONFIG, "true");
+        return props;
+    }
+
+    private Map<String, String> sourceConnectorPropsWithGroups(String topic) {
+        // setup up props for the source connector
+        Map<String, String> props = new HashMap<>();
+        props.put("name", "foo-connector");
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getSimpleName());
+        props.put(TASKS_MAX_CONFIG, String.valueOf(1));
+        props.put(TOPIC_CONFIG, topic);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(TOPIC_CREATION_GROUPS_CONFIG, String.join(",", "foo", "bar"));
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + REPLICATION_FACTOR_CONFIG, String.valueOf(1));
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + PARTITIONS_CONFIG, String.valueOf(1));
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "foo" + "." + INCLUDE_REGEX_CONFIG, topic);
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "bar" + "." + INCLUDE_REGEX_CONFIG, ".*");
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "bar" + "." + EXCLUDE_REGEX_CONFIG, topic);
+        return props;
+    }
+
+    @After
+    public void tearDown() {
+        if (metrics != null) metrics.stop();
+    }
+
+    @Test
+    public void testMetricsGroup() {
+        AbstractWorkerSourceTask.SourceTaskMetricsGroup group = new AbstractWorkerSourceTask.SourceTaskMetricsGroup(taskId, metrics);
+        AbstractWorkerSourceTask.SourceTaskMetricsGroup group1 = new AbstractWorkerSourceTask.SourceTaskMetricsGroup(taskId1, metrics);
+        for (int i = 0; i != 10; ++i) {
+            group.recordPoll(100, 1000 + i * 100);
+            group.recordWrite(10);
+        }
+        for (int i = 0; i != 20; ++i) {
+            group1.recordPoll(100, 1000 + i * 100);
+            group1.recordWrite(10);
+        }
+        assertEquals(1900.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "poll-batch-max-time-ms"), 0.001d);
+        assertEquals(1450.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "poll-batch-avg-time-ms"), 0.001d);
+        assertEquals(33.333, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-poll-rate"), 0.001d);
+        assertEquals(1000, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-poll-total"), 0.001d);
+        assertEquals(3.3333, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-write-rate"), 0.001d);
+        assertEquals(100, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-write-total"), 0.001d);
+        assertEquals(900.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-active-count"), 0.001d);
+
+        // Close the group
+        group.close();
+
+        for (MetricName metricName : group.metricGroup().metrics().metrics().keySet()) {
+            // Metrics for this group should no longer exist
+            assertFalse(group.metricGroup().groupId().includes(metricName));
+        }
+        // Sensors for this group should no longer exist
+        assertNull(group.metricGroup().metrics().getSensor("sink-record-read"));
+        assertNull(group.metricGroup().metrics().getSensor("sink-record-send"));
+        assertNull(group.metricGroup().metrics().getSensor("sink-record-active-count"));
+        assertNull(group.metricGroup().metrics().getSensor("partition-count"));
+        assertNull(group.metricGroup().metrics().getSensor("offset-seq-number"));
+        assertNull(group.metricGroup().metrics().getSensor("offset-commit-completion"));
+        assertNull(group.metricGroup().metrics().getSensor("offset-commit-completion-skip"));
+        assertNull(group.metricGroup().metrics().getSensor("put-batch-time"));
+
+        assertEquals(2900.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "poll-batch-max-time-ms"), 0.001d);
+        assertEquals(1950.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "poll-batch-avg-time-ms"), 0.001d);
+        assertEquals(66.667, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-poll-rate"), 0.001d);
+        assertEquals(2000, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-poll-total"), 0.001d);
+        assertEquals(6.667, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-write-rate"), 0.001d);
+        assertEquals(200, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-write-total"), 0.001d);
+        assertEquals(1800.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-active-count"), 0.001d);
+    }
+
+    @Test
+    public void testSendRecordsConvertsData() {
+        createWorkerTask();
+
+        List<SourceRecord> records = new ArrayList<>();
+        // Can just use the same record for key and value
+        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD));
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
+
+        expectTopicCreation(TOPIC);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        workerTask.sendRecords();
+        assertEquals(SERIALIZED_KEY, sent.getValue().key());
+        assertEquals(SERIALIZED_RECORD, sent.getValue().value());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsPropagatesTimestamp() {
+        final Long timestamp = System.currentTimeMillis();
+
+        createWorkerTask();
+
+        List<SourceRecord> records = Collections.singletonList(
+                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
+        );
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
+
+        expectTopicCreation(TOPIC);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        workerTask.sendRecords();
+        assertEquals(timestamp, sent.getValue().timestamp());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsCorruptTimestamp() {
+        final Long timestamp = -3L;
+        createWorkerTask();
+
+        List<SourceRecord> records = Collections.singletonList(
+                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
+        );
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        assertThrows(InvalidRecordException.class, workerTask::sendRecords);
+        assertFalse(sent.hasCaptured());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsNoTimestamp() {
+        final Long timestamp = -1L;
+        createWorkerTask();
+
+        List<SourceRecord> records = Collections.singletonList(
+                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
+        );
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
+
+        expectTopicCreation(TOPIC);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        workerTask.sendRecords();
+        assertNull(sent.getValue().timestamp());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testHeaders() {
+        Headers headers = new RecordHeaders();
+        headers.add("header_key", "header_value".getBytes());
+
+        org.apache.kafka.connect.header.Headers connectHeaders = new ConnectHeaders();
+        connectHeaders.add("header_key", new SchemaAndValue(Schema.STRING_SCHEMA, "header_value"));
+
+        createWorkerTask();
+
+        List<SourceRecord> records = new ArrayList<>();
+        records.add(new SourceRecord(PARTITION, OFFSET, TOPIC, null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, null, connectHeaders));
+
+        expectTopicCreation(TOPIC);
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecord(TOPIC, true, headers);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        workerTask.sendRecords();
+        assertEquals(SERIALIZED_KEY, sent.getValue().key());
+        assertEquals(SERIALIZED_RECORD, sent.getValue().value());
+        assertEquals(headers, sent.getValue().headers());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testHeadersWithCustomConverter() throws Exception {
+        StringConverter stringConverter = new StringConverter();
+        SampleConverterWithHeaders testConverter = new SampleConverterWithHeaders();
+
+        createWorkerTask(stringConverter, testConverter, stringConverter);
+
+        List<SourceRecord> records = new ArrayList<>();
+
+        String stringA = "Árvíztűrő tükörfúrógép";
+        org.apache.kafka.connect.header.Headers headersA = new ConnectHeaders();
+        String encodingA = "latin2";
+        headersA.addString("encoding", encodingA);
+
+        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, Schema.STRING_SCHEMA, "a", Schema.STRING_SCHEMA, stringA, null, headersA));
+
+        String stringB = "Тестовое сообщение";
+        org.apache.kafka.connect.header.Headers headersB = new ConnectHeaders();
+        String encodingB = "koi8_r";
+        headersB.addString("encoding", encodingB);
+
+        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, Schema.STRING_SCHEMA, "b", Schema.STRING_SCHEMA, stringB, null, headersB));
+
+        expectTopicCreation(TOPIC);
+
+        Capture<ProducerRecord<byte[], byte[]>> sentRecordA = expectSendRecord(TOPIC, false, null);
+        Capture<ProducerRecord<byte[], byte[]>> sentRecordB = expectSendRecord(TOPIC, false, null);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = records;
+        workerTask.sendRecords();
+
+        assertEquals(ByteBuffer.wrap("a".getBytes()), ByteBuffer.wrap(sentRecordA.getValue().key()));
+        assertEquals(
+                ByteBuffer.wrap(stringA.getBytes(encodingA)),
+                ByteBuffer.wrap(sentRecordA.getValue().value())
+        );
+        assertEquals(encodingA, new String(sentRecordA.getValue().headers().lastHeader("encoding").value()));
+
+        assertEquals(ByteBuffer.wrap("b".getBytes()), ByteBuffer.wrap(sentRecordB.getValue().key()));
+        assertEquals(
+                ByteBuffer.wrap(stringB.getBytes(encodingB)),
+                ByteBuffer.wrap(sentRecordB.getValue().value())
+        );
+        assertEquals(encodingB, new String(sentRecordB.getValue().headers().lastHeader("encoding").value()));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testTopicCreateWhenTopicExists() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        TopicPartitionInfo topicPartitionInfo = new TopicPartitionInfo(0, null, Collections.emptyList(), Collections.emptyList());
+        TopicDescription topicDesc = new TopicDescription(TOPIC, false, Collections.singletonList(topicPartitionInfo));
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.singletonMap(TOPIC, topicDesc));
+
+        expectSendRecord();
+        expectSendRecord();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        workerTask.sendRecords();
+    }
+
+    @Test
+    public void testSendRecordsTopicDescribeRetries() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        // First round - call to describe the topic times out
+        EasyMock.expect(admin.describeTopics(TOPIC))
+                .andThrow(new RetriableException(new TimeoutException("timeout")));
+
+        // Second round - calls to describe and create succeed
+        expectTopicCreation(TOPIC);
+        // Exactly two records are sent
+        expectSendRecord();
+        expectSendRecord();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        workerTask.sendRecords();
+        assertEquals(Arrays.asList(record1, record2), workerTask.toSend);
+
+        // Next they all succeed
+        workerTask.sendRecords();
+        assertNull(workerTask.toSend);
+    }
+
+    @Test
+    public void testSendRecordsTopicCreateRetries() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        // First call to describe the topic times out
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
+                .andThrow(new RetriableException(new TimeoutException("timeout")));
+
+        // Second round
+        expectTopicCreation(TOPIC);
+        expectSendRecord();
+        expectSendRecord();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        workerTask.sendRecords();
+        assertEquals(Arrays.asList(record1, record2), workerTask.toSend);
+
+        // Next they all succeed
+        workerTask.sendRecords();
+        assertNull(workerTask.toSend);
+    }
+
+    @Test
+    public void testSendRecordsTopicDescribeRetriesMidway() {
+        createWorkerTask();
+
+        // Differentiate only by Kafka partition so we can reuse conversion expectations
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, OTHER_TOPIC, 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        // First round
+        expectPreliminaryCalls(OTHER_TOPIC);
+        expectTopicCreation(TOPIC);
+        expectSendRecord();
+        expectSendRecord();
+
+        // First call to describe the topic times out
+        EasyMock.expect(admin.describeTopics(OTHER_TOPIC))
+                .andThrow(new RetriableException(new TimeoutException("timeout")));
+
+        // Second round
+        expectTopicCreation(OTHER_TOPIC);
+        expectSendRecord(OTHER_TOPIC, false, emptyHeaders());
+
+        PowerMock.replayAll();
+
+        // Try to send 3, make first pass, second fail. Should save last two
+        workerTask.toSend = Arrays.asList(record1, record2, record3);
+        workerTask.sendRecords();
+        assertEquals(Arrays.asList(record3), workerTask.toSend);
+
+        // Next they all succeed
+        workerTask.sendRecords();
+        assertNull(workerTask.toSend);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsTopicCreateRetriesMidway() {
+        createWorkerTask();
+
+        // Differentiate only by Kafka partition so we can reuse conversion expectations
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, OTHER_TOPIC, 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        // First round
+        expectPreliminaryCalls(OTHER_TOPIC);
+        expectTopicCreation(TOPIC);
+        expectSendRecord();
+        expectSendRecord();
+
+        EasyMock.expect(admin.describeTopics(OTHER_TOPIC)).andReturn(Collections.emptyMap());
+        // First call to create the topic times out
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
+                .andThrow(new RetriableException(new TimeoutException("timeout")));
+
+        // Second round
+        expectTopicCreation(OTHER_TOPIC);
+        expectSendRecord(OTHER_TOPIC, false, emptyHeaders());
+
+        PowerMock.replayAll();
+
+        // Try to send 3, make first pass, second fail. Should save last two
+        workerTask.toSend = Arrays.asList(record1, record2, record3);
+        workerTask.sendRecords();
+        assertEquals(Arrays.asList(record3), workerTask.toSend);
+
+        // Next they all succeed
+        workerTask.sendRecords();
+        assertNull(workerTask.toSend);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testTopicDescribeFails() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC))
+                .andThrow(new ConnectException(new TopicAuthorizationException("unauthorized")));
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        assertThrows(ConnectException.class, workerTask::sendRecords);
+    }
+
+    @Test
+    public void testTopicCreateFails() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
+
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
+                .andThrow(new ConnectException(new TopicAuthorizationException("unauthorized")));
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        assertThrows(ConnectException.class, workerTask::sendRecords);
+        assertTrue(newTopicCapture.hasCaptured());
+    }
+
+    @Test
+    public void testTopicCreateFailsWithExceptionWhenCreateReturnsTopicNotCreatedOrFound() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
+
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(TopicAdmin.EMPTY_CREATION);
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        assertThrows(ConnectException.class, workerTask::sendRecords);
+        assertTrue(newTopicCapture.hasCaptured());
+    }
+
+    @Test
+    public void testTopicCreateSucceedsWhenCreateReturnsExistingTopicFound() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
+
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(foundTopic(TOPIC));
+
+        expectSendRecord();
+        expectSendRecord();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        workerTask.sendRecords();
+    }
+
+    @Test
+    public void testTopicCreateSucceedsWhenCreateReturnsNewTopicFound() {
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectPreliminaryCalls();
+        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
+
+        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(createdTopic(TOPIC));
+
+        expectSendRecord();
+        expectSendRecord();
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        workerTask.sendRecords();
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecord(
+            String topic,
+            boolean anyTimes,
+            Headers headers
+    ) {
+        if (headers != null)
+            expectConvertHeadersAndKeyValue(topic, anyTimes, headers);
+
+        expectApplyTransformationChain(anyTimes);
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = EasyMock.newCapture();
+
+        IExpectationSetters<Future<RecordMetadata>> expect = EasyMock.expect(
+                producer.send(EasyMock.capture(sent), EasyMock.capture(producerCallbacks)));
+
+        IAnswer<Future<RecordMetadata>> expectResponse = () -> {
+            synchronized (producerCallbacks) {
+                for (Callback cb : producerCallbacks.getValues()) {
+                    cb.onCompletion(new RecordMetadata(new TopicPartition("foo", 0), 0, 0, 0L, 0, 0), null);
+                }
+                producerCallbacks.reset();
+            }
+            return null;
+        };
+
+        if (anyTimes)
+            expect.andStubAnswer(expectResponse);
+        else
+            expect.andAnswer(expectResponse);
+
+        expectTaskGetTopic(anyTimes);
+
+        return sent;
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecordAnyTimes() {
+        return expectSendRecord(TOPIC, true, emptyHeaders());
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecord() {
+        return expectSendRecord(TOPIC, false, emptyHeaders());
+    }
+
+    private void expectTaskGetTopic(boolean anyTimes) {
+        final Capture<String> connectorCapture = EasyMock.newCapture();
+        final Capture<String> topicCapture = EasyMock.newCapture();
+        IExpectationSetters<TopicStatus> expect = EasyMock.expect(statusBackingStore.getTopic(
+                EasyMock.capture(connectorCapture),
+                EasyMock.capture(topicCapture)));
+        if (anyTimes) {
+            expect.andStubAnswer(() -> new TopicStatus(
+                    topicCapture.getValue(),
+                    new ConnectorTaskId(connectorCapture.getValue(), 0),
+                    Time.SYSTEM.milliseconds()));
+        } else {
+            expect.andAnswer(() -> new TopicStatus(
+                    topicCapture.getValue(),
+                    new ConnectorTaskId(connectorCapture.getValue(), 0),
+                    Time.SYSTEM.milliseconds()));
+        }
+        if (connectorCapture.hasCaptured() && topicCapture.hasCaptured()) {
+            assertEquals("job", connectorCapture.getValue());
+            assertEquals(TOPIC, topicCapture.getValue());
+        }
+    }
+
+    private void expectTopicCreation(String topic) {
+        if (config.topicCreationEnable()) {
+            EasyMock.expect(admin.describeTopics(topic)).andReturn(Collections.emptyMap());
+            Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+            EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(createdTopic(topic));
+        }
+    }
+
+    private TopicAdmin.TopicCreationResponse createdTopic(String topic) {
+        Set<String> created = Collections.singleton(topic);
+        Set<String> existing = Collections.emptySet();
+        return new TopicAdmin.TopicCreationResponse(created, existing);
+    }
+
+    private TopicAdmin.TopicCreationResponse foundTopic(String topic) {
+        Set<String> created = Collections.emptySet();
+        Set<String> existing = Collections.singleton(topic);
+        return new TopicAdmin.TopicCreationResponse(created, existing);
+    }
+
+    private void expectPreliminaryCalls() {
+        expectPreliminaryCalls(TOPIC);
+    }
+
+    private void expectPreliminaryCalls(String topic) {
+        expectConvertHeadersAndKeyValue(topic, true, emptyHeaders());
+        expectApplyTransformationChain(false);
+        PowerMock.expectLastCall();
+    }
+
+    private void expectConvertHeadersAndKeyValue(String topic, boolean anyTimes, Headers headers) {
+        for (Header header : headers) {
+            IExpectationSetters<byte[]> convertHeaderExpect = EasyMock.expect(headerConverter.fromConnectHeader(topic, header.key(), Schema.STRING_SCHEMA, new String(header.value())));
+            if (anyTimes)
+                convertHeaderExpect.andStubReturn(header.value());
+            else
+                convertHeaderExpect.andReturn(header.value());
+        }
+        IExpectationSetters<byte[]> convertKeyExpect = EasyMock.expect(keyConverter.fromConnectData(topic, headers, KEY_SCHEMA, KEY));
+        if (anyTimes)
+            convertKeyExpect.andStubReturn(SERIALIZED_KEY);
+        else
+            convertKeyExpect.andReturn(SERIALIZED_KEY);
+        IExpectationSetters<byte[]> convertValueExpect = EasyMock.expect(valueConverter.fromConnectData(topic, headers, RECORD_SCHEMA, RECORD));
+        if (anyTimes)
+            convertValueExpect.andStubReturn(SERIALIZED_RECORD);
+        else
+            convertValueExpect.andReturn(SERIALIZED_RECORD);
+    }
+
+    private void expectApplyTransformationChain(boolean anyTimes) {
+        final Capture<SourceRecord> recordCapture = EasyMock.newCapture();
+        IExpectationSetters<SourceRecord> convertKeyExpect = EasyMock.expect(transformationChain.apply(EasyMock.capture(recordCapture)));
+        if (anyTimes)
+            convertKeyExpect.andStubAnswer(recordCapture::getValue);
+        else
+            convertKeyExpect.andAnswer(recordCapture::getValue);
+    }
+
+    private RecordHeaders emptyHeaders() {
+        return new RecordHeaders();
+    }
+
+    private void createWorkerTask() {
+        createWorkerTask(keyConverter, valueConverter, headerConverter);
+    }
+
+    private void createWorkerTask(Converter keyConverter, Converter valueConverter, HeaderConverter headerConverter) {
+        workerTask = new AbstractWorkerSourceTask(
+                taskId, sourceTask, statusListener, TargetState.STARTED, keyConverter, valueConverter, headerConverter, transformationChain,
+                sourceTaskContext, producer, admin, TopicCreationGroup.configuredGroups(sourceConfig), offsetReader, offsetWriter, offsetStore,
+                config, metrics, plugins.delegatingLoader(), Time.SYSTEM, RetryWithToleranceOperatorTest.NOOP_OPERATOR,
+                statusBackingStore, Runnable::run) {
+            @Override
+            protected void prepareToInitializeTask() {
+            }
+
+            @Override
+            protected void prepareToEnterSendLoop() {
+            }
+
+            @Override
+            protected void beginSendIteration() {
+            }
+
+            @Override
+            protected void prepareToPollTask() {
+            }
+
+            @Override
+            protected void recordDropped(SourceRecord record) {
+            }
+
+            @Override
+            protected Optional<SubmittedRecords.SubmittedRecord> prepareToSendRecord(SourceRecord sourceRecord, ProducerRecord<byte[], byte[]> producerRecord) {
+                return Optional.empty();
+            }
+
+            @Override
+            protected void recordDispatched(SourceRecord record) {
+            }
+
+            @Override
+            protected void batchDispatched() {
+            }
+
+            @Override
+            protected void recordSent(SourceRecord sourceRecord, ProducerRecord<byte[], byte[]> producerRecord, RecordMetadata recordMetadata) {
+            }
+
+            @Override
+            protected void producerSendFailed(boolean synchronous, ProducerRecord<byte[], byte[]> producerRecord, SourceRecord preTransformRecord, Exception e) {
+            }
+
+            @Override
+            protected void finalOffsetCommit(boolean failed) {
+            }
+        };
+
+    }
+
+}
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ErrorHandlingTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ErrorHandlingTaskTest.java
index a63430d6457dc..b2ba4178805c0 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ErrorHandlingTaskTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ErrorHandlingTaskTest.java
@@ -33,7 +33,7 @@
 import org.apache.kafka.connect.errors.RetriableException;
 import org.apache.kafka.connect.integration.MonitorableSourceConnector;
 import org.apache.kafka.connect.json.JsonConverter;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.errors.ErrorHandlingMetrics;
 import org.apache.kafka.connect.runtime.errors.ErrorReporter;
 import org.apache.kafka.connect.runtime.errors.LogReporter;
@@ -48,6 +48,7 @@
 import org.apache.kafka.connect.sink.SinkTask;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
 import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
@@ -76,6 +77,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.time.Duration;
 import java.util.Arrays;
 import java.util.Collection;
@@ -160,6 +162,8 @@ public class ErrorHandlingTaskTest {
     OffsetStorageReaderImpl offsetReader;
     @Mock
     OffsetStorageWriter offsetWriter;
+    @Mock
+    private ConnectorOffsetBackingStore offsetStore;
 
     private Capture<ConsumerRebalanceListener> rebalanceListener = EasyMock.newCapture();
     @SuppressWarnings("unused")
@@ -241,6 +245,9 @@ public void testSinkTasksCloseErrorReporters() throws Exception {
         consumer.close();
         EasyMock.expectLastCall();
 
+        headerConverter.close();
+        EasyMock.expectLastCall();
+
         PowerMock.replayAll();
 
         workerSinkTask.initialize(TASK_CONFIG);
@@ -378,9 +385,10 @@ public void testErrorHandlingInSourceTasks() throws Exception {
 
         EasyMock.expect(workerSourceTask.commitOffsets()).andReturn(true);
 
+        offsetStore.start();
+        EasyMock.expectLastCall();
         sourceTask.initialize(EasyMock.anyObject());
         EasyMock.expectLastCall();
-
         sourceTask.start(EasyMock.anyObject());
         EasyMock.expectLastCall();
 
@@ -442,9 +450,10 @@ public void testErrorHandlingInSourceTasksWthBadConverter() throws Exception {
 
         EasyMock.expect(workerSourceTask.commitOffsets()).andReturn(true);
 
+        offsetStore.start();
+        EasyMock.expectLastCall();
         sourceTask.initialize(EasyMock.anyObject());
         EasyMock.expectLastCall();
-
         sourceTask.start(EasyMock.anyObject());
         EasyMock.expectLastCall();
 
@@ -530,6 +539,19 @@ private void expectClose() {
 
         admin.close(EasyMock.anyObject(Duration.class));
         EasyMock.expectLastCall();
+
+        offsetReader.close();
+        EasyMock.expectLastCall();
+
+        offsetStore.stop();
+        EasyMock.expectLastCall();
+
+        try {
+            headerConverter.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        EasyMock.expectLastCall();
     }
 
     private void expectTopicCreation(String topic) {
@@ -590,7 +612,7 @@ private void createSourceTask(TargetState initialState, RetryWithToleranceOperat
             WorkerSourceTask.class, new String[]{"commitOffsets", "isStopping"},
             taskId, sourceTask, statusListener, initialState, converter, converter, headerConverter, sourceTransforms,
             producer, admin, TopicCreationGroup.configuredGroups(sourceConfig),
-            offsetReader, offsetWriter, workerConfig,
+            offsetReader, offsetWriter, offsetStore, workerConfig,
             ClusterConfigState.EMPTY, metrics, pluginLoader, time, retryWithToleranceOperator,
             statusBackingStore, (Executor) Runnable::run);
     }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTaskTest.java
new file mode 100644
index 0000000000000..44427b5b54200
--- /dev/null
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/ExactlyOnceWorkerSourceTaskTest.java
@@ -0,0 +1,1324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.InvalidTopicException;
+import org.apache.kafka.common.errors.RecordTooLargeException;
+import org.apache.kafka.common.errors.TopicAuthorizationException;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.connect.data.Schema;
+import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.integration.MonitorableSourceConnector;
+import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
+import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest;
+import org.apache.kafka.connect.runtime.isolation.Plugins;
+import org.apache.kafka.connect.runtime.standalone.StandaloneConfig;
+import org.apache.kafka.connect.source.SourceRecord;
+import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.source.SourceTaskContext;
+import org.apache.kafka.connect.source.TransactionContext;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ClusterConfigState;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
+import org.apache.kafka.connect.storage.Converter;
+import org.apache.kafka.connect.storage.HeaderConverter;
+import org.apache.kafka.connect.storage.OffsetStorageWriter;
+import org.apache.kafka.connect.storage.StatusBackingStore;
+import org.apache.kafka.connect.storage.StringConverter;
+import org.apache.kafka.connect.util.Callback;
+import org.apache.kafka.connect.util.ConnectorTaskId;
+import org.apache.kafka.connect.util.ParameterizedTest;
+import org.apache.kafka.connect.util.ThreadedTest;
+import org.apache.kafka.connect.util.TopicAdmin;
+import org.apache.kafka.connect.util.TopicCreationGroup;
+import org.easymock.Capture;
+import org.easymock.EasyMock;
+import org.easymock.IAnswer;
+import org.easymock.IExpectationSetters;
+import org.junit.After;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.api.easymock.PowerMock;
+import org.powermock.api.easymock.annotation.Mock;
+import org.powermock.api.easymock.annotation.MockStrict;
+import org.powermock.core.classloader.annotations.PowerMockIgnore;
+import org.powermock.modules.junit4.PowerMockRunner;
+import org.powermock.modules.junit4.PowerMockRunnerDelegate;
+
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.apache.kafka.connect.integration.MonitorableSourceConnector.TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TOPIC_CREATION_GROUPS_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TRANSACTION_BOUNDARY_INTERVAL_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_PREFIX;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.EXCLUDE_REGEX_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.INCLUDE_REGEX_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_CONFIG;
+import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_CONFIG;
+import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_CREATION_ENABLE_CONFIG;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+@PowerMockIgnore({"javax.management.*",
+        "org.apache.log4j.*"})
+@RunWith(PowerMockRunner.class)
+@PowerMockRunnerDelegate(ParameterizedTest.class)
+public class ExactlyOnceWorkerSourceTaskTest extends ThreadedTest {
+    private static final String TOPIC = "topic";
+    private static final Map<String, byte[]> PARTITION = Collections.singletonMap("key", "partition".getBytes());
+    private static final Map<String, Integer> OFFSET = Collections.singletonMap("key", 12);
+
+    // Connect-format data
+    private static final Schema KEY_SCHEMA = Schema.INT32_SCHEMA;
+    private static final Integer KEY = -1;
+    private static final Schema RECORD_SCHEMA = Schema.INT64_SCHEMA;
+    private static final Long RECORD = 12L;
+    // Serialized data. The actual format of this data doesn't matter -- we just want to see that the right version
+    // is used in the right place.
+    private static final byte[] SERIALIZED_KEY = "converted-key".getBytes();
+    private static final byte[] SERIALIZED_RECORD = "converted-record".getBytes();
+
+    private final ExecutorService executor = Executors.newSingleThreadExecutor();
+    private final ConnectorTaskId taskId = new ConnectorTaskId("job", 0);
+    private WorkerConfig config;
+    private SourceConnectorConfig sourceConfig;
+    private Plugins plugins;
+    private MockConnectMetrics metrics;
+    private Time time;
+    private CountDownLatch pollLatch;
+    @Mock private SourceTask sourceTask;
+    @Mock private Converter keyConverter;
+    @Mock private Converter valueConverter;
+    @Mock private HeaderConverter headerConverter;
+    @Mock private TransformationChain<SourceRecord> transformationChain;
+    @Mock private KafkaProducer<byte[], byte[]> producer;
+    @Mock private TopicAdmin admin;
+    @Mock private CloseableOffsetStorageReader offsetReader;
+    @Mock private OffsetStorageWriter offsetWriter;
+    @Mock private ClusterConfigState clusterConfigState;
+    private ExactlyOnceWorkerSourceTask workerTask;
+    @Mock private Future<RecordMetadata> sendFuture;
+    @MockStrict private TaskStatus.Listener statusListener;
+    @Mock private StatusBackingStore statusBackingStore;
+    @Mock private ConnectorOffsetBackingStore offsetStore;
+    @Mock private Runnable preProducerCheck;
+    @Mock private Runnable postProducerCheck;
+
+    private Capture<org.apache.kafka.clients.producer.Callback> producerCallbacks;
+
+    private static final Map<String, String> TASK_PROPS = new HashMap<>();
+    static {
+        TASK_PROPS.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
+    }
+    private static final TaskConfig TASK_CONFIG = new TaskConfig(TASK_PROPS);
+
+    private static final SourceRecord SOURCE_RECORD =
+            new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+    private static final List<SourceRecord> RECORDS = Collections.singletonList(SOURCE_RECORD);
+
+    private final boolean enableTopicCreation;
+
+    @ParameterizedTest.Parameters
+    public static Collection<Boolean> parameters() {
+        return Arrays.asList(false, true);
+    }
+
+    public ExactlyOnceWorkerSourceTaskTest(boolean enableTopicCreation) {
+        this.enableTopicCreation = enableTopicCreation;
+    }
+
+    @Override
+    public void setup() {
+        super.setup();
+        Map<String, String> workerProps = workerProps();
+        plugins = new Plugins(workerProps);
+        config = new StandaloneConfig(workerProps);
+        sourceConfig = new SourceConnectorConfig(plugins, sourceConnectorProps(), true);
+        producerCallbacks = EasyMock.newCapture();
+        metrics = new MockConnectMetrics();
+        time = Time.SYSTEM;
+        EasyMock.expect(offsetStore.primaryOffsetsTopic()).andStubReturn("offsets-topic");
+        pollLatch = new CountDownLatch(1);
+    }
+
+    private Map<String, String> workerProps() {
+        Map<String, String> props = new HashMap<>();
+        props.put("key.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("value.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("internal.key.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("internal.value.converter", "org.apache.kafka.connect.json.JsonConverter");
+        props.put("internal.key.converter.schemas.enable", "false");
+        props.put("internal.value.converter.schemas.enable", "false");
+        props.put("offset.storage.file.filename", "/tmp/connect.offsets");
+        props.put(TOPIC_CREATION_ENABLE_CONFIG, String.valueOf(enableTopicCreation));
+        return props;
+    }
+
+    private Map<String, String> sourceConnectorProps() {
+        return sourceConnectorProps(SourceTask.TransactionBoundary.DEFAULT);
+    }
+
+    private Map<String, String> sourceConnectorProps(SourceTask.TransactionBoundary transactionBoundary) {
+        // setup up props for the source connector
+        Map<String, String> props = new HashMap<>();
+        props.put("name", "foo-connector");
+        props.put(CONNECTOR_CLASS_CONFIG, MonitorableSourceConnector.class.getSimpleName());
+        props.put(TASKS_MAX_CONFIG, String.valueOf(1));
+        props.put(TOPIC_CONFIG, TOPIC);
+        props.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
+        props.put(TOPIC_CREATION_GROUPS_CONFIG, String.join(",", "foo", "bar"));
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + REPLICATION_FACTOR_CONFIG, String.valueOf(1));
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + PARTITIONS_CONFIG, String.valueOf(1));
+        props.put(TRANSACTION_BOUNDARY_CONFIG, transactionBoundary.toString());
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "foo" + "." + INCLUDE_REGEX_CONFIG, TOPIC);
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "bar" + "." + INCLUDE_REGEX_CONFIG, ".*");
+        props.put(SourceConnectorConfig.TOPIC_CREATION_PREFIX + "bar" + "." + EXCLUDE_REGEX_CONFIG, TOPIC);
+        return props;
+    }
+
+    @After
+    public void tearDown() {
+        if (metrics != null) metrics.stop();
+    }
+
+    private void createWorkerTask() {
+        createWorkerTask(TargetState.STARTED);
+    }
+
+    private void createWorkerTask(TargetState initialState) {
+        createWorkerTask(initialState, keyConverter, valueConverter, headerConverter);
+    }
+
+    private void createWorkerTask(TargetState initialState, Converter keyConverter, Converter valueConverter, HeaderConverter headerConverter) {
+        workerTask = new ExactlyOnceWorkerSourceTask(taskId, sourceTask, statusListener, initialState, keyConverter, valueConverter, headerConverter,
+                transformationChain, producer, admin, TopicCreationGroup.configuredGroups(sourceConfig), offsetReader, offsetWriter, offsetStore,
+                config, clusterConfigState, metrics, plugins.delegatingLoader(), time, RetryWithToleranceOperatorTest.NOOP_OPERATOR, statusBackingStore,
+                sourceConfig, Runnable::run, preProducerCheck, postProducerCheck);
+    }
+
+    @Test
+    public void testStartPaused() throws Exception {
+        final CountDownLatch pauseLatch = new CountDownLatch(1);
+
+        createWorkerTask(TargetState.PAUSED);
+
+        expectCall(() -> statusListener.onPause(taskId)).andAnswer(() -> {
+            pauseLatch.countDown();
+            return null;
+        });
+
+        // The task checks to see if there are offsets to commit before pausing
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(false);
+
+        expectClose();
+
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(pauseLatch.await(5, TimeUnit.SECONDS));
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testPause() throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        AtomicInteger polls = new AtomicInteger(0);
+        AtomicInteger flushes = new AtomicInteger(0);
+        pollLatch = new CountDownLatch(10);
+        expectPolls(polls);
+        expectAnyFlushes(flushes);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(() -> statusListener.onPause(taskId));
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+        assertTrue(awaitLatch(pollLatch));
+
+        workerTask.transitionTo(TargetState.PAUSED);
+
+        int priorCount = polls.get();
+        Thread.sleep(100);
+
+        // since the transition is observed asynchronously, the count could be off by one loop iteration
+        assertTrue(polls.get() - priorCount <= 1);
+
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+
+        assertEquals("Task should have flushed offsets for every record poll, once on pause, and once for end-of-life offset commit",
+                flushes.get(), polls.get() + 2);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInPreProducerCheck() {
+        createWorkerTask();
+
+        Exception exception = new ConnectException("Failed to perform zombie fencing");
+        expectCall(preProducerCheck::run).andThrow(exception);
+
+        expectCall(() -> statusListener.onFailure(taskId, exception));
+
+        // Don't expect task to be stopped since it was never started to begin with
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        // No need to execute on a separate thread; preflight checks should all take place before the poll-send loop starts
+        workerTask.run();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInOffsetStoreStart() {
+        createWorkerTask();
+
+        expectCall(preProducerCheck::run);
+        expectCall(producer::initTransactions);
+        expectCall(postProducerCheck::run);
+
+        Exception exception = new ConnectException("No soup for you!");
+        expectCall(offsetStore::start).andThrow(exception);
+
+        expectCall(() -> statusListener.onFailure(taskId, exception));
+
+        // Don't expect task to be stopped since it was never started to begin with
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        // No need to execute on a separate thread; preflight checks should all take place before the poll-send loop starts
+        workerTask.run();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInProducerInitialization() {
+        createWorkerTask();
+
+        expectCall(preProducerCheck::run);
+        expectCall(producer::initTransactions);
+        Exception exception = new ConnectException("You can't do that!");
+        expectCall(postProducerCheck::run).andThrow(exception);
+
+        expectCall(() -> statusListener.onFailure(taskId, exception));
+
+        // Don't expect task to be stopped since it was never started to begin with
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        // No need to execute on a separate thread; preflight checks should all take place before the poll-send loop starts
+        workerTask.run();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInPostProducerCheck() {
+        createWorkerTask();
+
+        expectCall(preProducerCheck::run);
+        Exception exception = new ConnectException("New task configs for the connector have already been generated");
+        expectCall(producer::initTransactions).andThrow(exception);
+
+        expectCall(() -> statusListener.onFailure(taskId, exception));
+
+        // Don't expect task to be stopped since it was never started to begin with
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        // No need to execute on a separate thread; preflight checks should all take place before the poll-send loop starts
+        workerTask.run();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testPollsInBackground() throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        AtomicInteger polls = new AtomicInteger(0);
+        AtomicInteger flushes = new AtomicInteger(0);
+        pollLatch = new CountDownLatch(10);
+        expectPolls(polls);
+        expectAnyFlushes(flushes);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(10);
+        assertTransactionMetrics(1);
+
+        assertEquals("Task should have flushed offsets for every record poll and for end-of-life offset commit",
+                flushes.get(), polls.get() + 1);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInPoll() throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        final CountDownLatch pollLatch = new CountDownLatch(1);
+        final RuntimeException exception = new RuntimeException();
+        EasyMock.expect(sourceTask.poll()).andAnswer(() -> {
+            pollLatch.countDown();
+            throw exception;
+        });
+
+        expectCall(() -> statusListener.onFailure(taskId, exception));
+        expectCall(sourceTask::stop);
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        //Failure in poll should trigger automatic stop of the worker
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(0);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInPollAfterCancel() throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        final CountDownLatch pollLatch = new CountDownLatch(1);
+        final CountDownLatch workerCancelLatch = new CountDownLatch(1);
+        final RuntimeException exception = new RuntimeException();
+        EasyMock.expect(sourceTask.poll()).andAnswer(() -> {
+            pollLatch.countDown();
+            assertTrue(awaitLatch(workerCancelLatch));
+            throw exception;
+        });
+
+        expectCall(offsetReader::close);
+        expectCall(() -> producer.close(Duration.ZERO));
+        expectCall(sourceTask::stop);
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        workerTask.cancel();
+        workerCancelLatch.countDown();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(0);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFailureInPollAfterStop() throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        final CountDownLatch pollLatch = new CountDownLatch(1);
+        final CountDownLatch workerStopLatch = new CountDownLatch(1);
+        final RuntimeException exception = new RuntimeException();
+        EasyMock.expect(sourceTask.poll()).andAnswer(() -> {
+            pollLatch.countDown();
+            assertTrue(awaitLatch(workerStopLatch));
+            throw exception;
+        });
+
+        expectCall(() -> statusListener.onShutdown(taskId));
+        expectCall(sourceTask::stop);
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        workerTask.stop();
+        workerStopLatch.countDown();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(0);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testPollReturnsNoRecords() throws Exception {
+        // Test that the task handles an empty list of records
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        final CountDownLatch pollLatch = expectEmptyPolls(1, new AtomicInteger());
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(false).anyTimes();
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(0);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testPollBasedCommit() throws Exception {
+        Map<String, String> connectorProps = sourceConnectorProps(SourceTask.TransactionBoundary.POLL);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        AtomicInteger polls = new AtomicInteger();
+        AtomicInteger flushes = new AtomicInteger();
+        expectPolls(polls);
+        expectAnyFlushes(flushes);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(pollLatch));
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+
+        assertEquals("Task should have flushed offsets for every record poll, and for end-of-life offset commit",
+                flushes.get(), polls.get() + 1);
+
+        assertPollMetrics(1);
+        assertTransactionMetrics(1);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testIntervalBasedCommit() throws Exception {
+        long commitInterval = 618;
+        Map<String, String> connectorProps = sourceConnectorProps(SourceTask.TransactionBoundary.INTERVAL);
+        connectorProps.put(TRANSACTION_BOUNDARY_INTERVAL_CONFIG, Long.toString(commitInterval));
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+
+        time = new MockTime();
+
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        expectPolls();
+        final CountDownLatch firstPollLatch = new CountDownLatch(2);
+        final CountDownLatch secondPollLatch = new CountDownLatch(2);
+        final CountDownLatch thirdPollLatch = new CountDownLatch(2);
+
+        AtomicInteger flushes = new AtomicInteger();
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        pollLatch = firstPollLatch;
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("No flushes should have taken place before offset commit interval has elapsed", 0, flushes.get());
+        time.sleep(commitInterval);
+
+        pollLatch = secondPollLatch;
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("One flush should have taken place after offset commit interval has elapsed", 1, flushes.get());
+        time.sleep(commitInterval * 2);
+
+        pollLatch = thirdPollLatch;
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Two flushes should have taken place after offset commit interval has elapsed again", 2, flushes.get());
+
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+
+        assertEquals("Task should have flushed offsets twice based on offset commit interval, and performed final end-of-life offset commit",
+                3, flushes.get());
+
+        assertPollMetrics(2);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConnectorBasedCommit() throws Exception {
+        Map<String, String> connectorProps = sourceConnectorProps(SourceTask.TransactionBoundary.CONNECTOR);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        expectPolls();
+        List<CountDownLatch> pollLatches = IntStream.range(0, 7).mapToObj(i -> new CountDownLatch(3)).collect(Collectors.toList());
+
+        AtomicInteger flushes = new AtomicInteger();
+        // First flush: triggered by TransactionContext::commitTransaction (batch)
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+
+        // Second flush: triggered by TransactionContext::commitTransaction (record)
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+
+        // Third flush: triggered by TransactionContext::abortTransaction (batch)
+        expectCall(producer::abortTransaction);
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(true);
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+
+        // Third flush: triggered by TransactionContext::abortTransaction (record)
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(true);
+        expectCall(producer::abortTransaction);
+        expectFlush(FlushOutcome.SUCCEED, flushes);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(sourceTask::stop);
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        TransactionContext transactionContext = workerTask.sourceTaskContext.transactionContext();
+
+        int poll = -1;
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("No flushes should have taken place without connector requesting transaction commit", 0, flushes.get());
+
+        transactionContext.commitTransaction();
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("One flush should have taken place after connector requested batch commit", 1, flushes.get());
+
+        transactionContext.commitTransaction(SOURCE_RECORD);
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Two flushes should have taken place after connector requested individual record commit", 2, flushes.get());
+
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Only two flushes should still have taken place without connector re-requesting commit, even on identical records", 2, flushes.get());
+
+        transactionContext.abortTransaction();
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Three flushes should have taken place after connector requested batch abort", 3, flushes.get());
+
+        transactionContext.abortTransaction(SOURCE_RECORD);
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Four flushes should have taken place after connector requested individual record abort", 4, flushes.get());
+
+        pollLatch = pollLatches.get(++poll);
+        assertTrue(awaitLatch(pollLatch));
+        assertEquals("Only four flushes should still have taken place without connector re-requesting abort, even on identical records", 4, flushes.get());
+
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+
+        assertEquals("Task should have flushed offsets four times based on connector-defined boundaries, and skipped final end-of-life offset commit",
+                4, flushes.get());
+
+        assertPollMetrics(1);
+        assertTransactionMetrics(2);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testCommitFlushCallbackFailure() throws Exception {
+        testCommitFailure(FlushOutcome.FAIL_FLUSH_CALLBACK);
+    }
+
+    @Test
+    public void testCommitTransactionFailure() throws Exception {
+        testCommitFailure(FlushOutcome.FAIL_TRANSACTION_COMMIT);
+    }
+
+    private void testCommitFailure(FlushOutcome causeOfFailure) throws Exception {
+        createWorkerTask();
+
+        expectPreflight();
+        expectStartup();
+
+        expectPolls();
+        expectFlush(causeOfFailure);
+
+        expectTopicCreation(TOPIC);
+
+        expectCall(sourceTask::stop);
+        // Unlike the standard WorkerSourceTask class, this one fails permanently when offset commits don't succeed
+        final CountDownLatch taskFailure = new CountDownLatch(1);
+        expectCall(() -> statusListener.onFailure(EasyMock.eq(taskId), EasyMock.anyObject()))
+                .andAnswer(() -> {
+                    taskFailure.countDown();
+                    return null;
+                });
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> taskFuture = executor.submit(workerTask);
+
+        assertTrue(awaitLatch(taskFailure));
+        workerTask.stop();
+        assertTrue(workerTask.awaitStop(1000));
+
+        taskFuture.get();
+        assertPollMetrics(1);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsRetries() throws Exception {
+        createWorkerTask();
+
+        // Differentiate only by Kafka partition so we can reuse conversion expectations
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, "topic", 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, "topic", 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, "topic", 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectTopicCreation(TOPIC);
+
+        // First round
+        expectSendRecordOnce(false);
+        expectCall(producer::beginTransaction);
+        // Any Producer retriable exception should work here
+        expectSendRecordSyncFailure(new org.apache.kafka.common.errors.TimeoutException("retriable sync failure"));
+
+        // Second round
+        expectSendRecordOnce(true);
+        expectSendRecordOnce(false);
+
+        PowerMock.replayAll();
+
+        // Try to send 3, make first pass, second fail. Should save last two
+        workerTask.toSend = Arrays.asList(record1, record2, record3);
+        workerTask.sendRecords();
+        assertEquals(Arrays.asList(record2, record3), workerTask.toSend);
+
+        // Next they all succeed
+        workerTask.sendRecords();
+        assertNull(workerTask.toSend);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSendRecordsProducerSendFailsImmediately() {
+        if (!enableTopicCreation)
+            // should only test with topic creation enabled
+            return;
+
+        createWorkerTask();
+
+        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+
+        expectCall(producer::beginTransaction);
+        expectTopicCreation(TOPIC);
+        expectConvertHeadersAndKeyValue(TOPIC, true, emptyHeaders());
+        expectApplyTransformationChain(false);
+
+        EasyMock.expect(producer.send(EasyMock.anyObject(), EasyMock.anyObject()))
+                .andThrow(new KafkaException("Producer closed while send in progress", new InvalidTopicException(TOPIC)));
+
+        PowerMock.replayAll();
+
+        workerTask.toSend = Arrays.asList(record1, record2);
+        assertThrows(ConnectException.class, workerTask::sendRecords);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testSlowTaskStart() throws Exception {
+        final CountDownLatch startupLatch = new CountDownLatch(1);
+        final CountDownLatch finishStartupLatch = new CountDownLatch(1);
+
+        createWorkerTask();
+
+        expectPreflight();
+
+        expectCall(() -> sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class)));
+        expectCall(() -> sourceTask.start(TASK_PROPS));
+        EasyMock.expectLastCall().andAnswer(() -> {
+            startupLatch.countDown();
+            assertTrue(awaitLatch(finishStartupLatch));
+            return null;
+        });
+
+        expectCall(() -> statusListener.onStartup(taskId));
+
+        expectCall(sourceTask::stop);
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(false);
+
+        expectCall(() -> statusListener.onShutdown(taskId));
+
+        expectClose();
+
+        PowerMock.replayAll();
+
+        workerTask.initialize(TASK_CONFIG);
+        Future<?> workerTaskFuture = executor.submit(workerTask);
+
+        // Stopping immediately while the other thread has work to do should result in no polling, no offset commits,
+        // exiting the work thread immediately, and the stop() method will be invoked in the background thread since it
+        // cannot be invoked immediately in the thread trying to stop the task.
+        assertTrue(awaitLatch(startupLatch));
+        workerTask.stop();
+        finishStartupLatch.countDown();
+        assertTrue(workerTask.awaitStop(1000));
+
+        workerTaskFuture.get();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testCancel() {
+        createWorkerTask();
+
+        expectCall(offsetReader::close);
+        expectCall(() -> producer.close(Duration.ZERO));
+
+        PowerMock.replayAll();
+
+        // workerTask said something dumb on twitter
+        workerTask.cancel();
+
+        PowerMock.verifyAll();
+    }
+
+    private TopicAdmin.TopicCreationResponse createdTopic(String topic) {
+        Set<String> created = Collections.singleton(topic);
+        Set<String> existing = Collections.emptySet();
+        return new TopicAdmin.TopicCreationResponse(created, existing);
+    }
+
+    private CountDownLatch expectEmptyPolls(int minimum, final AtomicInteger count) throws InterruptedException {
+        final CountDownLatch latch = new CountDownLatch(minimum);
+        // Note that we stub these to allow any number of calls because the thread will continue to
+        // run. The count passed in + latch returned just makes sure we get *at least* that number of
+        // calls
+        EasyMock.expect(sourceTask.poll())
+                .andStubAnswer(() -> {
+                    count.incrementAndGet();
+                    latch.countDown();
+                    Thread.sleep(10);
+                    return Collections.emptyList();
+                });
+        return latch;
+    }
+
+    private void expectPolls(final AtomicInteger pollCount) throws Exception {
+        expectCall(producer::beginTransaction).atLeastOnce();
+        // Note that we stub these to allow any number of calls because the thread will continue to
+        // run. The count passed in + latch returned just makes sure we get *at least* that number of
+        // calls
+        EasyMock.expect(sourceTask.poll())
+                .andStubAnswer(() -> {
+                    pollCount.incrementAndGet();
+                    pollLatch.countDown();
+                    Thread.sleep(10);
+                    return RECORDS;
+                });
+        // Fallout of the poll() call
+        expectSendRecordAnyTimes();
+    }
+
+    private void expectPolls() throws Exception {
+        expectPolls(new AtomicInteger());
+    }
+
+    @SuppressWarnings("unchecked")
+    private void expectSendRecordSyncFailure(Throwable error) {
+        expectConvertHeadersAndKeyValue(false);
+        expectApplyTransformationChain(false);
+
+        offsetWriter.offset(PARTITION, OFFSET);
+        PowerMock.expectLastCall();
+
+        EasyMock.expect(
+                producer.send(EasyMock.anyObject(ProducerRecord.class),
+                        EasyMock.anyObject(org.apache.kafka.clients.producer.Callback.class)))
+                .andThrow(error);
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecordAnyTimes() {
+        return expectSendRecordSendSuccess(true, false);
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecordOnce(boolean isRetry) {
+        return expectSendRecordSendSuccess(false, isRetry);
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecordSendSuccess(boolean anyTimes, boolean isRetry) {
+        return expectSendRecord(TOPIC, anyTimes, isRetry, true, true, emptyHeaders());
+    }
+
+    private Capture<ProducerRecord<byte[], byte[]>> expectSendRecord(
+            String topic,
+            boolean anyTimes,
+            boolean isRetry,
+            boolean sendSuccess,
+            boolean isMockedConverters,
+            Headers headers
+    ) {
+        if (isMockedConverters) {
+            expectConvertHeadersAndKeyValue(topic, anyTimes, headers);
+        }
+
+        expectApplyTransformationChain(anyTimes);
+
+        Capture<ProducerRecord<byte[], byte[]>> sent = EasyMock.newCapture();
+
+        // 1. Offset data is passed to the offset storage.
+        if (!isRetry) {
+            offsetWriter.offset(PARTITION, OFFSET);
+            if (anyTimes)
+                PowerMock.expectLastCall().anyTimes();
+            else
+                PowerMock.expectLastCall();
+        }
+
+        // 2. Converted data passed to the producer, which will need callbacks invoked for flush to work
+        IExpectationSetters<Future<RecordMetadata>> expect = EasyMock.expect(
+                producer.send(EasyMock.capture(sent),
+                        EasyMock.capture(producerCallbacks)));
+        IAnswer<Future<RecordMetadata>> expectResponse = () -> {
+            synchronized (producerCallbacks) {
+                for (org.apache.kafka.clients.producer.Callback cb : producerCallbacks.getValues()) {
+                    if (sendSuccess) {
+                        cb.onCompletion(new RecordMetadata(new TopicPartition("foo", 0), 0, 0,
+                                0L, 0, 0), null);
+                    } else {
+                        cb.onCompletion(null, new TopicAuthorizationException("foo"));
+                    }
+                }
+                producerCallbacks.reset();
+            }
+            return sendFuture;
+        };
+        if (anyTimes)
+            expect.andStubAnswer(expectResponse);
+        else
+            expect.andAnswer(expectResponse);
+
+        if (sendSuccess) {
+            // 3. As a result of a successful producer send callback, we note the use of the topic
+            expectTaskGetTopic(anyTimes);
+        }
+
+        return sent;
+    }
+
+    private void expectConvertHeadersAndKeyValue(boolean anyTimes) {
+        expectConvertHeadersAndKeyValue(TOPIC, anyTimes, emptyHeaders());
+    }
+
+    private void expectConvertHeadersAndKeyValue(String topic, boolean anyTimes, Headers headers) {
+        for (Header header : headers) {
+            IExpectationSetters<byte[]> convertHeaderExpect = EasyMock.expect(headerConverter.fromConnectHeader(topic, header.key(), Schema.STRING_SCHEMA, new String(header.value())));
+            if (anyTimes)
+                convertHeaderExpect.andStubReturn(header.value());
+            else
+                convertHeaderExpect.andReturn(header.value());
+        }
+        IExpectationSetters<byte[]> convertKeyExpect = EasyMock.expect(keyConverter.fromConnectData(topic, headers, KEY_SCHEMA, KEY));
+        if (anyTimes)
+            convertKeyExpect.andStubReturn(SERIALIZED_KEY);
+        else
+            convertKeyExpect.andReturn(SERIALIZED_KEY);
+        IExpectationSetters<byte[]> convertValueExpect = EasyMock.expect(valueConverter.fromConnectData(topic, headers, RECORD_SCHEMA, RECORD));
+        if (anyTimes)
+            convertValueExpect.andStubReturn(SERIALIZED_RECORD);
+        else
+            convertValueExpect.andReturn(SERIALIZED_RECORD);
+    }
+
+    private void expectApplyTransformationChain(boolean anyTimes) {
+        final Capture<SourceRecord> recordCapture = EasyMock.newCapture();
+        IExpectationSetters<SourceRecord> convertKeyExpect = EasyMock.expect(transformationChain.apply(EasyMock.capture(recordCapture)));
+        if (anyTimes)
+            convertKeyExpect.andStubAnswer(recordCapture::getValue);
+        else
+            convertKeyExpect.andAnswer(recordCapture::getValue);
+    }
+
+    private void expectTaskGetTopic(boolean anyTimes) {
+        final Capture<String> connectorCapture = EasyMock.newCapture();
+        final Capture<String> topicCapture = EasyMock.newCapture();
+        IExpectationSetters<TopicStatus> expect = EasyMock.expect(statusBackingStore.getTopic(
+                EasyMock.capture(connectorCapture),
+                EasyMock.capture(topicCapture)));
+        if (anyTimes) {
+            expect.andStubAnswer(() -> new TopicStatus(
+                    topicCapture.getValue(),
+                    new ConnectorTaskId(connectorCapture.getValue(), 0),
+                    time.milliseconds()));
+        } else {
+            expect.andAnswer(() -> new TopicStatus(
+                    topicCapture.getValue(),
+                    new ConnectorTaskId(connectorCapture.getValue(), 0),
+                    time.milliseconds()));
+        }
+        if (connectorCapture.hasCaptured() && topicCapture.hasCaptured()) {
+            assertEquals("job", connectorCapture.getValue());
+            assertEquals(TOPIC, topicCapture.getValue());
+        }
+    }
+
+    private boolean awaitLatch(CountDownLatch latch) {
+        try {
+            return latch.await(5000, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException e) {
+            // ignore
+        }
+        return false;
+    }
+
+    private enum FlushOutcome {
+        SUCCEED,
+        SUCCEED_ANY_TIMES,
+        FAIL_FLUSH_CALLBACK,
+        FAIL_TRANSACTION_COMMIT
+    }
+
+    private CountDownLatch expectFlush(FlushOutcome outcome, AtomicInteger flushCount) {
+        CountDownLatch result = new CountDownLatch(1);
+        org.easymock.IExpectationSetters<Boolean> flushBegin = EasyMock
+                .expect(offsetWriter.beginFlush())
+                .andAnswer(() -> {
+                    flushCount.incrementAndGet();
+                    result.countDown();
+                    return true;
+                });
+        if (FlushOutcome.SUCCEED_ANY_TIMES.equals(outcome)) {
+            flushBegin.anyTimes();
+        }
+
+        Capture<Callback<Void>> flushCallback = EasyMock.newCapture();
+        org.easymock.IExpectationSetters<Future<Void>> offsetFlush =
+                EasyMock.expect(offsetWriter.doFlush(EasyMock.capture(flushCallback)));
+        switch (outcome) {
+            case SUCCEED:
+                // The worker task doesn't actually use the returned future
+                offsetFlush.andReturn(null);
+                expectCall(producer::commitTransaction);
+                expectCall(() -> sourceTask.commitRecord(EasyMock.anyObject(), EasyMock.anyObject()));
+                expectCall(sourceTask::commit);
+                break;
+            case SUCCEED_ANY_TIMES:
+                // The worker task doesn't actually use the returned future
+                offsetFlush.andReturn(null).anyTimes();
+                expectCall(producer::commitTransaction).anyTimes();
+                expectCall(() -> sourceTask.commitRecord(EasyMock.anyObject(), EasyMock.anyObject())).anyTimes();
+                expectCall(sourceTask::commit).anyTimes();
+                break;
+            case FAIL_FLUSH_CALLBACK:
+                expectCall(producer::commitTransaction);
+                offsetFlush.andAnswer(() -> {
+                    flushCallback.getValue().onCompletion(new RecordTooLargeException(), null);
+                    return null;
+                });
+                expectCall(offsetWriter::cancelFlush);
+                break;
+            case FAIL_TRANSACTION_COMMIT:
+                offsetFlush.andReturn(null);
+                expectCall(producer::commitTransaction)
+                        .andThrow(new RecordTooLargeException());
+                expectCall(offsetWriter::cancelFlush);
+                break;
+            default:
+                fail("Unexpected flush outcome: " + outcome);
+        }
+        return result;
+    }
+
+    private CountDownLatch expectFlush(FlushOutcome outcome) {
+        return expectFlush(outcome, new AtomicInteger());
+    }
+
+    private CountDownLatch expectAnyFlushes(AtomicInteger flushCount) {
+        EasyMock.expect(offsetWriter.willFlush()).andReturn(true).anyTimes();
+        return expectFlush(FlushOutcome.SUCCEED_ANY_TIMES, flushCount);
+    }
+
+    private void assertTransactionMetrics(int minimumMaxSizeExpected) {
+        MetricGroup transactionGroup = workerTask.transactionMetricsGroup().metricGroup();
+        double actualMin = metrics.currentMetricValueAsDouble(transactionGroup, "transaction-size-min");
+        double actualMax = metrics.currentMetricValueAsDouble(transactionGroup, "transaction-size-max");
+        double actualAvg = metrics.currentMetricValueAsDouble(transactionGroup, "transaction-size-avg");
+        assertTrue(actualMin >= 0);
+        assertTrue(actualMax >= minimumMaxSizeExpected);
+
+        if (actualMax - actualMin <= 0.000001d) {
+            assertEquals(actualMax, actualAvg, 0.000002d);
+        } else {
+            assertTrue("Average transaction size should be greater than minimum transaction size", actualAvg > actualMin);
+            assertTrue("Average transaction size should be less than maximum transaction size", actualAvg < actualMax);
+        }
+    }
+
+    private void assertPollMetrics(int minimumPollCountExpected) {
+        MetricGroup sourceTaskGroup = workerTask.sourceTaskMetricsGroup().metricGroup();
+        MetricGroup taskGroup = workerTask.taskMetricsGroup().metricGroup();
+        double pollRate = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-poll-rate");
+        double pollTotal = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-poll-total");
+        if (minimumPollCountExpected > 0) {
+            assertEquals(RECORDS.size(), metrics.currentMetricValueAsDouble(taskGroup, "batch-size-max"), 0.000001d);
+            assertEquals(RECORDS.size(), metrics.currentMetricValueAsDouble(taskGroup, "batch-size-avg"), 0.000001d);
+            assertTrue(pollRate > 0.0d);
+        } else {
+            assertTrue(pollRate == 0.0d);
+        }
+        assertTrue(pollTotal >= minimumPollCountExpected);
+
+        double writeRate = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-write-rate");
+        double writeTotal = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-write-total");
+        if (minimumPollCountExpected > 0) {
+            assertTrue(writeRate > 0.0d);
+        } else {
+            assertTrue(writeRate == 0.0d);
+        }
+        assertTrue(writeTotal >= minimumPollCountExpected);
+
+        double pollBatchTimeMax = metrics.currentMetricValueAsDouble(sourceTaskGroup, "poll-batch-max-time-ms");
+        double pollBatchTimeAvg = metrics.currentMetricValueAsDouble(sourceTaskGroup, "poll-batch-avg-time-ms");
+        if (minimumPollCountExpected > 0) {
+            assertTrue(pollBatchTimeMax >= 0.0d);
+        }
+        assertTrue(Double.isNaN(pollBatchTimeAvg) || pollBatchTimeAvg > 0.0d);
+        double activeCount = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-active-count");
+        double activeCountMax = metrics.currentMetricValueAsDouble(sourceTaskGroup, "source-record-active-count-max");
+        assertEquals(0, activeCount, 0.000001d);
+        if (minimumPollCountExpected > 0) {
+            assertEquals(RECORDS.size(), activeCountMax, 0.000001d);
+        }
+    }
+
+    private RecordHeaders emptyHeaders() {
+        return new RecordHeaders();
+    }
+
+    private abstract static class TestSourceTask extends SourceTask {
+    }
+
+    @FunctionalInterface
+    private interface MockedMethodCall {
+        void invoke() throws Exception;
+    }
+
+    private static <T> org.easymock.IExpectationSetters<T> expectCall(MockedMethodCall call) {
+        try {
+            call.invoke();
+        } catch (RuntimeException e) {
+            throw e;
+        } catch (Exception e) {
+            throw new RuntimeException("Mocked method invocation threw a checked exception", e);
+        }
+        return EasyMock.expectLastCall();
+    }
+
+    private void expectPreflight() {
+        expectCall(preProducerCheck::run);
+        expectCall(producer::initTransactions);
+        expectCall(postProducerCheck::run);
+        expectCall(offsetStore::start);
+    }
+
+    private void expectStartup() {
+        expectCall(() -> sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class)));
+        expectCall(() -> sourceTask.start(TASK_PROPS));
+        expectCall(() -> statusListener.onStartup(taskId));
+    }
+
+    private void expectClose() {
+        expectCall(offsetStore::stop);
+        expectCall(() -> producer.close(EasyMock.anyObject(Duration.class)));
+        expectCall(() -> admin.close(EasyMock.anyObject(Duration.class)));
+        expectCall(transformationChain::close);
+        expectCall(offsetReader::close);
+        expectCall(headerConverter::close);
+    }
+
+    private void expectTopicCreation(String topic) {
+        if (config.topicCreationEnable()) {
+            EasyMock.expect(admin.describeTopics(topic)).andReturn(Collections.emptyMap());
+            Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
+            EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(createdTopic(topic));
+        }
+    }
+}
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SourceConnectorConfigTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SourceConnectorConfigTest.java
index 1972b62e81113..251bb72fbe2d9 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SourceConnectorConfigTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SourceConnectorConfigTest.java
@@ -33,6 +33,7 @@
 import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
 import static org.apache.kafka.connect.runtime.ConnectorConfig.NAME_CONFIG;
 import static org.apache.kafka.connect.runtime.ConnectorConfigTest.MOCK_PLUGINS;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.TOPIC_CREATION_GROUPS_CONFIG;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_GROUP;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_PREFIX;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_CONFIG;
@@ -47,6 +48,8 @@
 public class SourceConnectorConfigTest {
 
     private static final String FOO_CONNECTOR = "foo-source";
+    private static final String TOPIC_CREATION_GROUP_1 = "group1";
+    private static final String TOPIC_CREATION_GROUP_2 = "group2";
     private static final short DEFAULT_REPLICATION_FACTOR = -1;
     private static final int DEFAULT_PARTITIONS = -1;
 
@@ -64,6 +67,16 @@ public Map<String, String> defaultConnectorPropsWithTopicCreation() {
         return props;
     }
 
+    @Test
+    public void shouldNotFailWithExplicitlySpecifiedDefaultTopicCreationGroup() {
+        Map<String, String> props = defaultConnectorProps();
+        props.put(TOPIC_CREATION_GROUPS_CONFIG, String.join(",", DEFAULT_TOPIC_CREATION_GROUP,
+            TOPIC_CREATION_GROUP_1, TOPIC_CREATION_GROUP_2));
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + REPLICATION_FACTOR_CONFIG, "1");
+        props.put(DEFAULT_TOPIC_CREATION_PREFIX + PARTITIONS_CONFIG, "1");
+        SourceConnectorConfig config = new SourceConnectorConfig(MOCK_PLUGINS, props, true);
+    }
+
     @Test
     public void noTopicCreation() {
         Map<String, String> props = defaultConnectorProps();
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SubmittedRecordsTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SubmittedRecordsTest.java
index 4028249a78ad8..39d680a7d46be 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SubmittedRecordsTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/SubmittedRecordsTest.java
@@ -178,8 +178,8 @@ public void testRemoveLastSubmittedRecord() {
         assertEquals(Collections.emptyMap(), committableOffsets.offsets());
         assertMetadata(committableOffsets, 0, 1, 1, 1, PARTITION1);
 
-        assertTrue("First attempt to remove record from submitted queue should succeed", submittedRecords.removeLastOccurrence(submittedRecord));
-        assertFalse("Attempt to remove already-removed record from submitted queue should fail", submittedRecords.removeLastOccurrence(submittedRecord));
+        assertTrue("First attempt to remove record from submitted queue should succeed", submittedRecord.drop());
+        assertFalse("Attempt to remove already-removed record from submitted queue should fail", submittedRecord.drop());
 
         committableOffsets = submittedRecords.committableOffsets();
         // Even if SubmittedRecords::remove is broken, we haven't ack'd anything yet, so there should be no committable offsets
@@ -203,7 +203,7 @@ public void testRemoveNotLastSubmittedRecord() {
         assertMetadata(committableOffsets, 0, 2, 2, 1, PARTITION1, PARTITION2);
         assertNoEmptyDeques();
 
-        assertTrue("First attempt to remove record from submitted queue should succeed", submittedRecords.removeLastOccurrence(recordToRemove));
+        assertTrue("First attempt to remove record from submitted queue should succeed", recordToRemove.drop());
 
         committableOffsets = submittedRecords.committableOffsets();
         // Even if SubmittedRecords::remove is broken, we haven't ack'd anything yet, so there should be no committable offsets
@@ -269,20 +269,20 @@ public void testAwaitMessagesAfterAllRemoved() {
                 submittedRecords.awaitAllMessages(0, TimeUnit.MILLISECONDS)
         );
 
-        submittedRecords.removeLastOccurrence(recordToRemove1);
+        recordToRemove1.drop();
         assertFalse(
                 "Await should fail since only one of the two submitted records has been removed so far",
                 submittedRecords.awaitAllMessages(0, TimeUnit.MILLISECONDS)
         );
 
-        submittedRecords.removeLastOccurrence(recordToRemove1);
+        recordToRemove1.drop();
         assertFalse(
                 "Await should fail since only one of the two submitted records has been removed so far, "
                         + "even though that record has been removed twice",
                 submittedRecords.awaitAllMessages(0, TimeUnit.MILLISECONDS)
         );
 
-        submittedRecords.removeLastOccurrence(recordToRemove2);
+        recordToRemove2.drop();
         assertTrue(
                 "Await should succeed since both submitted records have now been removed",
                 submittedRecords.awaitAllMessages(0, TimeUnit.MILLISECONDS)
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTest.java
index fbe6800749299..f02892888e3e0 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTest.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.clients.CommonClientConfigs;
 import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.common.config.internals.BrokerSecurityConfigs;
 import org.junit.Test;
 
 import java.util.Arrays;
@@ -147,6 +148,15 @@ public void testValidHeaderConfigs() {
         }
     }
 
+    @Test
+    public void testInvalidSslClientAuthConfig() {
+        Map<String, String> props = baseProps();
+
+        props.put(BrokerSecurityConfigs.SSL_CLIENT_AUTH_CONFIG, "abc");
+        ConfigException ce = assertThrows(ConfigException.class, () -> new WorkerConfig(WorkerConfig.baseConfigDef(), props));
+        assertTrue(ce.getMessage().contains(BrokerSecurityConfigs.SSL_CLIENT_AUTH_CONFIG));
+    }
+
     private void assertInvalidHeaderConfig(String config) {
         assertThrows(ConfigException.class, () -> WorkerConfig.validateHttpResponseHeaderConfig(config));
     }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTransformerTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTransformerTest.java
index 6f4bda66904d7..b13f825b9619d 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTransformerTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConfigTransformerTest.java
@@ -16,17 +16,13 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import org.apache.kafka.common.config.ConfigChangeCallback;
 import org.apache.kafka.common.config.ConfigData;
 import org.apache.kafka.common.config.provider.ConfigProvider;
-import org.easymock.EasyMock;
-import static org.easymock.EasyMock.eq;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
-import org.powermock.api.easymock.PowerMock;
-import org.powermock.api.easymock.annotation.Mock;
-import org.powermock.modules.junit4.PowerMockRunner;
+import org.mockito.Mock;
+import org.mockito.junit.MockitoJUnitRunner;
 
 import java.util.Collections;
 import java.util.HashMap;
@@ -35,12 +31,15 @@
 
 import static org.apache.kafka.connect.runtime.ConnectorConfig.CONFIG_RELOAD_ACTION_CONFIG;
 import static org.apache.kafka.connect.runtime.ConnectorConfig.CONFIG_RELOAD_ACTION_NONE;
-import static org.easymock.EasyMock.notNull;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
-import static org.powermock.api.easymock.PowerMock.replayAll;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.notNull;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
-@RunWith(PowerMockRunner.class)
+@RunWith(MockitoJUnitRunner.class)
 public class WorkerConfigTransformerTest {
 
     public static final String MY_KEY = "myKey";
@@ -53,64 +52,75 @@ public class WorkerConfigTransformerTest {
     public static final String TEST_RESULT_WITH_TTL = "testResultWithTTL";
     public static final String TEST_RESULT_WITH_LONGER_TTL = "testResultWithLongerTTL";
 
-    @Mock private Herder herder;
-    @Mock private Worker worker;
-    @Mock private HerderRequest requestId;
+    @Mock
+    private Herder herder;
+    @Mock
+    private Worker worker;
+    @Mock
+    private HerderRequest requestId;
     private WorkerConfigTransformer configTransformer;
 
     @Before
     public void setup() {
-        worker = PowerMock.createMock(Worker.class);
-        herder = PowerMock.createMock(Herder.class);
         configTransformer = new WorkerConfigTransformer(worker, Collections.singletonMap("test", new TestConfigProvider()));
     }
 
     @Test
     public void testReplaceVariable() {
+        // Execution
         Map<String, String> result = configTransformer.transform(MY_CONNECTOR, Collections.singletonMap(MY_KEY, "${test:testPath:testKey}"));
+
+        // Assertions
         assertEquals(TEST_RESULT, result.get(MY_KEY));
     }
 
     @Test
     public void testReplaceVariableWithTTL() {
-        EasyMock.expect(worker.herder()).andReturn(herder);
-
-        replayAll();
-
+        // Execution
         Map<String, String> props = new HashMap<>();
         props.put(MY_KEY, "${test:testPath:testKeyWithTTL}");
         props.put(CONFIG_RELOAD_ACTION_CONFIG, CONFIG_RELOAD_ACTION_NONE);
         Map<String, String> result = configTransformer.transform(MY_CONNECTOR, props);
+
+        // Assertions
+        assertEquals(TEST_RESULT_WITH_TTL, result.get(MY_KEY));
     }
 
     @Test
     public void testReplaceVariableWithTTLAndScheduleRestart() {
-        EasyMock.expect(worker.herder()).andReturn(herder);
-        EasyMock.expect(herder.restartConnector(eq(1L), eq(MY_CONNECTOR), notNull())).andReturn(requestId);
-        replayAll();
+        // Setup
+        when(worker.herder()).thenReturn(herder);
+        when(herder.restartConnector(eq(1L), eq(MY_CONNECTOR), notNull())).thenReturn(requestId);
 
+        // Execution
         Map<String, String> result = configTransformer.transform(MY_CONNECTOR, Collections.singletonMap(MY_KEY, "${test:testPath:testKeyWithTTL}"));
+
+        // Assertions
         assertEquals(TEST_RESULT_WITH_TTL, result.get(MY_KEY));
+        verify(herder).restartConnector(eq(1L), eq(MY_CONNECTOR), notNull());
     }
 
     @Test
     public void testReplaceVariableWithTTLFirstCancelThenScheduleRestart() {
-        EasyMock.expect(worker.herder()).andReturn(herder);
-        EasyMock.expect(herder.restartConnector(eq(1L), eq(MY_CONNECTOR), notNull())).andReturn(requestId);
-
-        EasyMock.expect(worker.herder()).andReturn(herder);
-        EasyMock.expectLastCall();
-        requestId.cancel();
-        EasyMock.expectLastCall();
-        EasyMock.expect(herder.restartConnector(eq(10L), eq(MY_CONNECTOR), notNull())).andReturn(requestId);
-
-        replayAll();
+        // Setup
+        when(worker.herder()).thenReturn(herder);
+        when(herder.restartConnector(eq(1L), eq(MY_CONNECTOR), notNull())).thenReturn(requestId);
+        when(herder.restartConnector(eq(10L), eq(MY_CONNECTOR), notNull())).thenReturn(requestId);
 
+        // Execution
         Map<String, String> result = configTransformer.transform(MY_CONNECTOR, Collections.singletonMap(MY_KEY, "${test:testPath:testKeyWithTTL}"));
+
+        // Assertions
         assertEquals(TEST_RESULT_WITH_TTL, result.get(MY_KEY));
+        verify(herder).restartConnector(eq(1L), eq(MY_CONNECTOR), notNull());
 
+        // Execution
         result = configTransformer.transform(MY_CONNECTOR, Collections.singletonMap(MY_KEY, "${test:testPath:testKeyWithLongerTTL}"));
+
+        // Assertions
         assertEquals(TEST_RESULT_WITH_LONGER_TTL, result.get(MY_KEY));
+        verify(requestId, times(1)).cancel();
+        verify(herder).restartConnector(eq(10L), eq(MY_CONNECTOR), notNull());
     }
 
     @Test
@@ -120,13 +130,16 @@ public void testTransformNullConfiguration() {
 
     public static class TestConfigProvider implements ConfigProvider {
 
+        @Override
         public void configure(Map<String, ?> configs) {
         }
 
+        @Override
         public ConfigData get(String path) {
             return null;
         }
 
+        @Override
         public ConfigData get(String path, Set<String> keys) {
             if (path.equals(TEST_PATH)) {
                 if (keys.contains(TEST_KEY)) {
@@ -140,14 +153,7 @@ public ConfigData get(String path, Set<String> keys) {
             return new ConfigData(Collections.emptyMap());
         }
 
-        public void subscribe(String path, Set<String> keys, ConfigChangeCallback callback) {
-            throw new UnsupportedOperationException();
-        }
-
-        public void unsubscribe(String path, Set<String> keys) {
-            throw new UnsupportedOperationException();
-        }
-
+        @Override
         public void close() {
         }
     }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConnectorTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConnectorTest.java
index 29b85301e89a2..e716efc091df8 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConnectorTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerConnectorTest.java
@@ -24,29 +24,38 @@
 import org.apache.kafka.connect.sink.SinkConnectorContext;
 import org.apache.kafka.connect.source.SourceConnector;
 import org.apache.kafka.connect.source.SourceConnectorContext;
-import org.apache.kafka.connect.storage.OffsetStorageReader;
-import org.easymock.Capture;
+import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.util.Callback;
-import org.easymock.EasyMock;
-import org.easymock.EasyMockRunner;
-import org.easymock.EasyMockSupport;
-import org.easymock.Mock;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-import org.junit.runner.RunWith;
 
 import java.util.HashMap;
 import java.util.Map;
+import org.junit.runner.RunWith;
+import org.mockito.ArgumentCaptor;
+import org.mockito.InOrder;
+import org.mockito.Mock;
+import org.mockito.junit.MockitoJUnitRunner;
 
-import static org.easymock.EasyMock.expectLastCall;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
-
-@RunWith(EasyMockRunner.class)
-public class WorkerConnectorTest extends EasyMockSupport {
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.isNull;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.StrictStubs.class)
+public class WorkerConnectorTest {
 
     private static final String VERSION = "1.1";
     public static final String CONNECTOR = "connector";
@@ -59,14 +68,15 @@ public class WorkerConnectorTest extends EasyMockSupport {
     public ConnectorConfig connectorConfig;
     public MockConnectMetrics metrics;
 
-    @Mock Plugins plugins;
-    @Mock SourceConnector sourceConnector;
-    @Mock SinkConnector sinkConnector;
-    @Mock Connector connector;
-    @Mock CloseableConnectorContext ctx;
-    @Mock ConnectorStatus.Listener listener;
-    @Mock OffsetStorageReader offsetStorageReader;
-    @Mock ClassLoader classLoader;
+    @Mock private Plugins plugins;
+    @Mock private SourceConnector sourceConnector;
+    @Mock private SinkConnector sinkConnector;
+    @Mock private CloseableConnectorContext ctx;
+    @Mock private ConnectorStatus.Listener listener;
+    @Mock private CloseableOffsetStorageReader offsetStorageReader;
+    @Mock private ConnectorOffsetBackingStore offsetStore;
+    @Mock private ClassLoader classLoader;
+    private Connector connector;
 
     @Before
     public void setup() {
@@ -84,24 +94,10 @@ public void testInitializeFailure() {
         RuntimeException exception = new RuntimeException();
         connector = sourceConnector;
 
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall().andThrow(exception);
-
-        listener.onFailure(CONNECTOR, exception);
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
+        doThrow(exception).when(connector).initialize(any());
 
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertFailedMetric(workerConnector);
@@ -109,7 +105,9 @@ public void testInitializeFailure() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(listener).onFailure(CONNECTOR, exception);
+        verifyCleanShutdown(false);
     }
 
     @Test
@@ -117,30 +115,11 @@ public void testFailureIsFinalState() {
         RuntimeException exception = new RuntimeException();
         connector = sinkConnector;
 
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SinkConnectorContext.class));
-        expectLastCall().andThrow(exception);
-
-        listener.onFailure(CONNECTOR, exception);
-        expectLastCall();
-
-        // expect no call to onStartup() after failure
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
+        doThrow(exception).when(connector).initialize(any());
 
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.anyObject(Exception.class), EasyMock.isNull());
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, null, null, classLoader);
 
         workerConnector.initialize();
         assertFailedMetric(workerConnector);
@@ -150,40 +129,23 @@ public void testFailureIsFinalState() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(listener).onFailure(CONNECTOR, exception);
+        // expect no call to onStartup() after failure
+        verifyCleanShutdown(false);
+
+        verify(onStateChange).onCompletion(any(Exception.class), isNull());
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testStartupAndShutdown() {
         connector = sourceConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall();
-
-        connector.start(CONFIG);
-        expectLastCall();
 
-        listener.onStartup(CONNECTOR);
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        connector.stop();
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertInitializedSourceMetric(workerConnector);
@@ -193,48 +155,26 @@ public void testStartupAndShutdown() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        verify(listener).onStartup(CONNECTOR);
+        verifyCleanShutdown(true);
+
+        verify(onStateChange).onCompletion(isNull(), eq(TargetState.STARTED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testStartupAndPause() {
         connector = sinkConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
+        when(connector.version()).thenReturn(VERSION);
 
-        connector.initialize(EasyMock.notNull(SinkConnectorContext.class));
-        expectLastCall();
-
-        connector.start(CONFIG);
-        expectLastCall();
-
-        listener.onStartup(CONNECTOR);
-        expectLastCall();
-
-        connector.stop();
-        expectLastCall();
-
-        listener.onPause(CONNECTOR);
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall();
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.PAUSED));
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, null, null, classLoader);
 
         workerConnector.initialize();
         assertInitializedSinkMetric(workerConnector);
+
         workerConnector.doTransitionTo(TargetState.STARTED, onStateChange);
         assertRunningMetric(workerConnector);
         workerConnector.doTransitionTo(TargetState.PAUSED, onStateChange);
@@ -243,45 +183,27 @@ public void testStartupAndPause() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        verify(listener).onStartup(CONNECTOR);
+        verify(listener).onPause(CONNECTOR);
+        verifyCleanShutdown(true);
+
+        InOrder inOrder = inOrder(onStateChange);
+        inOrder.verify(onStateChange).onCompletion(isNull(), eq(TargetState.STARTED));
+        inOrder.verify(onStateChange).onCompletion(isNull(), eq(TargetState.PAUSED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testOnResume() {
         connector = sourceConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall();
-
-        listener.onPause(CONNECTOR);
-        expectLastCall();
 
-        connector.start(CONFIG);
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        listener.onResume(CONNECTOR);
-        expectLastCall();
+        Callback<TargetState> onStateChange = mockCallback();
 
-        connector.stop();
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.PAUSED));
-        expectLastCall();
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertInitializedSourceMetric(workerConnector);
@@ -293,36 +215,25 @@ public void testOnResume() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(listener).onPause(CONNECTOR);
+        verify(connector).start(CONFIG);
+        verify(listener).onResume(CONNECTOR);
+        verifyCleanShutdown(true);
+
+        InOrder inOrder = inOrder(onStateChange);
+        inOrder.verify(onStateChange).onCompletion(isNull(), eq(TargetState.PAUSED));
+        inOrder.verify(onStateChange).onCompletion(isNull(), eq(TargetState.STARTED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testStartupPaused() {
         connector = sinkConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SinkConnectorContext.class));
-        expectLastCall();
-
-        // connector never gets started
-
-        listener.onPause(CONNECTOR);
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.PAUSED));
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, null, null, classLoader);
 
         workerConnector.initialize();
         assertInitializedSinkMetric(workerConnector);
@@ -332,39 +243,25 @@ public void testStartupPaused() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        // connector never gets started
+        verify(listener).onPause(CONNECTOR);
+        verifyCleanShutdown(false);
+
+        verify(onStateChange).onCompletion(isNull(), eq(TargetState.PAUSED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testStartupFailure() {
         RuntimeException exception = new RuntimeException();
-
         connector = sinkConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SinkConnectorContext.class));
-        expectLastCall();
-
-        connector.start(CONFIG);
-        expectLastCall().andThrow(exception);
 
-        listener.onFailure(CONNECTOR, exception);
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
+        doThrow(exception).when(connector).start(CONFIG);
 
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.anyObject(Exception.class), EasyMock.isNull());
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, null, null, classLoader);
 
         workerConnector.initialize();
         assertInitializedSinkMetric(workerConnector);
@@ -374,7 +271,13 @@ public void testStartupFailure() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        verify(listener).onFailure(CONNECTOR, exception);
+        verifyCleanShutdown(false);
+
+        verify(onStateChange).onCompletion(any(Exception.class), isNull());
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
@@ -382,34 +285,12 @@ public void testShutdownFailure() {
         RuntimeException exception = new RuntimeException();
         connector = sourceConnector;
 
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        connector.start(CONFIG);
-        expectLastCall();
+        doThrow(exception).when(connector).stop();
 
-        listener.onStartup(CONNECTOR);
-        expectLastCall();
-
-        connector.stop();
-        expectLastCall().andThrow(exception);
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall();
-
-        listener.onFailure(CONNECTOR, exception);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertInitializedSourceMetric(workerConnector);
@@ -419,41 +300,24 @@ public void testShutdownFailure() {
         workerConnector.doShutdown();
         assertFailedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        verify(listener).onStartup(CONNECTOR);
+        verify(onStateChange).onCompletion(isNull(), eq(TargetState.STARTED));
+        verifyNoMoreInteractions(onStateChange);
+        verify(listener).onFailure(CONNECTOR, exception);
+        verifyShutdown(false, true);
     }
 
     @Test
     public void testTransitionStartedToStarted() {
         connector = sourceConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall();
-
-        connector.start(CONFIG);
-        expectLastCall();
-
-        // expect only one call to onStartup()
-        listener.onStartup(CONNECTOR);
-        expectLastCall();
-
-        connector.stop();
-        expectLastCall();
-
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
 
-        ctx.close();
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall().times(2);
+        Callback<TargetState> onStateChange = mockCallback();
 
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertInitializedSourceMetric(workerConnector);
@@ -465,45 +329,22 @@ public void testTransitionStartedToStarted() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        // expect only one call to onStartup()
+        verify(listener).onStartup(CONNECTOR);
+        verifyCleanShutdown(true);
+        verify(onStateChange, times(2)).onCompletion(isNull(), eq(TargetState.STARTED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testTransitionPausedToPaused() {
         connector = sourceConnector;
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        connector.initialize(EasyMock.notNull(SourceConnectorContext.class));
-        expectLastCall();
-
-        connector.start(CONFIG);
-        expectLastCall();
-
-        listener.onStartup(CONNECTOR);
-        expectLastCall();
-
-        connector.stop();
-        expectLastCall();
-
-        listener.onPause(CONNECTOR);
-        expectLastCall();
+        when(connector.version()).thenReturn(VERSION);
 
-        listener.onShutdown(CONNECTOR);
-        expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        Callback<TargetState> onStateChange = createStrictMock(Callback.class);
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.STARTED));
-        expectLastCall();
-        onStateChange.onCompletion(EasyMock.isNull(), EasyMock.eq(TargetState.PAUSED));
-        expectLastCall().times(2);
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        Callback<TargetState> onStateChange = mockCallback();
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
         assertInitializedSourceMetric(workerConnector);
@@ -517,28 +358,32 @@ public void testTransitionPausedToPaused() {
         workerConnector.doShutdown();
         assertStoppedMetric(workerConnector);
 
-        verifyAll();
+        verifyInitialize();
+        verify(connector).start(CONFIG);
+        verify(listener).onStartup(CONNECTOR);
+        verify(listener).onPause(CONNECTOR);
+        verifyCleanShutdown(true);
+
+        InOrder inOrder = inOrder(onStateChange);
+        inOrder.verify(onStateChange).onCompletion(isNull(), eq(TargetState.STARTED));
+        inOrder.verify(onStateChange, times(2)).onCompletion(isNull(), eq(TargetState.PAUSED));
+        verifyNoMoreInteractions(onStateChange);
     }
 
     @Test
     public void testFailConnectorThatIsNeitherSourceNorSink() {
-        connector.version();
-        expectLastCall().andReturn(VERSION);
-
-        Capture<Throwable> exceptionCapture = Capture.newInstance();
-        listener.onFailure(EasyMock.eq(CONNECTOR), EasyMock.capture(exceptionCapture));
-        expectLastCall();
-
-        replayAll();
-
-        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, classLoader);
+        connector = mock(Connector.class);
+        when(connector.version()).thenReturn(VERSION);
+        WorkerConnector workerConnector = new WorkerConnector(CONNECTOR, connector, connectorConfig, ctx, metrics, listener, offsetStorageReader, offsetStore, classLoader);
 
         workerConnector.initialize();
+
+        verify(connector).version();
+        ArgumentCaptor<Throwable> exceptionCapture = ArgumentCaptor.forClass(Throwable.class);
+        verify(listener).onFailure(eq(CONNECTOR), exceptionCapture.capture());
         Throwable e = exceptionCapture.getValue();
         assertTrue(e instanceof ConnectException);
         assertTrue(e.getMessage().contains("must be a subclass of"));
-
-        verifyAll();
     }
 
     protected void assertFailedMetric(WorkerConnector workerConnector) {
@@ -592,6 +437,39 @@ protected void assertInitializedMetric(WorkerConnector workerConnector, String e
         assertEquals(VERSION, version);
     }
 
+    @SuppressWarnings("unchecked")
+    private Callback<TargetState> mockCallback() {
+        return mock(Callback.class);
+    }
+
+    private void verifyInitialize() {
+        verify(connector).version();
+        if (connector instanceof SourceConnector) {
+            verify(offsetStore).start();
+            verify(connector).initialize(any(SourceConnectorContext.class));
+        } else {
+            verify(connector).initialize(any(SinkConnectorContext.class));
+        }
+    }
+
+    private void verifyCleanShutdown(boolean started) {
+        verifyShutdown(true, started);
+    }
+
+    private void verifyShutdown(boolean clean, boolean started) {
+        verify(ctx).close();
+        if (connector instanceof SourceConnector) {
+            verify(offsetStorageReader).close();
+            verify(offsetStore).stop();
+        }
+        if (clean) {
+            verify(listener).onShutdown(CONNECTOR);
+        }
+        if (started) {
+            verify(connector).stop();
+        }
+    }
+
     private static abstract class TestConnector extends Connector {
     }
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java
index 65ab0c7e7cbbb..4aaf7649663b9 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskTest.java
@@ -39,7 +39,7 @@
 import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.connect.errors.RetriableException;
 import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.WorkerSinkTask.SinkTaskMetricsGroup;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest;
 import org.apache.kafka.connect.runtime.isolation.PluginClassLoader;
@@ -348,6 +348,9 @@ public void testShutdown() throws Exception {
         transformationChain.close();
         PowerMock.expectLastCall();
 
+        headerConverter.close();
+        PowerMock.expectLastCall();
+
         PowerMock.replayAll();
 
         workerTask.initialize(TASK_CONFIG);
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskThreadedTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskThreadedTest.java
index a0c99fb47145f..cdd87e230d40d 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskThreadedTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSinkTaskThreadedTest.java
@@ -29,7 +29,7 @@
 import org.apache.kafka.connect.data.Schema;
 import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.ConnectException;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest;
 import org.apache.kafka.connect.runtime.isolation.PluginClassLoader;
 import org.apache.kafka.connect.runtime.standalone.StandaloneConfig;
@@ -56,6 +56,7 @@
 import org.powermock.modules.junit4.PowerMockRunner;
 import org.powermock.reflect.Whitebox;
 
+import java.io.IOException;
 import java.time.Duration;
 import java.util.Arrays;
 import java.util.Collection;
@@ -552,6 +553,13 @@ private void expectStopTask() {
 
         consumer.close();
         PowerMock.expectLastCall();
+
+        try {
+            headerConverter.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        PowerMock.expectLastCall();
     }
 
     // Note that this can only be called once per test currently
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java
index 78db83c7ee3c6..0366677b17cc3 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerSourceTaskTest.java
@@ -16,17 +16,12 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import java.util.Collection;
 import org.apache.kafka.clients.admin.NewTopic;
-import org.apache.kafka.clients.admin.TopicDescription;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.clients.producer.RecordMetadata;
-import org.apache.kafka.common.InvalidRecordException;
 import org.apache.kafka.common.KafkaException;
-import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
-import org.apache.kafka.common.TopicPartitionInfo;
 import org.apache.kafka.common.errors.InvalidTopicException;
 import org.apache.kafka.common.errors.TopicAuthorizationException;
 import org.apache.kafka.common.header.Header;
@@ -34,14 +29,10 @@
 import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.connect.data.Schema;
-import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.ConnectException;
-import org.apache.kafka.connect.errors.RetriableException;
-import org.apache.kafka.connect.header.ConnectHeaders;
 import org.apache.kafka.connect.integration.MonitorableSourceConnector;
 import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
-import org.apache.kafka.connect.runtime.WorkerSourceTask.SourceTaskMetricsGroup;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
 import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
@@ -50,6 +41,7 @@
 import org.apache.kafka.connect.source.SourceTask;
 import org.apache.kafka.connect.source.SourceTaskContext;
 import org.apache.kafka.connect.storage.CloseableOffsetStorageReader;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
 import org.apache.kafka.connect.storage.OffsetStorageWriter;
@@ -76,10 +68,10 @@
 import org.powermock.modules.junit4.PowerMockRunnerDelegate;
 import org.powermock.reflect.Whitebox;
 
-import java.nio.ByteBuffer;
+import java.io.IOException;
 import java.time.Duration;
-import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -106,7 +98,6 @@
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_CONFIG;
 import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_CREATION_ENABLE_CONFIG;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
@@ -147,6 +138,7 @@ public class WorkerSourceTaskTest extends ThreadedTest {
     @Mock private TopicAdmin admin;
     @Mock private CloseableOffsetStorageReader offsetReader;
     @Mock private OffsetStorageWriter offsetWriter;
+    @Mock private ConnectorOffsetBackingStore offsetStore;
     @Mock private ClusterConfigState clusterConfigState;
     private WorkerSourceTask workerTask;
     @Mock private Future<RecordMetadata> sendFuture;
@@ -235,16 +227,11 @@ private void createWorkerTask(TargetState initialState, RetryWithToleranceOperat
         createWorkerTask(initialState, keyConverter, valueConverter, headerConverter, retryWithToleranceOperator);
     }
 
-    private void createWorkerTask(TargetState initialState, Converter keyConverter, Converter valueConverter,
-                                  HeaderConverter headerConverter) {
-        createWorkerTask(initialState, keyConverter, valueConverter, headerConverter, RetryWithToleranceOperatorTest.NOOP_OPERATOR);
-    }
-
     private void createWorkerTask(TargetState initialState, Converter keyConverter, Converter valueConverter,
                                   HeaderConverter headerConverter, RetryWithToleranceOperator retryWithToleranceOperator) {
         workerTask = new WorkerSourceTask(taskId, sourceTask, statusListener, initialState, keyConverter, valueConverter, headerConverter,
-            transformationChain, producer, admin, TopicCreationGroup.configuredGroups(sourceConfig),
-            offsetReader, offsetWriter, config, clusterConfigState, metrics, plugins.delegatingLoader(), Time.SYSTEM,
+                transformationChain, producer, admin, TopicCreationGroup.configuredGroups(sourceConfig),
+                offsetReader, offsetWriter, offsetStore, config, clusterConfigState, metrics, plugins.delegatingLoader(), Time.SYSTEM,
                 retryWithToleranceOperator, statusBackingStore, Runnable::run);
     }
 
@@ -283,12 +270,7 @@ public void testStartPaused() throws Exception {
     public void testPause() throws Exception {
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         AtomicInteger count = new AtomicInteger(0);
         CountDownLatch pollLatch = expectPolls(10, count);
@@ -337,12 +319,7 @@ public void testPause() throws Exception {
     public void testPollsInBackground() throws Exception {
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         final CountDownLatch pollLatch = expectPolls(10);
         // In this test, we don't flush, so nothing goes any further than the offset writer
@@ -380,12 +357,7 @@ public void testPollsInBackground() throws Exception {
     public void testFailureInPoll() throws Exception {
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         final CountDownLatch pollLatch = new CountDownLatch(1);
         final RuntimeException exception = new RuntimeException();
@@ -422,12 +394,7 @@ public void testFailureInPoll() throws Exception {
     public void testFailureInPollAfterCancel() throws Exception {
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         final CountDownLatch pollLatch = new CountDownLatch(1);
         final CountDownLatch workerCancelLatch = new CountDownLatch(1);
@@ -470,12 +437,7 @@ public void testFailureInPollAfterCancel() throws Exception {
     public void testFailureInPollAfterStop() throws Exception {
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         final CountDownLatch pollLatch = new CountDownLatch(1);
         final CountDownLatch workerStopLatch = new CountDownLatch(1);
@@ -516,12 +478,7 @@ public void testPollReturnsNoRecords() throws Exception {
         // Test that the task handles an empty list of records
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         // We'll wait for some data, then trigger a flush
         final CountDownLatch pollLatch = expectEmptyPolls(1, new AtomicInteger());
@@ -557,12 +514,7 @@ public void testCommit() throws Exception {
         // Test that the task commits properly when prompted
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         // We'll wait for some data, then trigger a flush
         final CountDownLatch pollLatch = expectPolls(1);
@@ -603,12 +555,7 @@ public void testCommitFailure() throws Exception {
         // Test that the task commits properly when prompted
         createWorkerTask();
 
-        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
-        EasyMock.expectLastCall();
-        sourceTask.start(TASK_PROPS);
-        EasyMock.expectLastCall();
-        statusListener.onStartup(taskId);
-        EasyMock.expectLastCall();
+        expectCleanStartup();
 
         // We'll wait for some data, then trigger a flush
         final CountDownLatch pollLatch = expectPolls(1);
@@ -644,93 +591,6 @@ public void testCommitFailure() throws Exception {
         PowerMock.verifyAll();
     }
 
-    @Test
-    public void testSendRecordsConvertsData() throws Exception {
-        createWorkerTask();
-
-        List<SourceRecord> records = new ArrayList<>();
-        // Can just use the same record for key and value
-        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD));
-
-        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
-
-        expectTopicCreation(TOPIC);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(SERIALIZED_KEY, sent.getValue().key());
-        assertEquals(SERIALIZED_RECORD, sent.getValue().value());
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testSendRecordsPropagatesTimestamp() throws Exception {
-        final Long timestamp = System.currentTimeMillis();
-
-        createWorkerTask();
-
-        List<SourceRecord> records = Collections.singletonList(
-                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
-        );
-
-        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
-
-        expectTopicCreation(TOPIC);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(timestamp, sent.getValue().timestamp());
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testSendRecordsCorruptTimestamp() throws Exception {
-        final Long timestamp = -3L;
-        createWorkerTask();
-
-        List<SourceRecord> records = Collections.singletonList(
-                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
-        );
-
-        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        assertThrows(InvalidRecordException.class, () -> Whitebox.invokeMethod(workerTask, "sendRecords"));
-        assertFalse(sent.hasCaptured());
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testSendRecordsNoTimestamp() throws Exception {
-        final Long timestamp = -1L;
-        createWorkerTask();
-
-        List<SourceRecord> records = Collections.singletonList(
-                new SourceRecord(PARTITION, OFFSET, "topic", null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, timestamp)
-        );
-
-        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecordAnyTimes();
-
-        expectTopicCreation(TOPIC);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertNull(sent.getValue().timestamp());
-
-        PowerMock.verifyAll();
-    }
-
     @Test
     public void testSendRecordsRetries() throws Exception {
         createWorkerTask();
@@ -775,6 +635,8 @@ public void testSendRecordsProducerCallbackFail() throws Exception {
         expectTopicCreation(TOPIC);
 
         expectSendRecordProducerCallbackFail();
+        expectApplyTransformationChain(false);
+        expectConvertHeadersAndKeyValue(false);
 
         PowerMock.replayAll();
 
@@ -835,25 +697,39 @@ public void testSourceTaskIgnoresProducerException() throws Exception {
         createWorkerTaskWithErrorToleration();
         expectTopicCreation(TOPIC);
 
+        //Use different offsets for each record so we can verify all were committed
+        final Map<String, Object> offset2 = Collections.singletonMap("key", 13);
+
         // send two records
         // record 1 will succeed
         // record 2 will invoke the producer's failure callback, but ignore the exception via retryOperator
         // and no ConnectException will be thrown
         SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-
+        SourceRecord record2 = new SourceRecord(PARTITION, offset2, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
+        expectOffsetFlush(true);
         expectSendRecordOnce();
         expectSendRecordProducerCallbackFail();
         sourceTask.commitRecord(EasyMock.anyObject(SourceRecord.class), EasyMock.isNull());
-        EasyMock.expectLastCall();
+
+        //As of KAFKA-14079 all offsets should be committed, even for failed records (if ignored)
+        //Only the last offset will be passed to the method as everything up to that point is committed
+        //Before KAFKA-14079 offset 12 would have been passed and not 13 as it would have been unacked
+        offsetWriter.offset(PARTITION, offset2);
+        PowerMock.expectLastCall();
 
         PowerMock.replayAll();
 
+        //Send records and then commit offsets and verify both were committed and no exception
         Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
         Whitebox.invokeMethod(workerTask, "sendRecords");
+        Whitebox.invokeMethod(workerTask, "updateCommittableOffsets");
+        workerTask.commitOffsets();
 
         PowerMock.verifyAll();
+
+        //Double check to make sure all submitted records were cleared
+        assertEquals(0, ((SubmittedRecords) Whitebox.getInternalState(workerTask,
+                "submittedRecords")).records.size());
     }
 
     @Test
@@ -863,6 +739,8 @@ public void testSlowTaskStart() throws Exception {
 
         createWorkerTask();
 
+        offsetStore.start();
+        EasyMock.expectLastCall();
         sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
         EasyMock.expectLastCall();
         sourceTask.start(TASK_PROPS);
@@ -919,440 +797,12 @@ public void testCancel() {
         PowerMock.verifyAll();
     }
 
-    @Test
-    public void testMetricsGroup() {
-        SourceTaskMetricsGroup group = new SourceTaskMetricsGroup(taskId, metrics);
-        SourceTaskMetricsGroup group1 = new SourceTaskMetricsGroup(taskId1, metrics);
-        for (int i = 0; i != 10; ++i) {
-            group.recordPoll(100, 1000 + i * 100);
-            group.recordWrite(10);
-        }
-        for (int i = 0; i != 20; ++i) {
-            group1.recordPoll(100, 1000 + i * 100);
-            group1.recordWrite(10);
-        }
-        assertEquals(1900.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "poll-batch-max-time-ms"), 0.001d);
-        assertEquals(1450.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "poll-batch-avg-time-ms"), 0.001d);
-        assertEquals(33.333, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-poll-rate"), 0.001d);
-        assertEquals(1000, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-poll-total"), 0.001d);
-        assertEquals(3.3333, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-write-rate"), 0.001d);
-        assertEquals(100, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-write-total"), 0.001d);
-        assertEquals(900.0, metrics.currentMetricValueAsDouble(group.metricGroup(), "source-record-active-count"), 0.001d);
-
-        // Close the group
-        group.close();
-
-        for (MetricName metricName : group.metricGroup().metrics().metrics().keySet()) {
-            // Metrics for this group should no longer exist
-            assertFalse(group.metricGroup().groupId().includes(metricName));
-        }
-        // Sensors for this group should no longer exist
-        assertNull(group.metricGroup().metrics().getSensor("sink-record-read"));
-        assertNull(group.metricGroup().metrics().getSensor("sink-record-send"));
-        assertNull(group.metricGroup().metrics().getSensor("sink-record-active-count"));
-        assertNull(group.metricGroup().metrics().getSensor("partition-count"));
-        assertNull(group.metricGroup().metrics().getSensor("offset-seq-number"));
-        assertNull(group.metricGroup().metrics().getSensor("offset-commit-completion"));
-        assertNull(group.metricGroup().metrics().getSensor("offset-commit-completion-skip"));
-        assertNull(group.metricGroup().metrics().getSensor("put-batch-time"));
-
-        assertEquals(2900.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "poll-batch-max-time-ms"), 0.001d);
-        assertEquals(1950.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "poll-batch-avg-time-ms"), 0.001d);
-        assertEquals(66.667, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-poll-rate"), 0.001d);
-        assertEquals(2000, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-poll-total"), 0.001d);
-        assertEquals(6.667, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-write-rate"), 0.001d);
-        assertEquals(200, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-write-total"), 0.001d);
-        assertEquals(1800.0, metrics.currentMetricValueAsDouble(group1.metricGroup(), "source-record-active-count"), 0.001d);
-    }
-
-    @Test
-    public void testHeaders() throws Exception {
-        Headers headers = new RecordHeaders();
-        headers.add("header_key", "header_value".getBytes());
-
-        org.apache.kafka.connect.header.Headers connectHeaders = new ConnectHeaders();
-        connectHeaders.add("header_key", new SchemaAndValue(Schema.STRING_SCHEMA, "header_value"));
-
-        createWorkerTask();
-
-        List<SourceRecord> records = new ArrayList<>();
-        records.add(new SourceRecord(PARTITION, OFFSET, TOPIC, null, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD, null, connectHeaders));
-
-        expectTopicCreation(TOPIC);
-
-        Capture<ProducerRecord<byte[], byte[]>> sent = expectSendRecord(TOPIC, true, true, true, true, headers);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(SERIALIZED_KEY, sent.getValue().key());
-        assertEquals(SERIALIZED_RECORD, sent.getValue().value());
-        assertEquals(headers, sent.getValue().headers());
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testHeadersWithCustomConverter() throws Exception {
-        StringConverter stringConverter = new StringConverter();
-        SampleConverterWithHeaders testConverter = new SampleConverterWithHeaders();
-
-        createWorkerTask(TargetState.STARTED, stringConverter, testConverter, stringConverter);
-
-        List<SourceRecord> records = new ArrayList<>();
-
-        String stringA = "Árvíztűrő tükörfúrógép";
-        org.apache.kafka.connect.header.Headers headersA = new ConnectHeaders();
-        String encodingA = "latin2";
-        headersA.addString("encoding", encodingA);
-
-        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, Schema.STRING_SCHEMA, "a", Schema.STRING_SCHEMA, stringA, null, headersA));
-
-        String stringB = "Тестовое сообщение";
-        org.apache.kafka.connect.header.Headers headersB = new ConnectHeaders();
-        String encodingB = "koi8_r";
-        headersB.addString("encoding", encodingB);
-
-        records.add(new SourceRecord(PARTITION, OFFSET, "topic", null, Schema.STRING_SCHEMA, "b", Schema.STRING_SCHEMA, stringB, null, headersB));
-
-        expectTopicCreation(TOPIC);
-
-        Capture<ProducerRecord<byte[], byte[]>> sentRecordA = expectSendRecord(TOPIC, false, true, true, false, null);
-        Capture<ProducerRecord<byte[], byte[]>> sentRecordB = expectSendRecord(TOPIC, false, true, true, false, null);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", records);
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-
-        assertEquals(ByteBuffer.wrap("a".getBytes()), ByteBuffer.wrap(sentRecordA.getValue().key()));
-        assertEquals(
-            ByteBuffer.wrap(stringA.getBytes(encodingA)),
-            ByteBuffer.wrap(sentRecordA.getValue().value())
-        );
-        assertEquals(encodingA, new String(sentRecordA.getValue().headers().lastHeader("encoding").value()));
-
-        assertEquals(ByteBuffer.wrap("b".getBytes()), ByteBuffer.wrap(sentRecordB.getValue().key()));
-        assertEquals(
-            ByteBuffer.wrap(stringB.getBytes(encodingB)),
-            ByteBuffer.wrap(sentRecordB.getValue().value())
-        );
-        assertEquals(encodingB, new String(sentRecordB.getValue().headers().lastHeader("encoding").value()));
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testTopicCreateWhenTopicExists() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        TopicPartitionInfo topicPartitionInfo = new TopicPartitionInfo(0, null, Collections.emptyList(), Collections.emptyList());
-        TopicDescription topicDesc = new TopicDescription(TOPIC, false, Collections.singletonList(topicPartitionInfo));
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.singletonMap(TOPIC, topicDesc));
-
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-    }
-
-    @Test
-    public void testSendRecordsTopicDescribeRetries() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        // First round - call to describe the topic times out
-        EasyMock.expect(admin.describeTopics(TOPIC))
-            .andThrow(new RetriableException(new TimeoutException("timeout")));
-
-        // Second round - calls to describe and create succeed
-        expectTopicCreation(TOPIC);
-        // Exactly two records are sent
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(Arrays.asList(record1, record2), Whitebox.getInternalState(workerTask, "toSend"));
-
-        // Next they all succeed
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertNull(Whitebox.getInternalState(workerTask, "toSend"));
-    }
-
-    @Test
-    public void testSendRecordsTopicCreateRetries() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        // First call to describe the topic times out
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
-            .andThrow(new RetriableException(new TimeoutException("timeout")));
-
-        // Second round
-        expectTopicCreation(TOPIC);
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(Arrays.asList(record1, record2), Whitebox.getInternalState(workerTask, "toSend"));
-
-        // Next they all succeed
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertNull(Whitebox.getInternalState(workerTask, "toSend"));
-    }
-
-    @Test
-    public void testSendRecordsTopicDescribeRetriesMidway() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        // Differentiate only by Kafka partition so we can reuse conversion expectations
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, OTHER_TOPIC, 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        // First round
-        expectPreliminaryCalls(OTHER_TOPIC);
-        expectTopicCreation(TOPIC);
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        // First call to describe the topic times out
-        EasyMock.expect(admin.describeTopics(OTHER_TOPIC))
-            .andThrow(new RetriableException(new TimeoutException("timeout")));
-
-        // Second round
-        expectTopicCreation(OTHER_TOPIC);
-        expectSendRecord(OTHER_TOPIC, false, true, true, true, emptyHeaders());
-
-        PowerMock.replayAll();
-
-        // Try to send 3, make first pass, second fail. Should save last two
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2, record3));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(Arrays.asList(record3), Whitebox.getInternalState(workerTask, "toSend"));
-
-        // Next they all succeed
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertNull(Whitebox.getInternalState(workerTask, "toSend"));
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testSendRecordsTopicCreateRetriesMidway() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        // Differentiate only by Kafka partition so we can reuse conversion expectations
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record3 = new SourceRecord(PARTITION, OFFSET, OTHER_TOPIC, 3, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        // First round
-        expectPreliminaryCalls(OTHER_TOPIC);
-        expectTopicCreation(TOPIC);
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        EasyMock.expect(admin.describeTopics(OTHER_TOPIC)).andReturn(Collections.emptyMap());
-        // First call to create the topic times out
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
-            .andThrow(new RetriableException(new TimeoutException("timeout")));
-
-        // Second round
-        expectTopicCreation(OTHER_TOPIC);
-        expectSendRecord(OTHER_TOPIC, false, true, true, true, emptyHeaders());
-
-        PowerMock.replayAll();
-
-        // Try to send 3, make first pass, second fail. Should save last two
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2, record3));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertEquals(Arrays.asList(record3), Whitebox.getInternalState(workerTask, "toSend"));
-
-        // Next they all succeed
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-        assertNull(Whitebox.getInternalState(workerTask, "toSend"));
-
-        PowerMock.verifyAll();
-    }
-
-    @Test
-    public void testTopicDescribeFails() {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC))
-            .andThrow(new ConnectException(new TopicAuthorizationException("unauthorized")));
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        assertThrows(ConnectException.class, () -> Whitebox.invokeMethod(workerTask, "sendRecords"));
-    }
-
-    @Test
-    public void testTopicCreateFails() {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
-
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture)))
-            .andThrow(new ConnectException(new TopicAuthorizationException("unauthorized")));
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        assertThrows(ConnectException.class, () -> Whitebox.invokeMethod(workerTask, "sendRecords"));
-        assertTrue(newTopicCapture.hasCaptured());
-    }
-
-    @Test
-    public void testTopicCreateFailsWithExceptionWhenCreateReturnsTopicNotCreatedOrFound() {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
-
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(TopicAdmin.EMPTY_CREATION);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        assertThrows(ConnectException.class, () -> Whitebox.invokeMethod(workerTask, "sendRecords"));
-        assertTrue(newTopicCapture.hasCaptured());
-    }
-
-    @Test
-    public void testTopicCreateSucceedsWhenCreateReturnsExistingTopicFound() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
-
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(foundTopic(TOPIC));
-
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-    }
-
-    @Test
-    public void testTopicCreateSucceedsWhenCreateReturnsNewTopicFound() throws Exception {
-        if (!enableTopicCreation)
-            // should only test with topic creation enabled
-            return;
-
-        createWorkerTask();
-
-        SourceRecord record1 = new SourceRecord(PARTITION, OFFSET, TOPIC, 1, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-        SourceRecord record2 = new SourceRecord(PARTITION, OFFSET, TOPIC, 2, KEY_SCHEMA, KEY, RECORD_SCHEMA, RECORD);
-
-        expectPreliminaryCalls();
-        EasyMock.expect(admin.describeTopics(TOPIC)).andReturn(Collections.emptyMap());
-
-        Capture<NewTopic> newTopicCapture = EasyMock.newCapture();
-        EasyMock.expect(admin.createOrFindTopics(EasyMock.capture(newTopicCapture))).andReturn(createdTopic(TOPIC));
-
-        expectSendRecordTaskCommitRecordSucceed(false);
-        expectSendRecordTaskCommitRecordSucceed(false);
-
-        PowerMock.replayAll();
-
-        Whitebox.setInternalState(workerTask, "toSend", Arrays.asList(record1, record2));
-        Whitebox.invokeMethod(workerTask, "sendRecords");
-    }
-
     private TopicAdmin.TopicCreationResponse createdTopic(String topic) {
         Set<String> created = Collections.singleton(topic);
         Set<String> existing = Collections.emptySet();
         return new TopicAdmin.TopicCreationResponse(created, existing);
     }
 
-    private TopicAdmin.TopicCreationResponse foundTopic(String topic) {
-        Set<String> created = Collections.emptySet();
-        Set<String> existing = Collections.singleton(topic);
-        return new TopicAdmin.TopicCreationResponse(created, existing);
-    }
-
     private void expectPreliminaryCalls() {
         expectPreliminaryCalls(TOPIC);
     }
@@ -1616,6 +1066,17 @@ private RecordHeaders emptyHeaders() {
     private abstract static class TestSourceTask extends SourceTask {
     }
 
+    private void expectCleanStartup() {
+        offsetStore.start();
+        EasyMock.expectLastCall();
+        sourceTask.initialize(EasyMock.anyObject(SourceTaskContext.class));
+        EasyMock.expectLastCall();
+        sourceTask.start(TASK_PROPS);
+        EasyMock.expectLastCall();
+        statusListener.onStartup(taskId);
+        EasyMock.expectLastCall();
+    }
+
     private void expectClose() {
         producer.close(EasyMock.anyObject(Duration.class));
         EasyMock.expectLastCall();
@@ -1625,6 +1086,19 @@ private void expectClose() {
 
         transformationChain.close();
         EasyMock.expectLastCall();
+
+        offsetReader.close();
+        EasyMock.expectLastCall();
+
+        offsetStore.stop();
+        EasyMock.expectLastCall();
+
+        try {
+            headerConverter.close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        EasyMock.expectLastCall();
     }
 
     private void expectTopicCreation(String topic) {
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java
index 94306ff68418e..a064e296b223c 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTest.java
@@ -16,15 +16,15 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import java.util.Collection;
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.admin.FenceProducersResult;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
-import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
-import org.apache.kafka.common.Configurable;
+import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.config.AbstractConfig;
-import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.provider.MockFileConfigProvider;
 import org.apache.kafka.common.metrics.MetricsReporter;
@@ -32,19 +32,16 @@
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.connect.connector.ConnectorContext;
-import org.apache.kafka.connect.connector.Task;
 import org.apache.kafka.connect.connector.policy.AllConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy;
-import org.apache.kafka.connect.data.Schema;
-import org.apache.kafka.connect.data.SchemaAndValue;
 import org.apache.kafka.connect.errors.ConnectException;
+import org.apache.kafka.connect.health.ConnectorType;
 import org.apache.kafka.connect.json.JsonConverter;
-import org.apache.kafka.connect.json.JsonConverterConfig;
 import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
 import org.apache.kafka.connect.runtime.MockConnectMetrics.MockMetricsReporter;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
-import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
+import org.apache.kafka.connect.storage.ClusterConfigState;
+import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
 import org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader;
 import org.apache.kafka.connect.runtime.isolation.PluginClassLoader;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
@@ -52,15 +49,15 @@
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
 import org.apache.kafka.connect.runtime.standalone.StandaloneConfig;
 import org.apache.kafka.connect.sink.SinkConnector;
+import org.apache.kafka.connect.sink.SinkRecord;
 import org.apache.kafka.connect.sink.SinkTask;
 import org.apache.kafka.connect.source.SourceConnector;
 import org.apache.kafka.connect.source.SourceRecord;
 import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.ConnectorOffsetBackingStore;
 import org.apache.kafka.connect.storage.Converter;
 import org.apache.kafka.connect.storage.HeaderConverter;
 import org.apache.kafka.connect.storage.OffsetBackingStore;
-import org.apache.kafka.connect.storage.OffsetStorageReader;
-import org.apache.kafka.connect.storage.OffsetStorageWriter;
 import org.apache.kafka.connect.storage.StatusBackingStore;
 import org.apache.kafka.connect.util.ConnectUtils;
 import org.apache.kafka.connect.util.ConnectorTaskId;
@@ -68,47 +65,56 @@
 import org.apache.kafka.connect.util.ParameterizedTest;
 import org.apache.kafka.connect.util.ThreadedTest;
 import org.apache.kafka.connect.util.TopicAdmin;
-import org.apache.kafka.connect.util.TopicCreationGroup;
-import org.easymock.EasyMock;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
-import org.powermock.api.easymock.PowerMock;
-import org.powermock.api.easymock.annotation.Mock;
-import org.powermock.api.easymock.annotation.MockNice;
-import org.powermock.api.easymock.annotation.MockStrict;
-import org.powermock.core.classloader.annotations.PowerMockIgnore;
-import org.powermock.core.classloader.annotations.PrepareForTest;
-import org.powermock.modules.junit4.PowerMockRunner;
+import org.junit.runners.Parameterized;
+import org.mockito.Mock;
+import org.mockito.MockedConstruction;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
+import org.mockito.MockitoSession;
+import org.mockito.internal.stubbing.answers.CallsRealMethods;
+import org.mockito.quality.Strictness;
 
 import javax.management.MBeanServer;
-import javax.management.ObjectInstance;
 import javax.management.ObjectName;
 import java.lang.management.ManagementFactory;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
-import org.powermock.modules.junit4.PowerMockRunnerDelegate;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+
+import static org.apache.kafka.clients.admin.AdminClientConfig.RETRY_BACKOFF_MS_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG;
 
+import static org.apache.kafka.clients.consumer.ConsumerConfig.ISOLATION_LEVEL_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.TRANSACTIONAL_ID_CONFIG;
+import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.DEFAULT_TOPIC_CREATION_PREFIX;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.PARTITIONS_CONFIG;
 import static org.apache.kafka.connect.runtime.TopicCreationConfig.REPLICATION_FACTOR_CONFIG;
+import static org.apache.kafka.connect.runtime.WorkerConfig.BOOTSTRAP_SERVERS_CONFIG;
 import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_CREATION_ENABLE_CONFIG;
-import static org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperatorTest.NOOP_OPERATOR;
-import static org.easymock.EasyMock.anyObject;
-import static org.easymock.EasyMock.eq;
-import static org.easymock.EasyMock.expectLastCall;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.CONFIG_TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.GROUP_ID_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.OFFSET_STORAGE_TOPIC_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG;
+import static org.apache.kafka.connect.sink.SinkTask.TOPICS_CONFIG;
 import static org.hamcrest.CoreMatchers.instanceOf;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertEquals;
@@ -116,12 +122,25 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-
-@RunWith(PowerMockRunner.class)
-@PowerMockRunnerDelegate(ParameterizedTest.class)
-@PrepareForTest({Worker.class, Plugins.class, ConnectUtils.class})
-@PowerMockIgnore("javax.management.*")
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockConstructionWithAnswer;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
+
+@RunWith(Parameterized.class)
 public class WorkerTest extends ThreadedTest {
 
     private static final String CONNECTOR_ID = "test-connector";
@@ -131,44 +150,61 @@ public class WorkerTest extends ThreadedTest {
     private final ConnectorClientConfigOverridePolicy noneConnectorClientConfigOverridePolicy = new NoneConnectorClientConfigOverridePolicy();
     private final ConnectorClientConfigOverridePolicy allConnectorClientConfigOverridePolicy = new AllConnectorClientConfigOverridePolicy();
 
-    private Map<String, String> workerProps = new HashMap<>();
+    private final Map<String, String> workerProps = new HashMap<>();
     private WorkerConfig config;
     private Worker worker;
 
-    private Map<String, String> defaultProducerConfigs = new HashMap<>();
-    private Map<String, String> defaultConsumerConfigs = new HashMap<>();
+    private final Map<String, String> defaultProducerConfigs = new HashMap<>();
+    private final Map<String, String> defaultConsumerConfigs = new HashMap<>();
 
     @Mock
     private Plugins plugins;
+
     @Mock
     private PluginClassLoader pluginLoader;
+
     @Mock
     private DelegatingClassLoader delegatingLoader;
+
     @Mock
     private OffsetBackingStore offsetBackingStore;
-    @MockStrict
+
+    @Mock
     private TaskStatus.Listener taskStatusListener;
-    @MockStrict
+
+    @Mock
     private ConnectorStatus.Listener connectorStatusListener;
 
-    @Mock private Herder herder;
-    @Mock private StatusBackingStore statusBackingStore;
-    @Mock private SourceConnector sourceConnector;
-    @Mock private SinkConnector sinkConnector;
-    @Mock private CloseableConnectorContext ctx;
+    @Mock
+    private Herder herder;
+
+    @Mock
+    private StatusBackingStore statusBackingStore;
+
+    @Mock
+    private SourceConnector sourceConnector;
+
+    @Mock
+    private SinkConnector sinkConnector;
+
+    @Mock
+    private CloseableConnectorContext ctx;
+
     @Mock private TestSourceTask task;
-    @Mock private WorkerSourceTask workerTask;
-    @Mock private Converter keyConverter;
-    @Mock private Converter valueConverter;
     @Mock private Converter taskKeyConverter;
     @Mock private Converter taskValueConverter;
     @Mock private HeaderConverter taskHeaderConverter;
     @Mock private ExecutorService executorService;
-    @MockNice private ConnectorConfig connectorConfig;
+    @Mock private ConnectorConfig connectorConfig;
     private String mockFileProviderTestId;
     private Map<String, String> connectorProps;
 
-    private boolean enableTopicCreation;
+    private final boolean enableTopicCreation;
+
+    private MockedStatic<Plugins> pluginsMockedStatic;
+    private MockedStatic<ConnectUtils> connectUtilsMockedStatic;
+    private MockedConstruction<WorkerSourceTask> sourceTaskMockedConstruction;
+    private MockitoSession mockitoSession;
 
     @ParameterizedTest.Parameters
     public static Collection<Boolean> parameters() {
@@ -182,6 +218,13 @@ public WorkerTest(boolean enableTopicCreation) {
     @Before
     public void setup() {
         super.setup();
+
+        // Use strict mode to detect unused mocks
+        mockitoSession = Mockito.mockitoSession()
+                                .initMocks(this)
+                                .strictness(Strictness.STRICT_STUBS)
+                                .startMocking();
+
         workerProps.put("key.converter", "org.apache.kafka.connect.json.JsonConverter");
         workerProps.put("value.converter", "org.apache.kafka.connect.json.JsonConverter");
         workerProps.put("offset.storage.file.filename", "/tmp/connect.offsets");
@@ -199,11 +242,17 @@ public void setup() {
         defaultProducerConfigs.put(
             ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
         defaultProducerConfigs.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, Long.toString(Long.MAX_VALUE));
+        // By default, producers that are instantiated and used by Connect have idempotency disabled even after idempotency became
+        // default for Kafka producers. This is chosen to avoid breaking changes when Connect contacts Kafka brokers that do not support
+        // idempotent producers or require explicit steps to enable them (e.g. adding the IDEMPOTENT_WRITE ACL to brokers older than 2.8).
+        // These settings might change when https://cwiki.apache.org/confluence/display/KAFKA/KIP-318%3A+Make+Kafka+Connect+Source+idempotent
+        // gets approved and scheduled for release.
+        defaultProducerConfigs.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
         defaultProducerConfigs.put(ProducerConfig.ACKS_CONFIG, "all");
         defaultProducerConfigs.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "1");
         defaultProducerConfigs.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, Integer.toString(Integer.MAX_VALUE));
 
-        defaultConsumerConfigs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
+        defaultConsumerConfigs.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
         defaultConsumerConfigs.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
         defaultConsumerConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
         defaultConsumerConfigs
@@ -213,69 +262,72 @@ public void setup() {
 
         // Some common defaults. They might change on individual tests
         connectorProps = anyConnectorConfigMap();
-        PowerMock.mockStatic(Plugins.class);
+
+        pluginsMockedStatic = mockStatic(Plugins.class);
+
+        // pass through things that aren't explicitly mocked out
+        connectUtilsMockedStatic = mockStatic(ConnectUtils.class, new CallsRealMethods());
+        connectUtilsMockedStatic.when(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class))).thenReturn(CLUSTER_ID);
+
+        // Make calls to new WorkerSourceTask() return a mock to avoid the source task trying to connect to a broker.
+        sourceTaskMockedConstruction = mockConstructionWithAnswer(WorkerSourceTask.class, invocation -> {
+
+            // provide implementations of three methods used during testing
+            switch (invocation.getMethod().getName()) {
+                case "id":
+                    return TASK_ID;
+                case "loader":
+                    return pluginLoader;
+                case "awaitStop":
+                    return true;
+                default:
+                    return null;
+            }
+        });
+    }
+
+    @After
+    public void teardown() {
+        // Critical to always close MockedStatics
+        // Ideal would be to use try-with-resources in an individual test, but it introduced a rather large level of
+        // indentation of most test bodies, hence sticking with setup() / teardown()
+        pluginsMockedStatic.close();
+        connectUtilsMockedStatic.close();
+        sourceTaskMockedConstruction.close();
+
+        mockitoSession.finishMocking();
     }
 
     @Test
     public void testStartAndStopConnector() throws Throwable {
-        expectConverters();
-        expectStartStorage();
-
-        final String connectorClass = WorkerTestConnector.class.getName();
+        final String connectorClass = SampleSourceConnector.class.getName();
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, connectorClass);
 
         // Create
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(connectorClass)).andReturn(pluginLoader);
-        EasyMock.expect(plugins.newConnector(connectorClass))
-                .andReturn(sourceConnector);
-        EasyMock.expect(sourceConnector.version()).andReturn("1.0");
-
-        connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, connectorClass);
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(connectorClass)).thenReturn(pluginLoader);
+        when(plugins.newConnector(connectorClass)).thenReturn(sourceConnector);
+        when(sourceConnector.version()).thenReturn("1.0");
 
-        EasyMock.expect(sourceConnector.version()).andReturn("1.0");
-
-        expectFileConfigProvider();
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader))
-                .andReturn(delegatingLoader)
-                .times(3);
-        sourceConnector.initialize(anyObject(ConnectorContext.class));
-        EasyMock.expectLastCall();
-        sourceConnector.start(connectorProps);
-        EasyMock.expectLastCall();
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader).times(3);
-
-        connectorStatusListener.onStartup(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        // Remove
-        sourceConnector.stop();
-        EasyMock.expectLastCall();
-
-        connectorStatusListener.onShutdown(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        expectStopStorage();
-        expectClusterId();
-
-        PowerMock.replayAll();
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+        connectUtilsMockedStatic.when(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)))
+                                .thenReturn(CLUSTER_ID);
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
-        worker.herder = herder;
         worker.start();
 
         assertEquals(Collections.emptySet(), worker.connectorNames());
 
         FutureCallback<TargetState> onFirstStart = new FutureCallback<>();
+
         worker.startConnector(CONNECTOR_ID, connectorProps, ctx, connectorStatusListener, TargetState.STARTED, onFirstStart);
+
         // Wait for the connector to actually start
         assertEquals(TargetState.STARTED, onFirstStart.get(1000, TimeUnit.MILLISECONDS));
-        assertEquals(new HashSet<>(Arrays.asList(CONNECTOR_ID)), worker.connectorNames());
+        assertEquals(Collections.singleton(CONNECTOR_ID), worker.connectorNames());
+
 
         FutureCallback<TargetState> onSecondStart = new FutureCallback<>();
         worker.startConnector(CONNECTOR_ID, connectorProps, ctx, connectorStatusListener, TargetState.STARTED, onSecondStart);
@@ -289,54 +341,59 @@ public void testStartAndStopConnector() throws Throwable {
         assertStatistics(worker, 1, 0);
         assertStartupStatistics(worker, 1, 0, 0, 0);
         worker.stopAndAwaitConnector(CONNECTOR_ID);
+
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 1, 0, 0, 0);
         assertEquals(Collections.emptySet(), worker.connectorNames());
+
         // Nothing should be left, so this should effectively be a nop
         worker.stop();
         assertStatistics(worker, 0, 0);
 
-        PowerMock.verifyAll();
+
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(connectorClass);
+        verify(plugins).newConnector(connectorClass);
+        verify(sourceConnector, times(2)).version();
+        verify(sourceConnector).initialize(any(ConnectorContext.class));
+        verify(sourceConnector).start(connectorProps);
+        verify(connectorStatusListener).onStartup(CONNECTOR_ID);
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
+
+        verify(sourceConnector).stop();
+        verify(connectorStatusListener).onShutdown(CONNECTOR_ID);
+        verify(ctx).close();
         MockFileConfigProvider.assertClosed(mockFileProviderTestId);
     }
 
-    private void expectFileConfigProvider() {
-        EasyMock.expect(plugins.newConfigProvider(EasyMock.anyObject(),
-                    EasyMock.eq("config.providers.file"), EasyMock.anyObject()))
-                .andAnswer(() -> {
-                    MockFileConfigProvider mockFileConfigProvider = new MockFileConfigProvider();
-                    mockFileConfigProvider.configure(Collections.singletonMap("testId", mockFileProviderTestId));
-                    return mockFileConfigProvider;
-                });
+    private void mockFileConfigProvider() {
+        MockFileConfigProvider mockFileConfigProvider = new MockFileConfigProvider();
+        mockFileConfigProvider.configure(Collections.singletonMap("testId", mockFileProviderTestId));
+        when(plugins.newConfigProvider(any(AbstractConfig.class),
+                                       eq("config.providers.file"),
+                                       any(ClassLoaderUsage.class)))
+               .thenReturn(mockFileConfigProvider);
     }
 
     @Test
     public void testStartConnectorFailure() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
         final String nonConnectorClass = "java.util.HashMap";
-        connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, nonConnectorClass); // Bad connector class name
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, nonConnectorClass); // Bad connector class name
 
         Exception exception = new ConnectException("Failed to find Connector");
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(nonConnectorClass)).andReturn(delegatingLoader);
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(delegatingLoader).times(2);
-        EasyMock.expect(plugins.newConnector(EasyMock.anyString()))
-                .andThrow(exception);
-
-        connectorStatusListener.onFailure(
-                EasyMock.eq(CONNECTOR_ID),
-                EasyMock.<ConnectException>anyObject()
-        );
-        EasyMock.expectLastCall();
 
-        expectClusterId();
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(nonConnectorClass)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(delegatingLoader);
+        connectUtilsMockedStatic.when(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)))
+                                .thenReturn("test-cluster");
 
-        PowerMock.replayAll();
+        when(plugins.newConnector(anyString())).thenThrow(exception);
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
@@ -361,57 +418,34 @@ public void testStartConnectorFailure() throws Exception {
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 1, 1, 0, 0);
 
-        PowerMock.verifyAll();
+        verify(plugins).currentThreadLoader();
+        verify(plugins).delegatingLoader();
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(nonConnectorClass);
+        verify(plugins).newConnector(anyString());
+        verify(connectorStatusListener).onFailure(eq(CONNECTOR_ID), any(ConnectException.class));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
     }
 
     @Test
     public void testAddConnectorByAlias() throws Throwable {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
+        final String connectorAlias = "SampleSourceConnector";
 
-        final String connectorAlias = "WorkerTestConnector";
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(plugins.newConnector(connectorAlias)).thenReturn(sinkConnector);
+        when(delegatingLoader.connectorLoader(connectorAlias)).thenReturn(pluginLoader);
+        when(sinkConnector.version()).thenReturn("1.0");
 
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(connectorAlias)).andReturn(pluginLoader);
-        EasyMock.expect(plugins.newConnector(connectorAlias)).andReturn(sinkConnector);
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+        connectUtilsMockedStatic.when(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)))
+                                .thenReturn("test-cluster");
 
-        connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, connectorAlias);
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, connectorAlias);
         connectorProps.put(SinkConnectorConfig.TOPICS_CONFIG, "gfieyls, wfru");
 
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader))
-                .andReturn(delegatingLoader)
-                .times(3);
-        sinkConnector.initialize(anyObject(ConnectorContext.class));
-        EasyMock.expectLastCall();
-        sinkConnector.start(connectorProps);
-        EasyMock.expectLastCall();
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader)
-                .times(3);
-
-        connectorStatusListener.onStartup(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        // Remove
-        sinkConnector.stop();
-        EasyMock.expectLastCall();
-
-        connectorStatusListener.onShutdown(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        ctx.close();
-        expectLastCall();
-
-        expectStopStorage();
-        expectClusterId();
-
-        PowerMock.replayAll();
-
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
         worker.start();
@@ -422,7 +456,7 @@ public void testAddConnectorByAlias() throws Throwable {
         worker.startConnector(CONNECTOR_ID, connectorProps, ctx, connectorStatusListener, TargetState.STARTED, onStart);
         // Wait for the connector to actually start
         assertEquals(TargetState.STARTED, onStart.get(1000, TimeUnit.MILLISECONDS));
-        assertEquals(new HashSet<>(Arrays.asList(CONNECTOR_ID)), worker.connectorNames());
+        assertEquals(Collections.singleton(CONNECTOR_ID), worker.connectorNames());
         assertStatistics(worker, 1, 0);
         assertStartupStatistics(worker, 1, 0, 0, 0);
 
@@ -435,56 +469,36 @@ public void testAddConnectorByAlias() throws Throwable {
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 1, 0, 0, 0);
 
-        PowerMock.verifyAll();
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).delegatingLoader();
+        verify(plugins).newConnector(connectorAlias);
+        verify(delegatingLoader).connectorLoader(connectorAlias);
+        verify(sinkConnector, times(2)).version();
+        verify(sinkConnector).initialize(any(ConnectorContext.class));
+        verify(sinkConnector).start(connectorProps);
+        verify(sinkConnector).stop();
+        verify(connectorStatusListener).onStartup(CONNECTOR_ID);
+        verify(ctx).close();
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
     }
 
     @Test
     public void testAddConnectorByShortAlias() throws Throwable {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
         final String shortConnectorAlias = "WorkerTest";
 
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(shortConnectorAlias)).andReturn(pluginLoader);
-        EasyMock.expect(plugins.newConnector(shortConnectorAlias)).andReturn(sinkConnector);
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
-
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(plugins.newConnector(shortConnectorAlias)).thenReturn(sinkConnector);
+        when(delegatingLoader.connectorLoader(shortConnectorAlias)).thenReturn(pluginLoader);
+        when(sinkConnector.version()).thenReturn("1.0");
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
         connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, shortConnectorAlias);
-        connectorProps.put(SinkConnectorConfig.TOPICS_CONFIG, "gfieyls, wfru");
-
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader))
-                .andReturn(delegatingLoader)
-                .times(3);
-        sinkConnector.initialize(anyObject(ConnectorContext.class));
-        EasyMock.expectLastCall();
-        sinkConnector.start(connectorProps);
-        EasyMock.expectLastCall();
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader)
-                .times(3);
-
-        connectorStatusListener.onStartup(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        // Remove
-        sinkConnector.stop();
-        EasyMock.expectLastCall();
-
-        connectorStatusListener.onShutdown(CONNECTOR_ID);
-        EasyMock.expectLastCall();
-
-        ctx.close();
-        expectLastCall();
 
-        expectStopStorage();
-        expectClusterId();
-
-        PowerMock.replayAll();
+        connectorProps.put(SinkConnectorConfig.TOPICS_CONFIG, "gfieyls, wfru");
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
@@ -496,7 +510,7 @@ public void testAddConnectorByShortAlias() throws Throwable {
         worker.startConnector(CONNECTOR_ID, connectorProps, ctx, connectorStatusListener, TargetState.STARTED, onStart);
         // Wait for the connector to actually start
         assertEquals(TargetState.STARTED, onStart.get(1000, TimeUnit.MILLISECONDS));
-        assertEquals(new HashSet<>(Arrays.asList(CONNECTOR_ID)), worker.connectorNames());
+        assertEquals(Collections.singleton(CONNECTOR_ID), worker.connectorNames());
         assertStatistics(worker, 1, 0);
 
         worker.stopAndAwaitConnector(CONNECTOR_ID);
@@ -506,81 +520,54 @@ public void testAddConnectorByShortAlias() throws Throwable {
         worker.stop();
         assertStatistics(worker, 0, 0);
 
-        PowerMock.verifyAll();
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).delegatingLoader();
+        verify(plugins).newConnector(shortConnectorAlias);
+        verify(sinkConnector, times(2)).version();
+        verify(sinkConnector).initialize(any(ConnectorContext.class));
+        verify(sinkConnector).start(connectorProps);
+        verify(connectorStatusListener).onStartup(CONNECTOR_ID);
+        verify(sinkConnector).stop();
+        verify(connectorStatusListener).onShutdown(CONNECTOR_ID);
+        verify(ctx).close();
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
     }
 
     @Test
     public void testStopInvalidConnector() {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-        expectClusterId();
-
-        PowerMock.replayAll();
-
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
         worker.start();
 
         worker.stopAndAwaitConnector(CONNECTOR_ID);
 
-        PowerMock.verifyAll();
+        verifyConverters();
     }
 
     @Test
     public void testReconfigureConnectorTasks() throws Throwable {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
-        final String connectorClass = WorkerTestConnector.class.getName();
-
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(3);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader).times(1);
-        EasyMock.expect(delegatingLoader.connectorLoader(connectorClass)).andReturn(pluginLoader);
-        EasyMock.expect(plugins.newConnector(connectorClass))
-                .andReturn(sinkConnector);
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
-
-        connectorProps.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar");
-        connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, connectorClass);
-
-        EasyMock.expect(sinkConnector.version()).andReturn("1.0");
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader))
-                .andReturn(delegatingLoader)
-                .times(4);
-        sinkConnector.initialize(anyObject(ConnectorContext.class));
-        EasyMock.expectLastCall();
-        sinkConnector.start(connectorProps);
-        EasyMock.expectLastCall();
+        final String connectorClass = SampleSourceConnector.class.getName();
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader)
-                .times(4);
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(connectorClass)).thenReturn(pluginLoader);
+        when(plugins.newConnector(connectorClass)).thenReturn(sinkConnector);
+        when(sinkConnector.version()).thenReturn("1.0");
 
-        connectorStatusListener.onStartup(CONNECTOR_ID);
-        EasyMock.expectLastCall();
+        Map<String, String> taskProps = Collections.singletonMap("foo", "bar");
+        when(sinkConnector.taskConfigs(2)).thenReturn(Arrays.asList(taskProps, taskProps));
 
-        // Reconfigure
-        EasyMock.<Class<? extends Task>>expect(sinkConnector.taskClass()).andReturn(TestSourceTask.class);
-        Map<String, String> taskProps = new HashMap<>();
-        taskProps.put("foo", "bar");
-        EasyMock.expect(sinkConnector.taskConfigs(2)).andReturn(Arrays.asList(taskProps, taskProps));
-
-        // Remove
-        sinkConnector.stop();
-        EasyMock.expectLastCall();
+        // Use doReturn().when() syntax due to when().thenReturn() not being able to return wildcard generic types
+        doReturn(TestSourceTask.class).when(sinkConnector).taskClass();
 
-        connectorStatusListener.onShutdown(CONNECTOR_ID);
-        EasyMock.expectLastCall();
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
 
-        ctx.close();
-        expectLastCall();
 
-        expectStopStorage();
-        expectClusterId();
-
-        PowerMock.replayAll();
+        connectorProps.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar");
+        connectorProps.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, connectorClass);
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
@@ -593,7 +580,7 @@ public void testReconfigureConnectorTasks() throws Throwable {
         // Wait for the connector to actually start
         assertEquals(TargetState.STARTED, onFirstStart.get(1000, TimeUnit.MILLISECONDS));
         assertStatistics(worker, 1, 0);
-        assertEquals(new HashSet<>(Arrays.asList(CONNECTOR_ID)), worker.connectorNames());
+        assertEquals(Collections.singleton(CONNECTOR_ID), worker.connectorNames());
 
         FutureCallback<TargetState> onSecondStart = new FutureCallback<>();
         worker.startConnector(CONNECTOR_ID, connectorProps, ctx, connectorStatusListener, TargetState.STARTED, onSecondStart);
@@ -611,7 +598,7 @@ public void testReconfigureConnectorTasks() throws Throwable {
         Map<String, String> expectedTaskProps = new HashMap<>();
         expectedTaskProps.put("foo", "bar");
         expectedTaskProps.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
-        expectedTaskProps.put(SinkTask.TOPICS_CONFIG, "foo,bar");
+        expectedTaskProps.put(TOPICS_CONFIG, "foo,bar");
         assertEquals(2, taskConfigs.size());
         assertEquals(expectedTaskProps, taskConfigs.get(0));
         assertEquals(expectedTaskProps, taskConfigs.get(1));
@@ -625,77 +612,182 @@ public void testReconfigureConnectorTasks() throws Throwable {
         worker.stop();
         assertStatistics(worker, 0, 0);
 
-        PowerMock.verifyAll();
+        verify(plugins, times(3)).currentThreadLoader();
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(connectorClass);
+        verify(plugins).newConnector(connectorClass);
+        verify(sinkConnector, times(2)).version();
+        verify(sinkConnector).initialize(any(ConnectorContext.class));
+        verify(sinkConnector).start(connectorProps);
+        verify(connectorStatusListener).onStartup(CONNECTOR_ID);
+        verify(sinkConnector).taskClass();
+        verify(sinkConnector).taskConfigs(2);
+        verify(sinkConnector).stop();
+        verify(connectorStatusListener).onShutdown(CONNECTOR_ID);
+        verify(ctx).close();
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(3));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(3));
     }
 
     @Test
-    public void testAddRemoveTask() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
+    public void testAddRemoveSourceTask() {
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
+
+        when(plugins.newTask(TestSourceTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
+        when(executorService.submit(any(WorkerSourceTask.class))).thenReturn(null);
+        doReturn(SampleSourceConnector.class).when(plugins).connectorClass(SampleSourceConnector.class.getName());
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
 
-        EasyMock.expect(workerTask.id()).andStubReturn(TASK_ID);
-
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        expectNewWorkerTask();
-        Map<String, String> origProps = new HashMap<>();
-        origProps.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
-
-        TaskConfig taskConfig = new TaskConfig(origProps);
-        // We should expect this call, but the pluginLoader being swapped in is only mocked.
-        // EasyMock.expect(pluginLoader.loadClass(TestSourceTask.class.getName()))
-        //        .andReturn((Class) TestSourceTask.class);
-        EasyMock.expect(plugins.newTask(TestSourceTask.class)).andReturn(task);
-        EasyMock.expect(task.version()).andReturn("1.0");
-
-        workerTask.initialize(taskConfig);
-        EasyMock.expectLastCall();
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
+                noneConnectorClientConfigOverridePolicy);
+        worker.herder = herder;
+        worker.start();
 
-        // Expect that the worker will create converters and will find them using the current classloader ...
-        assertNotNull(taskKeyConverter);
-        assertNotNull(taskValueConverter);
-        assertNotNull(taskHeaderConverter);
-        expectTaskKeyConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, taskKeyConverter);
-        expectTaskValueConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, taskValueConverter);
-        expectTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
+        assertStatistics(worker, 0, 0);
+        assertEquals(Collections.emptySet(), worker.taskIds());
+        worker.startSourceTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED);
+        assertStatistics(worker, 0, 1);
+        assertEquals(Collections.singleton(TASK_ID), worker.taskIds());
+        worker.stopAndAwaitTask(TASK_ID);
+        assertStatistics(worker, 0, 0);
+        assertEquals(Collections.emptySet(), worker.taskIds());
+        // Nothing should be left, so this should effectively be a nop
+        worker.stop();
+        assertStatistics(worker, 0, 0);
 
-        EasyMock.expect(executorService.submit(workerTask)).andReturn(null);
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).newTask(TestSourceTask.class);
+        verify(task).version();
+        verifyTaskConverter(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG);
+        verifyTaskConverter(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG);
+        verifyTaskHeaderConverter();
+
+        verify(executorService).submit(any(WorkerSourceTask.class));
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSourceConnector.class.getName());
+        verify(plugins).connectorClass(SampleSourceConnector.class.getName());
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
+    }
 
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(WorkerTestConnector.class.getName()))
-                .andReturn(pluginLoader);
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader)
-                .times(2);
+    @Test
+    public void testAddRemoveSinkTask() {
+        // Most of the other cases use source tasks; we make sure to get code coverage for sink tasks here as well
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSinkConnector.class.getName())).thenReturn(pluginLoader);
+
+        SinkTask task = mock(TestSinkTask.class);
+        when(plugins.newTask(TestSinkTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
+        when(executorService.submit(any(WorkerSinkTask.class))).thenReturn(null);
+        doReturn(SampleSinkConnector.class).when(plugins).connectorClass(SampleSinkConnector.class.getName());
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSinkTask.class.getName());
 
-        EasyMock.expect(workerTask.loader()).andReturn(pluginLoader);
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
+                noneConnectorClientConfigOverridePolicy);
+        worker.herder = herder;
+        worker.start();
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader)).andReturn(pluginLoader)
-                .times(2);
-        plugins.connectorClass(WorkerTestConnector.class.getName());
-        EasyMock.expectLastCall().andReturn(WorkerTestConnector.class);
-        // Remove
-        workerTask.stop();
-        EasyMock.expectLastCall();
-        EasyMock.expect(workerTask.awaitStop(EasyMock.anyLong())).andStubReturn(true);
-        EasyMock.expectLastCall();
+        assertStatistics(worker, 0, 0);
+        assertEquals(Collections.emptySet(), worker.taskIds());
+        Map<String, String> connectorConfigs = anyConnectorConfigMap();
+        connectorConfigs.put(TOPICS_CONFIG, "t1");
+        connectorConfigs.put(CONNECTOR_CLASS_CONFIG, SampleSinkConnector.class.getName());
 
-        workerTask.removeMetrics();
-        EasyMock.expectLastCall();
+        worker.startSinkTask(TASK_ID, ClusterConfigState.EMPTY, connectorConfigs, origProps, taskStatusListener, TargetState.STARTED);
+        assertStatistics(worker, 0, 1);
+        assertEquals(Collections.singleton(TASK_ID), worker.taskIds());
+        worker.stopAndAwaitTask(TASK_ID);
+        assertStatistics(worker, 0, 0);
+        assertEquals(Collections.emptySet(), worker.taskIds());
+        // Nothing should be left, so this should effectively be a nop
+        worker.stop();
+        assertStatistics(worker, 0, 0);
 
-        expectStopStorage();
-        expectClusterId();
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).newTask(TestSinkTask.class);
+        verify(task).version();
+        verifyTaskConverter(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG);
+        verifyTaskConverter(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG);
+        verifyTaskHeaderConverter();
+
+        verify(executorService).submit(any(WorkerSinkTask.class));
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSinkConnector.class.getName());
+        verify(plugins).connectorClass(SampleSinkConnector.class.getName());
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
+    }
 
-        PowerMock.replayAll();
+    @Test
+    public void testAddRemoveExactlyOnceSourceTask() {
+        Map<String, String> workerProps = new HashMap<>();
+        workerProps.put("key.converter", "org.apache.kafka.connect.json.JsonConverter");
+        workerProps.put("value.converter", "org.apache.kafka.connect.json.JsonConverter");
+        workerProps.put(CommonClientConfigs.METRIC_REPORTER_CLASSES_CONFIG, MockMetricsReporter.class.getName());
+        workerProps.put("config.providers", "file");
+        workerProps.put("config.providers.file.class", MockFileConfigProvider.class.getName());
+        mockFileProviderTestId = UUID.randomUUID().toString();
+        workerProps.put("config.providers.file.param.testId", mockFileProviderTestId);
+        workerProps.put(TOPIC_CREATION_ENABLE_CONFIG, String.valueOf(enableTopicCreation));
+        workerProps.put(GROUP_ID_CONFIG, "connect-cluster");
+        workerProps.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:2606");
+        workerProps.put(OFFSET_STORAGE_TOPIC_CONFIG, "connect-offsets");
+        workerProps.put(CONFIG_TOPIC_CONFIG, "connect-configs");
+        workerProps.put(STATUS_STORAGE_TOPIC_CONFIG, "connect-statuses");
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        config = new DistributedConfig(workerProps);
+
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
+
+        when(plugins.newTask(TestSourceTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
+        when(executorService.submit(any(ExactlyOnceWorkerSourceTask.class))).thenReturn(null);
+        doReturn(SampleSourceConnector.class).when(plugins).connectorClass(SampleSourceConnector.class.getName());
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+
+        Runnable preProducer = mock(Runnable.class);
+        Runnable postProducer = mock(Runnable.class);
+
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
-                            noneConnectorClientConfigOverridePolicy);
+                noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
         worker.start();
+
         assertStatistics(worker, 0, 0);
         assertEquals(Collections.emptySet(), worker.taskIds());
-        worker.startTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED);
+        worker.startExactlyOnceSourceTask(TASK_ID, ClusterConfigState.EMPTY,  anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED, preProducer, postProducer);
         assertStatistics(worker, 0, 1);
-        assertEquals(new HashSet<>(Arrays.asList(TASK_ID)), worker.taskIds());
+        assertEquals(Collections.singleton(TASK_ID), worker.taskIds());
         worker.stopAndAwaitTask(TASK_ID);
         assertStatistics(worker, 0, 0);
         assertEquals(Collections.emptySet(), worker.taskIds());
@@ -703,88 +795,64 @@ public void testAddRemoveTask() throws Exception {
         worker.stop();
         assertStatistics(worker, 0, 0);
 
-        PowerMock.verifyAll();
+        verify(plugins, times(2)).currentThreadLoader();
+        verify(plugins).newTask(TestSourceTask.class);
+        verify(task).version();
+        verifyTaskConverter(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG);
+        verifyTaskConverter(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG);
+        verifyTaskHeaderConverter();
+
+        verify(executorService).submit(any(ExactlyOnceWorkerSourceTask.class));
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSourceConnector.class.getName());
+        verify(plugins).connectorClass(SampleSourceConnector.class.getName());
+
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any(WorkerConfig.class)));
     }
 
     @Test
-    public void testTaskStatusMetricsStatuses() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
+    public void testTaskStatusMetricsStatuses() {
+        mockInternalConverters();
+        mockStorage();
+        mockFileConfigProvider();
 
-        EasyMock.expect(workerTask.id()).andStubReturn(TASK_ID);
 
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        expectNewWorkerTask();
-        Map<String, String> origProps = new HashMap<>();
-        origProps.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
 
         TaskConfig taskConfig = new TaskConfig(origProps);
-        // We should expect this call, but the pluginLoader being swapped in is only mocked.
-        // EasyMock.expect(pluginLoader.loadClass(TestSourceTask.class.getName()))
-        //        .andReturn((Class) TestSourceTask.class);
-        EasyMock.expect(plugins.newTask(TestSourceTask.class)).andReturn(task);
-        EasyMock.expect(task.version()).andReturn("1.0");
 
-        workerTask.initialize(taskConfig);
-        EasyMock.expectLastCall();
+        when(plugins.newTask(TestSourceTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
 
         // Expect that the worker will create converters and will find them using the current classloader ...
         assertNotNull(taskKeyConverter);
         assertNotNull(taskValueConverter);
         assertNotNull(taskHeaderConverter);
-        expectTaskKeyConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, taskKeyConverter);
-        expectTaskValueConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, taskValueConverter);
-        expectTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
-
-        EasyMock.expect(executorService.submit(workerTask)).andReturn(null);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, taskHeaderConverter);
 
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(WorkerTestConnector.class.getName()))
-            .andReturn(pluginLoader);
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader)
-            .times(2);
+        when(executorService.submit(any(WorkerSourceTask.class))).thenReturn(null);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader)).andReturn(pluginLoader)
-            .times(2);
-        plugins.connectorClass(WorkerTestConnector.class.getName());
-        EasyMock.expectLastCall().andReturn(WorkerTestConnector.class);
+        doReturn(SampleSourceConnector.class).when(plugins).connectorClass(SampleSourceConnector.class.getName());
 
-        EasyMock.expect(workerTask.awaitStop(EasyMock.anyLong())).andStubReturn(true);
-        EasyMock.expectLastCall();
-
-        workerTask.removeMetrics();
-        EasyMock.expectLastCall();
 
         // Each time we check the task metrics, the worker will call the herder
-        herder.taskStatus(TASK_ID);
-        EasyMock.expectLastCall()
-            .andReturn(new ConnectorStateInfo.TaskState(0, "RUNNING", "worker", "msg"));
-
-        herder.taskStatus(TASK_ID);
-        EasyMock.expectLastCall()
-            .andReturn(new ConnectorStateInfo.TaskState(0, "PAUSED", "worker", "msg"));
-
-        herder.taskStatus(TASK_ID);
-        EasyMock.expectLastCall()
-            .andReturn(new ConnectorStateInfo.TaskState(0, "FAILED", "worker", "msg"));
-
-        herder.taskStatus(TASK_ID);
-        EasyMock.expectLastCall()
-            .andReturn(new ConnectorStateInfo.TaskState(0, "DESTROYED", "worker", "msg"));
-
-        herder.taskStatus(TASK_ID);
-        EasyMock.expectLastCall()
-            .andReturn(new ConnectorStateInfo.TaskState(0, "UNASSIGNED", "worker", "msg"));
-
-        // Called when we stop the worker
-        EasyMock.expect(workerTask.loader()).andReturn(pluginLoader);
-        workerTask.stop();
-        EasyMock.expectLastCall();
-
-        expectClusterId();
-
-        PowerMock.replayAll();
+        when(herder.taskStatus(TASK_ID)).thenReturn(
+                new ConnectorStateInfo.TaskState(0, "RUNNING", "worker", "msg"),
+                new ConnectorStateInfo.TaskState(0, "PAUSED", "worker", "msg"),
+                new ConnectorStateInfo.TaskState(0, "FAILED", "worker", "msg"),
+                new ConnectorStateInfo.TaskState(0, "DESTROYED", "worker", "msg"),
+                new ConnectorStateInfo.TaskState(0, "UNASSIGNED", "worker", "msg")
+        );
 
         worker = new Worker(WORKER_ID,
             new MockTime(),
@@ -800,7 +868,7 @@ public void testTaskStatusMetricsStatuses() throws Exception {
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 0, 0, 0, 0);
         assertEquals(Collections.emptySet(), worker.taskIds());
-        worker.startTask(
+        worker.startSourceTask(
             TASK_ID,
             ClusterConfigState.EMPTY,
             anyConnectorConfigMap(),
@@ -821,31 +889,39 @@ public void testTaskStatusMetricsStatuses() throws Exception {
         assertStatusMetrics(0L, "connector-destroyed-task-count");
         assertStatusMetrics(0L, "connector-unassigned-task-count");
 
-        PowerMock.verifyAll();
+        WorkerSourceTask instantiatedTask = sourceTaskMockedConstruction.constructed().get(0);
+        verify(instantiatedTask).initialize(taskConfig);
+        verify(herder, times(5)).taskStatus(TASK_ID);
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSourceConnector.class.getName());
+        verify(executorService).submit(instantiatedTask);
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        verify(plugins).connectorClass(SampleSourceConnector.class.getName());
+        verify(instantiatedTask, atLeastOnce()).id();
+        verify(instantiatedTask).awaitStop(anyLong());
+        verify(instantiatedTask).removeMetrics();
+
+        // Called when we stop the worker
+        verify(instantiatedTask).loader();
+        verify(instantiatedTask).stop();
+        verifyTaskConverter(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG);
+        verifyTaskConverter(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG);
+        verifyTaskHeaderConverter();
+        verify(plugins, times(2)).currentThreadLoader();
     }
 
     @Test
     public void testConnectorStatusMetricsGroup_taskStatusCounter() {
         ConcurrentMap<ConnectorTaskId, WorkerTask> tasks = new ConcurrentHashMap<>();
-        tasks.put(new ConnectorTaskId("c1", 0), workerTask);
-        tasks.put(new ConnectorTaskId("c1", 1), workerTask);
-        tasks.put(new ConnectorTaskId("c2", 0), workerTask);
-
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader);
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader);
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader)).andReturn(pluginLoader);
+        tasks.put(new ConnectorTaskId("c1", 0), mock(WorkerSourceTask.class));
+        tasks.put(new ConnectorTaskId("c1", 1), mock(WorkerSourceTask.class));
+        tasks.put(new ConnectorTaskId("c2", 0), mock(WorkerSourceTask.class));
 
-        taskStatusListener.onFailure(EasyMock.eq(TASK_ID), EasyMock.<ConfigException>anyObject());
-        EasyMock.expectLastCall();
+        mockInternalConverters();
+        mockFileConfigProvider();
 
-        expectClusterId();
-
-        PowerMock.replayAll();
+        connectUtilsMockedStatic.when(() -> ConnectUtils.lookupKafkaClusterId(any())).thenReturn(CLUSTER_ID);
 
         worker = new Worker(WORKER_ID,
             new MockTime(),
@@ -861,40 +937,23 @@ public void testConnectorStatusMetricsGroup_taskStatusCounter() {
         assertEquals(2L, (long) metricGroup.taskCounter("c1").metricValue(0L));
         assertEquals(1L, (long) metricGroup.taskCounter("c2").metricValue(0L));
         assertEquals(0L, (long) metricGroup.taskCounter("fakeConnector").metricValue(0L));
+
+        connectUtilsMockedStatic.verify(() -> ConnectUtils.lookupKafkaClusterId(any()));
     }
 
     @Test
     public void testStartTaskFailure() {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
-        Map<String, String> origProps = new HashMap<>();
-        origProps.put(TaskConfig.TASK_CLASS_CONFIG, "missing.From.This.Workers.Classpath");
-
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(WorkerTestConnector.class.getName()))
-                .andReturn(pluginLoader);
-
-        // We would normally expect this since the plugin loader would have been swapped in. However, since we mock out
-        // all classloader changes, the call actually goes to the normal default classloader. However, this works out
-        // fine since we just wanted a ClassNotFoundException anyway.
-        // EasyMock.expect(pluginLoader.loadClass(origProps.get(TaskConfig.TASK_CLASS_CONFIG)))
-        //        .andThrow(new ClassNotFoundException());
+        mockInternalConverters();
+        mockFileConfigProvider();
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader))
-                .andReturn(delegatingLoader);
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, "missing.From.This.Workers.Classpath");
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader);
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
 
-        taskStatusListener.onFailure(EasyMock.eq(TASK_ID), EasyMock.<ConfigException>anyObject());
-        EasyMock.expectLastCall();
-
-        expectClusterId();
-
-        PowerMock.replayAll();
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
@@ -902,157 +961,116 @@ public void testStartTaskFailure() {
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 0, 0, 0, 0);
 
-        assertFalse(worker.startTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED));
+        assertFalse(worker.startSourceTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED));
         assertStartupStatistics(worker, 0, 0, 1, 1);
 
         assertStatistics(worker, 0, 0);
         assertStartupStatistics(worker, 0, 0, 1, 1);
         assertEquals(Collections.emptySet(), worker.taskIds());
 
-        PowerMock.verifyAll();
+        verify(taskStatusListener).onFailure(eq(TASK_ID), any(ConfigException.class));
+        pluginsMockedStatic.verify(() ->  Plugins.compareAndSwapLoaders(pluginLoader));
+        pluginsMockedStatic.verify(() ->  Plugins.compareAndSwapLoaders(delegatingLoader));
     }
 
     @Test
-    public void testCleanupTasksOnStop() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
-        EasyMock.expect(workerTask.id()).andStubReturn(TASK_ID);
-
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        expectNewWorkerTask();
-        Map<String, String> origProps = new HashMap<>();
-        origProps.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
-
-        TaskConfig taskConfig = new TaskConfig(origProps);
-        // We should expect this call, but the pluginLoader being swapped in is only mocked.
-        // EasyMock.expect(pluginLoader.loadClass(TestSourceTask.class.getName()))
-        //        .andReturn((Class) TestSourceTask.class);
-        EasyMock.expect(plugins.newTask(TestSourceTask.class)).andReturn(task);
-        EasyMock.expect(task.version()).andReturn("1.0");
+    public void testCleanupTasksOnStop() {
+        mockInternalConverters();
+        mockStorage();
+        mockFileConfigProvider();
 
-        workerTask.initialize(taskConfig);
-        EasyMock.expectLastCall();
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        when(plugins.newTask(TestSourceTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
 
         // Expect that the worker will create converters and will not initially find them using the current classloader ...
         assertNotNull(taskKeyConverter);
         assertNotNull(taskValueConverter);
         assertNotNull(taskHeaderConverter);
-        expectTaskKeyConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskKeyConverters(ClassLoaderUsage.PLUGINS, taskKeyConverter);
-        expectTaskValueConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskValueConverters(ClassLoaderUsage.PLUGINS, taskValueConverter);
-        expectTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskHeaderConverter(ClassLoaderUsage.PLUGINS, taskHeaderConverter);
-
-        EasyMock.expect(executorService.submit(workerTask)).andReturn(null);
-
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(WorkerTestConnector.class.getName()))
-                .andReturn(pluginLoader);
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader)
-                .times(2);
-
-        EasyMock.expect(workerTask.loader()).andReturn(pluginLoader);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, null);
+        mockTaskConverter(ClassLoaderUsage.PLUGINS, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, null);
+        mockTaskConverter(ClassLoaderUsage.PLUGINS, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
+        mockTaskHeaderConverter(ClassLoaderUsage.PLUGINS, taskHeaderConverter);
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader)).andReturn(pluginLoader)
-                .times(2);
-        plugins.connectorClass(WorkerTestConnector.class.getName());
-        EasyMock.expectLastCall().andReturn(WorkerTestConnector.class);
-        // Remove on Worker.stop()
-        workerTask.stop();
-        EasyMock.expectLastCall();
+        when(executorService.submit(any(WorkerSourceTask.class))).thenReturn(null);
 
-        EasyMock.expect(workerTask.awaitStop(EasyMock.anyLong())).andReturn(true);
-        // Note that in this case we *do not* commit offsets since it's an unclean shutdown
-        EasyMock.expectLastCall();
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
+        doReturn(SampleSourceConnector.class).when(plugins).connectorClass(SampleSourceConnector.class.getName());
 
-        workerTask.removeMetrics();
-        EasyMock.expectLastCall();
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
 
-        expectStopStorage();
-        expectClusterId();
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
 
-        PowerMock.replayAll();
+        TaskConfig taskConfig = new TaskConfig(origProps);
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
                             noneConnectorClientConfigOverridePolicy);
         worker.herder = herder;
         worker.start();
         assertStatistics(worker, 0, 0);
-        worker.startTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED);
+        worker.startSourceTask(TASK_ID, ClusterConfigState.EMPTY, anyConnectorConfigMap(), origProps, taskStatusListener, TargetState.STARTED);
         assertStatistics(worker, 0, 1);
         worker.stop();
         assertStatistics(worker, 0, 0);
 
-        PowerMock.verifyAll();
+        verifyStorage();
+
+        WorkerSourceTask constructedMockTask = sourceTaskMockedConstruction.constructed().get(0);
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        verify(plugins).newTask(TestSourceTask.class);
+        verify(plugins, times(2)).currentThreadLoader();
+
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSourceConnector.class.getName());
+        verify(plugins).connectorClass(SampleSourceConnector.class.getName());
+        verify(constructedMockTask).initialize(taskConfig);
+        verify(constructedMockTask).loader();
+        verify(constructedMockTask).stop();
+        verify(constructedMockTask).awaitStop(anyLong());
+        verify(constructedMockTask).removeMetrics();
+        verifyConverters();
+
+        verify(executorService).submit(any(WorkerSourceTask.class));
     }
 
     @Test
-    public void testConverterOverrides() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
-
-        EasyMock.expect(workerTask.id()).andStubReturn(TASK_ID);
-
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        expectNewWorkerTask();
-        Map<String, String> origProps = new HashMap<>();
-        origProps.put(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
+    public void testConverterOverrides() {
+        mockInternalConverters();
+        mockStorage();
+        mockFileConfigProvider();
 
+        when(plugins.currentThreadLoader()).thenReturn(delegatingLoader);
+        Map<String, String> origProps = Collections.singletonMap(TaskConfig.TASK_CLASS_CONFIG, TestSourceTask.class.getName());
         TaskConfig taskConfig = new TaskConfig(origProps);
-        // We should expect this call, but the pluginLoader being swapped in is only mocked.
-        // EasyMock.expect(pluginLoader.loadClass(TestSourceTask.class.getName()))
-        //        .andReturn((Class) TestSourceTask.class);
-        EasyMock.expect(plugins.newTask(TestSourceTask.class)).andReturn(task);
-        EasyMock.expect(task.version()).andReturn("1.0");
 
-        workerTask.initialize(taskConfig);
-        EasyMock.expectLastCall();
+        when(plugins.newTask(TestSourceTask.class)).thenReturn(task);
+        when(task.version()).thenReturn("1.0");
 
         // Expect that the worker will create converters and will not initially find them using the current classloader ...
         assertNotNull(taskKeyConverter);
         assertNotNull(taskValueConverter);
         assertNotNull(taskHeaderConverter);
-        expectTaskKeyConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskKeyConverters(ClassLoaderUsage.PLUGINS, taskKeyConverter);
-        expectTaskValueConverters(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskValueConverters(ClassLoaderUsage.PLUGINS, taskValueConverter);
-        expectTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
-        expectTaskHeaderConverter(ClassLoaderUsage.PLUGINS, taskHeaderConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, null);
+        mockTaskConverter(ClassLoaderUsage.PLUGINS, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, taskKeyConverter);
+        mockTaskConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, null);
+        mockTaskConverter(ClassLoaderUsage.PLUGINS, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, taskValueConverter);
+        mockTaskHeaderConverter(ClassLoaderUsage.CURRENT_CLASSLOADER, null);
+        mockTaskHeaderConverter(ClassLoaderUsage.PLUGINS, taskHeaderConverter);
 
-        EasyMock.expect(executorService.submit(workerTask)).andReturn(null);
+        when(executorService.submit(any(WorkerSourceTask.class))).thenReturn(null);
 
-        EasyMock.expect(plugins.delegatingLoader()).andReturn(delegatingLoader);
-        EasyMock.expect(delegatingLoader.connectorLoader(WorkerTestConnector.class.getName()))
-                .andReturn(pluginLoader);
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(SampleSourceConnector.class.getName())).thenReturn(pluginLoader);
+        doReturn(SampleSourceConnector.class).when(plugins).connectorClass(SampleSourceConnector.class.getName());
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(pluginLoader)).andReturn(delegatingLoader)
-                .times(2);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
 
-        EasyMock.expect(workerTask.loader()).andReturn(pluginLoader);
-
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader)).andReturn(pluginLoader)
-                .times(2);
-        plugins.connectorClass(WorkerTestConnector.class.getName());
-        EasyMock.expectLastCall().andReturn(WorkerTestConnector.class);
-
-        // Remove
-        workerTask.stop();
-        EasyMock.expectLastCall();
-        EasyMock.expect(workerTask.awaitStop(EasyMock.anyLong())).andStubReturn(true);
-        EasyMock.expectLastCall();
-
-        workerTask.removeMetrics();
-        EasyMock.expectLastCall();
-
-        expectStopStorage();
-        expectClusterId();
-
-        PowerMock.replayAll();
 
         worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
                             noneConnectorClientConfigOverridePolicy);
@@ -1061,13 +1079,11 @@ public void testConverterOverrides() throws Exception {
         assertStatistics(worker, 0, 0);
         assertEquals(Collections.emptySet(), worker.taskIds());
         Map<String, String> connProps = anyConnectorConfigMap();
-        connProps.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, TestConverter.class.getName());
-        connProps.put("key.converter.extra.config", "foo");
-        connProps.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, TestConfigurableConverter.class.getName());
-        connProps.put("value.converter.extra.config", "bar");
-        worker.startTask(TASK_ID, ClusterConfigState.EMPTY, connProps, origProps, taskStatusListener, TargetState.STARTED);
+        connProps.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, SampleConverterWithHeaders.class.getName());
+        connProps.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, SampleConverterWithHeaders.class.getName());
+        worker.startSourceTask(TASK_ID, ClusterConfigState.EMPTY, connProps, origProps, taskStatusListener, TargetState.STARTED);
         assertStatistics(worker, 0, 1);
-        assertEquals(new HashSet<>(Arrays.asList(TASK_ID)), worker.taskIds());
+        assertEquals(Collections.singleton(TASK_ID), worker.taskIds());
         worker.stopAndAwaitTask(TASK_ID);
         assertStatistics(worker, 0, 0);
         assertEquals(Collections.emptySet(), worker.taskIds());
@@ -1076,20 +1092,34 @@ public void testConverterOverrides() throws Exception {
         assertStatistics(worker, 0, 0);
 
         // We've mocked the Plugin.newConverter method, so we don't currently configure the converters
+        verify(plugins).newTask(TestSourceTask.class);
+        WorkerSourceTask instantiatedTask = sourceTaskMockedConstruction.constructed().get(0);
+        verify(instantiatedTask).initialize(taskConfig);
+        verify(executorService).submit(any(WorkerSourceTask.class));
+        verify(plugins).delegatingLoader();
+        verify(delegatingLoader).connectorLoader(SampleSourceConnector.class.getName());
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(pluginLoader), times(2));
+        pluginsMockedStatic.verify(() -> Plugins.compareAndSwapLoaders(delegatingLoader), times(2));
+        verify(plugins).connectorClass(SampleSourceConnector.class.getName());
+
+        // Remove
+        verify(instantiatedTask).stop();
+        verify(instantiatedTask).awaitStop(anyLong());
+        verify(instantiatedTask).removeMetrics();
 
-        PowerMock.verifyAll();
+        verify(plugins, times(2)).currentThreadLoader();
+        verifyStorage();
     }
 
     @Test
     public void testProducerConfigsWithoutOverrides() {
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).andReturn(
-            new HashMap<>());
-        PowerMock.replayAll();
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).thenReturn(new HashMap<>());
         Map<String, String> expectedConfigs = new HashMap<>(defaultProducerConfigs);
         expectedConfigs.put("client.id", "connector-producer-job-0");
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
         assertEquals(expectedConfigs,
-                     Worker.producerConfigs(TASK_ID, "connector-producer-" + TASK_ID, config, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+                Worker.baseProducerConfigs(CONNECTOR_ID, "connector-producer-" + TASK_ID, config, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1106,11 +1136,11 @@ public void testProducerConfigsWithOverrides() {
         expectedConfigs.put("client.id", "producer-test-id");
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
 
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).andReturn(
-            new HashMap<>());
-        PowerMock.replayAll();
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).thenReturn(new HashMap<>());
+
         assertEquals(expectedConfigs,
-            Worker.producerConfigs(TASK_ID, "connector-producer-" + TASK_ID, configWithOverrides, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+                Worker.baseProducerConfigs(CONNECTOR_ID, "connector-producer-" + TASK_ID, configWithOverrides, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1131,29 +1161,31 @@ public void testProducerConfigsWithClientOverrides() {
         Map<String, Object> connConfig = new HashMap<>();
         connConfig.put("linger.ms", "5000");
         connConfig.put("batch.size", "1000");
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX))
-            .andReturn(connConfig);
-        PowerMock.replayAll();
+
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).thenReturn(connConfig);
+
         assertEquals(expectedConfigs,
-            Worker.producerConfigs(TASK_ID, "connector-producer-" + TASK_ID, configWithOverrides, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+                Worker.baseProducerConfigs(CONNECTOR_ID, "connector-producer-" + TASK_ID, configWithOverrides, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX);
     }
 
     @Test
     public void testConsumerConfigsWithoutOverrides() {
         Map<String, String> expectedConfigs = new HashMap<>(defaultConsumerConfigs);
-        expectedConfigs.put("group.id", "connect-test");
-        expectedConfigs.put("client.id", "connector-consumer-test-1");
+        expectedConfigs.put("group.id", "connect-test-connector");
+        expectedConfigs.put("client.id", "connector-consumer-job-0");
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
 
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).andReturn(new HashMap<>());
-        PowerMock.replayAll();
-        assertEquals(expectedConfigs, Worker.consumerConfigs(new ConnectorTaskId("test", 1), config, connectorConfig,
-            null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(new HashMap<>());
+
+        assertEquals(expectedConfigs, Worker.baseConsumerConfigs(CONNECTOR_ID, "connector-consumer-" + TASK_ID, config, connectorConfig,
+                null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
     }
 
     @Test
     public void testConsumerConfigsWithOverrides() {
         Map<String, String> props = new HashMap<>(workerProps);
+        props.put("consumer.group.id", "connect-test");
         props.put("consumer.auto.offset.reset", "latest");
         props.put("consumer.max.poll.records", "1000");
         props.put("consumer.client.id", "consumer-test-id");
@@ -1166,11 +1198,11 @@ public void testConsumerConfigsWithOverrides() {
         expectedConfigs.put("client.id", "consumer-test-id");
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
 
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).andReturn(new HashMap<>());
-        PowerMock.replayAll();
-        assertEquals(expectedConfigs, Worker.consumerConfigs(new ConnectorTaskId("test", 1), configWithOverrides, connectorConfig,
-            null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(new HashMap<>());
 
+        assertEquals(expectedConfigs, Worker.baseConsumerConfigs(CONNECTOR_ID, "connector-consumer-" + TASK_ID, configWithOverrides, connectorConfig,
+                null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1181,21 +1213,22 @@ public void testConsumerConfigsWithClientOverrides() {
         WorkerConfig configWithOverrides = new StandaloneConfig(props);
 
         Map<String, String> expectedConfigs = new HashMap<>(defaultConsumerConfigs);
-        expectedConfigs.put("group.id", "connect-test");
+        expectedConfigs.put("group.id", "connect-test-connector");
         expectedConfigs.put("auto.offset.reset", "latest");
         expectedConfigs.put("max.poll.records", "5000");
         expectedConfigs.put("max.poll.interval.ms", "1000");
-        expectedConfigs.put("client.id", "connector-consumer-test-1");
+        expectedConfigs.put("client.id", "connector-consumer-job-0");
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
 
         Map<String, Object> connConfig = new HashMap<>();
         connConfig.put("max.poll.records", "5000");
         connConfig.put("max.poll.interval.ms", "1000");
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX))
-            .andReturn(connConfig);
-        PowerMock.replayAll();
-        assertEquals(expectedConfigs, Worker.consumerConfigs(new ConnectorTaskId("test", 1), configWithOverrides, connectorConfig,
-            null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(connConfig);
+
+        assertEquals(expectedConfigs, Worker.baseConsumerConfigs(CONNECTOR_ID, "connector-consumer-" + TASK_ID, configWithOverrides, connectorConfig,
+                null, allConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1208,11 +1241,11 @@ public void testConsumerConfigsClientOverridesWithNonePolicy() {
         Map<String, Object> connConfig = new HashMap<>();
         connConfig.put("max.poll.records", "5000");
         connConfig.put("max.poll.interval.ms", "1000");
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX))
-            .andReturn(connConfig);
-        PowerMock.replayAll();
-        assertThrows(ConnectException.class, () -> Worker.consumerConfigs(new ConnectorTaskId("test", 1),
-            configWithOverrides, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(connConfig);
+
+        assertThrows(ConnectException.class, () -> Worker.baseConsumerConfigs(CONNECTOR_ID, "connector-consumer-" + TASK_ID,
+                configWithOverrides, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1224,22 +1257,20 @@ public void testAdminConfigsClientOverridesWithAllPolicy() {
         props.put("consumer.bootstrap.servers", "localhost:4761");
         WorkerConfig configWithOverrides = new StandaloneConfig(props);
 
-        Map<String, Object> connConfig = new HashMap<>();
-        connConfig.put("metadata.max.age.ms", "10000");
-
+        Map<String, Object> connConfig = Collections.singletonMap("metadata.max.age.ms", "10000");
         Map<String, String> expectedConfigs = new HashMap<>(workerProps);
-
         expectedConfigs.put("bootstrap.servers", "localhost:9092");
         expectedConfigs.put("client.id", "testid");
         expectedConfigs.put("metadata.max.age.ms", "10000");
+
         //we added a config on the fly
         expectedConfigs.put("metrics.context.connect.kafka.cluster.id", CLUSTER_ID);
 
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX))
-            .andReturn(connConfig);
-        PowerMock.replayAll();
-        assertEquals(expectedConfigs, Worker.adminConfigs(new ConnectorTaskId("test", 1), "", configWithOverrides, connectorConfig,
-                                                             null, allConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX)).thenReturn(connConfig);
+
+        assertEquals(expectedConfigs, Worker.adminConfigs(CONNECTOR_ID, "", configWithOverrides, connectorConfig,
+                null, allConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX);
     }
 
     @Test
@@ -1248,64 +1279,478 @@ public void testAdminConfigsClientOverridesWithNonePolicy() {
         props.put("admin.client.id", "testid");
         props.put("admin.metadata.max.age.ms", "5000");
         WorkerConfig configWithOverrides = new StandaloneConfig(props);
+        Map<String, Object> connConfig = Collections.singletonMap("metadata.max.age.ms", "10000");
 
-        Map<String, Object> connConfig = new HashMap<>();
-        connConfig.put("metadata.max.age.ms", "10000");
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX)).thenReturn(connConfig);
 
-        EasyMock.expect(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX))
-            .andReturn(connConfig);
-        PowerMock.replayAll();
-        assertThrows(ConnectException.class, () -> Worker.adminConfigs(new ConnectorTaskId("test", 1),
-            "", configWithOverrides, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID));
+        assertThrows(ConnectException.class, () -> Worker.adminConfigs("test",
+                "", configWithOverrides, connectorConfig, null, noneConnectorClientConfigOverridePolicy, CLUSTER_ID, ConnectorType.SINK));
+        verify(connectorConfig).originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX);
+    }
 
+    @Test
+    public void testRegularSourceOffsetsConsumerConfigs() {
+        final Map<String, Object> connectorConsumerOverrides = new HashMap<>();
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(connectorConsumerOverrides);
+
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "enabled");
+        workerProps.put("bootstrap.servers", "localhost:4761");
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", "connect-offsets");
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        Map<String, Object> consumerConfigs = Worker.regularSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:4761", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        assertEquals("read_committed", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
+
+        workerProps.put("consumer." + BOOTSTRAP_SERVERS_CONFIG, "localhost:9021");
+        workerProps.put("consumer." + ISOLATION_LEVEL_CONFIG, "read_uncommitted");
+        config = new DistributedConfig(workerProps);
+        consumerConfigs = Worker.regularSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:9021", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is allowed to override the isolation level for regular (non-exactly-once) source connectors and their tasks
+        assertEquals("read_uncommitted", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
+
+        workerProps.remove("consumer." + ISOLATION_LEVEL_CONFIG);
+        connectorConsumerOverrides.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:489");
+        connectorConsumerOverrides.put(ISOLATION_LEVEL_CONFIG, "read_uncommitted");
+        config = new DistributedConfig(workerProps);
+        consumerConfigs = Worker.regularSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:489", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is allowed to override the isolation level for regular (non-exactly-once) source connectors and their tasks
+        assertEquals("read_uncommitted", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
     }
 
     @Test
-    public void testWorkerMetrics() throws Exception {
-        expectConverters();
-        expectStartStorage();
-        expectFileConfigProvider();
+    public void testExactlyOnceSourceOffsetsConsumerConfigs() {
+        final Map<String, Object> connectorConsumerOverrides = new HashMap<>();
+        when(connectorConfig.originalsWithPrefix(ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX)).thenReturn(connectorConsumerOverrides);
+
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "enabled");
+        workerProps.put("bootstrap.servers", "localhost:4761");
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", "connect-offsets");
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        Map<String, Object> consumerConfigs = Worker.exactlyOnceSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:4761", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        assertEquals("read_committed", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
+
+        workerProps.put("consumer." + BOOTSTRAP_SERVERS_CONFIG, "localhost:9021");
+        workerProps.put("consumer." + ISOLATION_LEVEL_CONFIG, "read_uncommitted");
+        config = new DistributedConfig(workerProps);
+        consumerConfigs = Worker.exactlyOnceSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:9021", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is not allowed to override isolation level when exactly-once support is enabled
+        assertEquals("read_committed", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
+
+        workerProps.remove("consumer." + ISOLATION_LEVEL_CONFIG);
+        connectorConsumerOverrides.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:489");
+        connectorConsumerOverrides.put(ISOLATION_LEVEL_CONFIG, "read_uncommitted");
+        config = new DistributedConfig(workerProps);
+        consumerConfigs = Worker.exactlyOnceSourceOffsetsConsumerConfigs(
+                "test", "", config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:489", consumerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is not allowed to override isolation level when exactly-once support is enabled
+        assertEquals("read_committed", consumerConfigs.get(ISOLATION_LEVEL_CONFIG));
+    }
 
-        // Create
-        EasyMock.expect(plugins.currentThreadLoader()).andReturn(delegatingLoader).times(2);
-        EasyMock.expect(plugins.newConnector(WorkerTestConnector.class.getName()))
-                .andReturn(sourceConnector);
-        EasyMock.expect(sourceConnector.version()).andReturn("1.0");
+    @Test
+    public void testExactlyOnceSourceTaskProducerConfigs() {
+        final Map<String, Object> connectorProducerOverrides = new HashMap<>();
+        when(connectorConfig.originalsWithPrefix(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX)).thenReturn(connectorProducerOverrides);
+
+        final String groupId = "connect-cluster";
+        final String transactionalId = Worker.taskTransactionalId(groupId, TASK_ID.connector(), TASK_ID.task());
+
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "enabled");
+        workerProps.put("bootstrap.servers", "localhost:4761");
+        workerProps.put("group.id", groupId);
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", "connect-offsets");
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        Map<String, Object> producerConfigs = Worker.exactlyOnceSourceTaskProducerConfigs(
+                TASK_ID, config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:4761", producerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        assertEquals("true", producerConfigs.get(ENABLE_IDEMPOTENCE_CONFIG));
+        assertEquals(transactionalId, producerConfigs.get(TRANSACTIONAL_ID_CONFIG));
+
+        workerProps.put("producer." + BOOTSTRAP_SERVERS_CONFIG, "localhost:9021");
+        workerProps.put("producer." + ENABLE_IDEMPOTENCE_CONFIG, "false");
+        workerProps.put("producer." + TRANSACTIONAL_ID_CONFIG, "some-other-transactional-id");
+        config = new DistributedConfig(workerProps);
+        producerConfigs = Worker.exactlyOnceSourceTaskProducerConfigs(
+                TASK_ID, config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:9021", producerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is not allowed to override idempotence or transactional ID for exactly-once source tasks
+        assertEquals("true", producerConfigs.get(ENABLE_IDEMPOTENCE_CONFIG));
+        assertEquals(transactionalId, producerConfigs.get(TRANSACTIONAL_ID_CONFIG));
+
+        workerProps.remove("producer." + ENABLE_IDEMPOTENCE_CONFIG);
+        workerProps.remove("producer." + TRANSACTIONAL_ID_CONFIG);
+        connectorProducerOverrides.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:489");
+        connectorProducerOverrides.put(ENABLE_IDEMPOTENCE_CONFIG, "false");
+        connectorProducerOverrides.put(TRANSACTIONAL_ID_CONFIG, "yet-another-transactional-id");
+        config = new DistributedConfig(workerProps);
+        producerConfigs = Worker.exactlyOnceSourceTaskProducerConfigs(
+                TASK_ID, config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        assertEquals("localhost:489", producerConfigs.get(BOOTSTRAP_SERVERS_CONFIG));
+        // User is not allowed to override idempotence or transactional ID for exactly-once source tasks
+        assertEquals("true", producerConfigs.get(ENABLE_IDEMPOTENCE_CONFIG));
+        assertEquals(transactionalId, producerConfigs.get(TRANSACTIONAL_ID_CONFIG));
+
+        // Rare case: somehow, an explicit null has made it into the connector config
+        connectorProducerOverrides.put(TRANSACTIONAL_ID_CONFIG, null);
+        producerConfigs = Worker.exactlyOnceSourceTaskProducerConfigs(
+                TASK_ID, config, connectorConfig, null, allConnectorClientConfigOverridePolicy, CLUSTER_ID);
+        // User is still not allowed to override idempotence or transactional ID for exactly-once source tasks
+        assertEquals("true", producerConfigs.get(ENABLE_IDEMPOTENCE_CONFIG));
+        assertEquals(transactionalId, producerConfigs.get(TRANSACTIONAL_ID_CONFIG));
+    }
 
-        Map<String, String> props = new HashMap<>();
-        props.put(SinkConnectorConfig.TOPICS_CONFIG, "foo,bar");
-        props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
-        props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
-        props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, WorkerTestConnector.class.getName());
+    @Test
+    public void testOffsetStoreForRegularSourceConnector() {
+        mockInternalConverters();
+        mockFileConfigProvider();
+
+        final String workerOffsetsTopic = "worker-offsets";
+        final String workerBootstrapServers = "localhost:4761";
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "disabled");
+        workerProps.put("bootstrap.servers", workerBootstrapServers);
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", workerOffsetsTopic);
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, allConnectorClientConfigOverridePolicy);
+        worker.start();
 
-        EasyMock.expect(sourceConnector.version()).andReturn("1.0");
+        Map<String, String> connectorProps = new HashMap<>();
+        connectorProps.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, SampleSourceConnector.class.getName());
+        connectorProps.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
+        SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With no connector-specific offsets topic in the config, we should only use the worker-global offsets store
+        ConnectorOffsetBackingStore connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertFalse(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, "connector-offsets-topic");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config (whose name differs from the worker's offsets topic), we should use both a
+        // connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, workerOffsetsTopic);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and no overridden bootstrap.servers
+        // for the connector, we should only use a connector-specific offsets store
+        connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that exactly matches the worker's, we should only use a connector-specific offsets store
+        connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, "localhost:1111");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.remove(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With no connector-specific offsets topic in the config, even with an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should still only use the worker-global offsets store
+        connectorStore = worker.offsetStoreForRegularSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertFalse(connectorStore.hasConnectorSpecificStore());
 
-        EasyMock.expect(plugins.compareAndSwapLoaders(sourceConnector))
-                .andReturn(delegatingLoader)
-                .times(2);
-        sourceConnector.initialize(anyObject(ConnectorContext.class));
-        EasyMock.expectLastCall();
-        sourceConnector.start(props);
-        EasyMock.expectLastCall();
+        worker.stop();
+    }
 
-        EasyMock.expect(Plugins.compareAndSwapLoaders(delegatingLoader))
-                .andReturn(pluginLoader).times(2);
+    @Test
+    public void testOffsetStoreForExactlyOnceSourceConnector() {
+        mockInternalConverters();
+        mockFileConfigProvider();
+
+        final String workerOffsetsTopic = "worker-offsets";
+        final String workerBootstrapServers = "localhost:4761";
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "enabled");
+        workerProps.put("bootstrap.servers", workerBootstrapServers);
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", workerOffsetsTopic);
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, allConnectorClientConfigOverridePolicy);
+        worker.start();
 
-        connectorStatusListener.onStartup(CONNECTOR_ID);
-        EasyMock.expectLastCall();
+        Map<String, String> connectorProps = new HashMap<>();
+        connectorProps.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, SampleSourceConnector.class.getName());
+        connectorProps.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
+        SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With no connector-specific offsets topic in the config, we should only use a connector-specific offsets store
+        ConnectorOffsetBackingStore connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, "connector-offsets-topic");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config (whose name differs from the worker's offsets topic), we should use both a
+        // connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, workerOffsetsTopic);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and no overridden bootstrap.servers
+        // for the connector, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that exactly matches the worker's, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(CONNECTOR_CLIENT_PRODUCER_OVERRIDES_PREFIX + BOOTSTRAP_SERVERS_CONFIG, "localhost:1111");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.remove(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With no connector-specific offsets topic in the config and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's,  we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceConnector(sourceConfig, CONNECTOR_ID, sourceConnector);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
 
-        // Remove
-        sourceConnector.stop();
-        EasyMock.expectLastCall();
+        worker.stop();
+    }
 
-        connectorStatusListener.onShutdown(CONNECTOR_ID);
-        EasyMock.expectLastCall();
+    @Test
+    public void testOffsetStoreForRegularSourceTask() {
+        mockInternalConverters();
+        mockFileConfigProvider();
+
+        Map<String, Object> producerProps = new HashMap<>();
+        @SuppressWarnings("unchecked")
+        Producer<byte[], byte[]> producer = mock(Producer.class);
+        TopicAdmin topicAdmin = mock(TopicAdmin.class);
+
+        final String workerOffsetsTopic = "worker-offsets";
+        final String workerBootstrapServers = "localhost:4761";
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "disabled");
+        workerProps.put("bootstrap.servers", workerBootstrapServers);
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", workerOffsetsTopic);
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, allConnectorClientConfigOverridePolicy);
+        worker.start();
 
-        expectStopStorage();
-        expectClusterId();
+        Map<String, String> connectorProps = new HashMap<>();
+        connectorProps.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, SampleSourceConnector.class.getName());
+        connectorProps.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
+
+        final SourceConnectorConfig sourceConfigWithoutOffsetsTopic = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        // With no connector-specific offsets topic in the config, we should only use the worker-global store
+        // Pass in a null topic admin to make sure that with these parameters, the method doesn't require a topic admin
+        ConnectorOffsetBackingStore connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithoutOffsetsTopic, sourceConnector.getClass(), producer, producerProps, null);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertFalse(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, "connector-offsets-topic");
+        final SourceConnectorConfig sourceConfigWithOffsetsTopic = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config (whose name differs from the worker's offsets topic), we should use both a
+        // connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithOffsetsTopic, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+        assertThrows(NullPointerException.class,
+                () -> worker.offsetStoreForRegularSourceTask(
+                        TASK_ID, sourceConfigWithOffsetsTopic, sourceConnector.getClass(), producer, producerProps, null
+                )
+        );
 
-        PowerMock.replayAll();
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, workerOffsetsTopic);
+        final SourceConnectorConfig sourceConfigWithSameOffsetsTopicAsWorker = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and no overridden bootstrap.servers
+        // for the connector, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+        assertThrows(
+                NullPointerException.class,
+                () -> worker.offsetStoreForRegularSourceTask(
+                        TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, null
+                )
+        );
 
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that exactly matches the worker's, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+        assertThrows(
+                NullPointerException.class,
+                () -> worker.offsetStoreForRegularSourceTask(
+                        TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, null
+                )
+        );
+
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:1111");
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+        assertThrows(
+                NullPointerException.class,
+                () -> worker.offsetStoreForRegularSourceTask(
+                        TASK_ID, sourceConfigWithSameOffsetsTopicAsWorker, sourceConnector.getClass(), producer, producerProps, null
+                )
+        );
+
+        connectorProps.remove(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG);
+        // With no connector-specific offsets topic in the config and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should still only use the worker-global store
+        // Pass in a null topic admin to make sure that with these parameters, the method doesn't require a topic admin
+        connectorStore = worker.offsetStoreForRegularSourceTask(TASK_ID, sourceConfigWithoutOffsetsTopic, sourceConnector.getClass(), producer, producerProps, null);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertFalse(connectorStore.hasConnectorSpecificStore());
+
+        worker.stop();
+    }
+
+    @Test
+    public void testOffsetStoreForExactlyOnceSourceTask() {
+        mockInternalConverters();
+        mockFileConfigProvider();
+
+        Map<String, Object> producerProps = new HashMap<>();
+        @SuppressWarnings("unchecked")
+        Producer<byte[], byte[]> producer = mock(Producer.class);
+        TopicAdmin topicAdmin = mock(TopicAdmin.class);
+
+        final String workerOffsetsTopic = "worker-offsets";
+        final String workerBootstrapServers = "localhost:4761";
+        Map<String, String> workerProps = new HashMap<>(this.workerProps);
+        workerProps.put("exactly.once.source.support", "enabled");
+        workerProps.put("bootstrap.servers", workerBootstrapServers);
+        workerProps.put("group.id", "connect-cluster");
+        workerProps.put("config.storage.topic", "connect-configs");
+        workerProps.put("offset.storage.topic", workerOffsetsTopic);
+        workerProps.put("status.storage.topic", "connect-statuses");
+        config = new DistributedConfig(workerProps);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, allConnectorClientConfigOverridePolicy);
+        worker.start();
+
+        Map<String, String> connectorProps = new HashMap<>();
+        connectorProps.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
+        connectorProps.put(CONNECTOR_CLASS_CONFIG, SampleSourceConnector.class.getName());
+        connectorProps.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
+        SourceConnectorConfig sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        // With no connector-specific offsets topic in the config, we should only use a connector-specific offsets store
+        ConnectorOffsetBackingStore connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, "connector-offsets-topic");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config (whose name differs from the worker's offsets topic), we should use both a
+        // connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.put(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG, workerOffsetsTopic);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and no overridden bootstrap.servers
+        // for the connector, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, workerBootstrapServers);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that exactly matches the worker's, we should only use a connector-specific store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertFalse(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        producerProps.put(BOOTSTRAP_SERVERS_CONFIG, "localhost:1111");
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With a connector-specific offsets topic in the config whose name matches the worker's offsets topic, and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's, we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        connectorProps.remove(SourceConnectorConfig.OFFSETS_TOPIC_CONFIG);
+        sourceConfig = new SourceConnectorConfig(plugins, connectorProps, enableTopicCreation);
+        // With no connector-specific offsets topic in the config and an overridden bootstrap.servers
+        // for the connector that doesn't match the worker's,  we should use both a connector-specific store and the worker-global store
+        connectorStore = worker.offsetStoreForExactlyOnceSourceTask(TASK_ID, sourceConfig, sourceConnector.getClass(), producer, producerProps, topicAdmin);
+        assertTrue(connectorStore.hasWorkerGlobalStore());
+        assertTrue(connectorStore.hasConnectorSpecificStore());
+
+        worker.stop();
+    }
+
+    @Test
+    public void testWorkerMetrics() throws Exception {
+        mockInternalConverters();
+        mockFileConfigProvider();
+
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
         Worker worker = new Worker("worker-1",
                 Time.SYSTEM,
                 plugins,
@@ -1316,7 +1761,6 @@ public void testWorkerMetrics() throws Exception {
         MetricName name = worker.metrics().metrics().metricName("test.avg", "grp1");
         worker.metrics().metrics().addMetric(name, new Avg());
         MBeanServer server = ManagementFactory.getPlatformMBeanServer();
-        Set<ObjectInstance> ret = server.queryMBeans(null, null);
 
         List<MetricsReporter> list = worker.metrics().metrics().reporters();
         for (MetricsReporter reporter : list) {
@@ -1330,6 +1774,103 @@ public void testWorkerMetrics() throws Exception {
         assertNotNull(server.getObjectInstance(new ObjectName("kafka.connect:type=grp1")));
     }
 
+    @Test
+    public void testExecutorServiceShutdown() throws InterruptedException {
+        ExecutorService executorService = mock(ExecutorService.class);
+        doNothing().when(executorService).shutdown();
+        when(executorService.awaitTermination(1000L, TimeUnit.MILLISECONDS)).thenReturn(true);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config,
+                            offsetBackingStore, executorService,
+                            noneConnectorClientConfigOverridePolicy);
+        worker.start();
+
+        assertEquals(Collections.emptySet(), worker.connectorNames());
+        worker.stop();
+        verify(executorService, times(1)).shutdown();
+        verify(executorService, times(1)).awaitTermination(1000L, TimeUnit.MILLISECONDS);
+        verifyNoMoreInteractions(executorService);
+
+    }
+
+    @Test
+    public void testExecutorServiceShutdownWhenTerminationFails() throws InterruptedException {
+        ExecutorService executorService = mock(ExecutorService.class);
+        doNothing().when(executorService).shutdown();
+        when(executorService.awaitTermination(1000L, TimeUnit.MILLISECONDS)).thenReturn(false);
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config,
+                            offsetBackingStore, executorService,
+                            noneConnectorClientConfigOverridePolicy);
+        worker.start();
+
+        assertEquals(Collections.emptySet(), worker.connectorNames());
+        worker.stop();
+        verify(executorService, times(1)).shutdown();
+        verify(executorService, times(1)).shutdownNow();
+        verify(executorService, times(2)).awaitTermination(1000L, TimeUnit.MILLISECONDS);
+        verifyNoMoreInteractions(executorService);
+
+    }
+
+    @Test
+    public void testExecutorServiceShutdownWhenTerminationThrowsException() throws InterruptedException {
+        ExecutorService executorService = mock(ExecutorService.class);
+        doNothing().when(executorService).shutdown();
+        when(executorService.awaitTermination(1000L, TimeUnit.MILLISECONDS)).thenThrow(new InterruptedException("interrupt"));
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config,
+                            offsetBackingStore, executorService,
+                            noneConnectorClientConfigOverridePolicy);
+        worker.start();
+
+        assertEquals(Collections.emptySet(), worker.connectorNames());
+        worker.stop();
+        verify(executorService, times(1)).shutdown();
+        verify(executorService, times(1)).shutdownNow();
+        verify(executorService, times(1)).awaitTermination(1000L, TimeUnit.MILLISECONDS);
+        verifyNoMoreInteractions(executorService);
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void testZombieFencing() {
+        Admin admin = mock(Admin.class);
+        FenceProducersResult fenceProducersResult = mock(FenceProducersResult.class);
+        KafkaFuture<Void> fenceProducersFuture = mock(KafkaFuture.class);
+        KafkaFuture<Void> expectedZombieFenceFuture = mock(KafkaFuture.class);
+        when(admin.fenceProducers(any(), any())).thenReturn(fenceProducersResult);
+        when(fenceProducersResult.all()).thenReturn(fenceProducersFuture);
+        when(fenceProducersFuture.whenComplete(any())).thenReturn(expectedZombieFenceFuture);
+
+        when(plugins.delegatingLoader()).thenReturn(delegatingLoader);
+        when(delegatingLoader.connectorLoader(anyString())).thenReturn(pluginLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(pluginLoader)).thenReturn(delegatingLoader);
+        pluginsMockedStatic.when(() -> Plugins.compareAndSwapLoaders(delegatingLoader)).thenReturn(pluginLoader);
+
+        worker = new Worker(WORKER_ID, new MockTime(), plugins, config, offsetBackingStore, executorService,
+                allConnectorClientConfigOverridePolicy);
+        worker.herder = herder;
+        worker.start();
+
+        Map<String, String> connectorConfig = anyConnectorConfigMap();
+        connectorConfig.put(CONNECTOR_CLIENT_ADMIN_OVERRIDES_PREFIX + RETRY_BACKOFF_MS_CONFIG, "4761");
+
+        AtomicReference<Map<String, Object>> adminConfig = new AtomicReference<>();
+        Function<Map<String, Object>, Admin> mockAdminConstructor = actualAdminConfig -> {
+            adminConfig.set(actualAdminConfig);
+            return admin;
+        };
+
+        KafkaFuture<Void> actualZombieFenceFuture =
+                worker.fenceZombies(CONNECTOR_ID, 12, connectorConfig, mockAdminConstructor);
+
+        assertEquals(expectedZombieFenceFuture, actualZombieFenceFuture);
+        assertNotNull(adminConfig.get());
+        assertEquals("Admin should be configured with user-specified overrides",
+                "4761",
+                adminConfig.get().get(RETRY_BACKOFF_MS_CONFIG)
+        );
+    }
+
     private void assertStatusMetrics(long expected, String metricName) {
         MetricGroup statusMetrics = worker.connectorStatusMetricsGroup().metricGroup(TASK_ID.connector());
         if (expected == 0L) {
@@ -1375,134 +1916,65 @@ private void assertStartupStatistics(Worker worker, int connectorStartupAttempts
         assertEquals(taskStartupFailurePct, MockConnectMetrics.currentMetricValueAsDouble(worker.metrics(), workerMetrics, "task-startup-failure-percentage"), 0.0001d);
     }
 
-    private void expectStartStorage() {
-        offsetBackingStore.configure(anyObject(WorkerConfig.class));
-        EasyMock.expectLastCall();
-        offsetBackingStore.start();
-        EasyMock.expectLastCall();
-        EasyMock.expect(herder.statusBackingStore())
-                .andReturn(statusBackingStore).anyTimes();
+    private void mockStorage() {
+        when(herder.statusBackingStore()).thenReturn(statusBackingStore);
     }
 
-    private void expectStopStorage() {
-        offsetBackingStore.stop();
-        EasyMock.expectLastCall();
-    }
 
-    private void expectConverters() {
-        expectConverters(JsonConverter.class, false);
+    private void verifyStorage() {
+        verify(offsetBackingStore).configure(any(WorkerConfig.class));
+        verify(offsetBackingStore).start();
+        verify(herder).statusBackingStore();
+        verify(offsetBackingStore).stop();
     }
 
-    private void expectConverters(Boolean expectDefaultConverters) {
-        expectConverters(JsonConverter.class, expectDefaultConverters);
-    }
+    private void mockInternalConverters() {
+        Converter internalKeyConverter = mock(JsonConverter.class);
+        Converter internalValueConverter = mock(JsonConverter.class);
 
-    private void expectConverters(Class<? extends Converter> converterClass, Boolean expectDefaultConverters) {
-        // As default converters are instantiated when a task starts, they are expected only if the `startTask` method is called
-        if (expectDefaultConverters) {
+        when(plugins.newInternalConverter(eq(true), anyString(), anyMap()))
+                       .thenReturn(internalKeyConverter);
 
-            // Instantiate and configure default
-            EasyMock.expect(plugins.newConverter(config, WorkerConfig.KEY_CONVERTER_CLASS_CONFIG, ClassLoaderUsage.PLUGINS))
-                    .andReturn(keyConverter);
-            EasyMock.expect(plugins.newConverter(config, WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG, ClassLoaderUsage.PLUGINS))
-                    .andReturn(valueConverter);
-            EasyMock.expectLastCall();
-        }
+        when(plugins.newInternalConverter(eq(false), anyString(), anyMap()))
+                       .thenReturn(internalValueConverter);
+    }
 
-        //internal
-        Converter internalKeyConverter = PowerMock.createMock(converterClass);
-        Converter internalValueConverter = PowerMock.createMock(converterClass);
+    private void verifyConverters() {
+        verify(plugins, times(1)).newInternalConverter(eq(true), anyString(), anyMap());
+        verify(plugins).newInternalConverter(eq(false), anyString(), anyMap());
+    }
 
-        // Instantiate and configure internal
-        EasyMock.expect(
-                plugins.newInternalConverter(
-                        EasyMock.eq(true),
-                        EasyMock.anyString(),
-                        EasyMock.anyObject()
-                )
-        ).andReturn(internalKeyConverter);
-        EasyMock.expect(
-                plugins.newInternalConverter(
-                        EasyMock.eq(false),
-                        EasyMock.anyString(),
-                        EasyMock.anyObject()
-                )
-        ).andReturn(internalValueConverter);
-        EasyMock.expectLastCall();
+    private void mockTaskConverter(ClassLoaderUsage classLoaderUsage, String converterClassConfig, Converter returning) {
+        when(plugins.newConverter(any(AbstractConfig.class), eq(converterClassConfig), eq(classLoaderUsage)))
+                       .thenReturn(returning);
     }
 
-    private void expectTaskKeyConverters(ClassLoaderUsage classLoaderUsage, Converter returning) {
-        EasyMock.expect(
-                plugins.newConverter(
-                        anyObject(AbstractConfig.class),
-                        eq(WorkerConfig.KEY_CONVERTER_CLASS_CONFIG),
-                        eq(classLoaderUsage)))
-                .andReturn(returning);
+    private void verifyTaskConverter(String converterClassConfig) {
+        verify(plugins).newConverter(any(AbstractConfig.class), eq(converterClassConfig), eq(ClassLoaderUsage.CURRENT_CLASSLOADER));
     }
 
-    private void expectTaskValueConverters(ClassLoaderUsage classLoaderUsage, Converter returning) {
-        EasyMock.expect(
-                plugins.newConverter(
-                        anyObject(AbstractConfig.class),
-                        eq(WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG),
-                        eq(classLoaderUsage)))
-                .andReturn(returning);
+    private void mockTaskHeaderConverter(ClassLoaderUsage classLoaderUsage, HeaderConverter returning) {
+        when(plugins.newHeaderConverter(any(AbstractConfig.class), eq(WorkerConfig.HEADER_CONVERTER_CLASS_CONFIG), eq(classLoaderUsage)))
+               .thenReturn(returning);
     }
 
-    private void expectTaskHeaderConverter(ClassLoaderUsage classLoaderUsage, HeaderConverter returning) {
-        EasyMock.expect(
-                plugins.newHeaderConverter(
-                        anyObject(AbstractConfig.class),
-                        eq(WorkerConfig.HEADER_CONVERTER_CLASS_CONFIG),
-                        eq(classLoaderUsage)))
-                .andReturn(returning);
+    private void verifyTaskHeaderConverter() {
+        verify(plugins).newHeaderConverter(any(AbstractConfig.class), eq(WorkerConfig.HEADER_CONVERTER_CLASS_CONFIG), eq(ClassLoaderUsage.CURRENT_CLASSLOADER));
     }
 
     private Map<String, String> anyConnectorConfigMap() {
         Map<String, String> props = new HashMap<>();
         props.put(ConnectorConfig.NAME_CONFIG, CONNECTOR_ID);
-        props.put(ConnectorConfig.CONNECTOR_CLASS_CONFIG, WorkerTestConnector.class.getName());
+        props.put(CONNECTOR_CLASS_CONFIG, SampleSourceConnector.class.getName());
         props.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
         props.put(DEFAULT_TOPIC_CREATION_PREFIX + REPLICATION_FACTOR_CONFIG, String.valueOf(1));
         props.put(DEFAULT_TOPIC_CREATION_PREFIX + PARTITIONS_CONFIG, String.valueOf(1));
         return props;
     }
 
-    private void expectClusterId() {
-        PowerMock.mockStaticPartial(ConnectUtils.class, "lookupKafkaClusterId");
-        EasyMock.expect(ConnectUtils.lookupKafkaClusterId(EasyMock.anyObject())).andReturn("test-cluster").anyTimes();
-    }
-
-    private void expectNewWorkerTask() throws Exception {
-        PowerMock.expectNew(
-                WorkerSourceTask.class, EasyMock.eq(TASK_ID),
-                EasyMock.eq(task),
-                anyObject(TaskStatus.Listener.class),
-                EasyMock.eq(TargetState.STARTED),
-                anyObject(JsonConverter.class),
-                anyObject(JsonConverter.class),
-                anyObject(JsonConverter.class),
-                EasyMock.eq(new TransformationChain<>(Collections.emptyList(), NOOP_OPERATOR)),
-                anyObject(KafkaProducer.class),
-                anyObject(TopicAdmin.class),
-                EasyMock.<Map<String, TopicCreationGroup>>anyObject(),
-                anyObject(OffsetStorageReader.class),
-                anyObject(OffsetStorageWriter.class),
-                EasyMock.eq(config),
-                anyObject(ClusterConfigState.class),
-                anyObject(ConnectMetrics.class),
-                EasyMock.eq(pluginLoader),
-                anyObject(Time.class),
-                anyObject(RetryWithToleranceOperator.class),
-                anyObject(StatusBackingStore.class),
-                anyObject(Executor.class))
-                .andReturn(workerTask);
-    }
-    /* Name here needs to be unique as we are testing the aliasing mechanism */
-    public static class WorkerTestConnector extends SourceConnector {
-
-        private static final ConfigDef CONFIG_DEF  = new ConfigDef()
-            .define("configName", ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Test configName.");
+    private static class TestSourceTask extends SourceTask {
+        public TestSourceTask() {
+        }
 
         @Override
         public String version() {
@@ -1511,32 +1983,20 @@ public String version() {
 
         @Override
         public void start(Map<String, String> props) {
-
-        }
-
-        @Override
-        public Class<? extends Task> taskClass() {
-            return null;
         }
 
         @Override
-        public List<Map<String, String>> taskConfigs(int maxTasks) {
+        public List<SourceRecord> poll() {
             return null;
         }
 
         @Override
         public void stop() {
-
-        }
-
-        @Override
-        public ConfigDef config() {
-            return CONFIG_DEF;
         }
     }
 
-    private static class TestSourceTask extends SourceTask {
-        public TestSourceTask() {
+    private static class TestSinkTask extends SinkTask {
+        public TestSinkTask() {
         }
 
         @Override
@@ -1549,60 +2009,13 @@ public void start(Map<String, String> props) {
         }
 
         @Override
-        public List<SourceRecord> poll() {
-            return null;
+        public void put(Collection<SinkRecord> records) {
         }
 
         @Override
         public void stop() {
         }
-    }
-
-    public static class TestConverter implements Converter {
-        public Map<String, ?> configs;
-
-        @Override
-        public void configure(Map<String, ?> configs, boolean isKey) {
-            this.configs = configs;
-        }
 
-        @Override
-        public byte[] fromConnectData(String topic, Schema schema, Object value) {
-            return new byte[0];
-        }
-
-        @Override
-        public SchemaAndValue toConnectData(String topic, byte[] value) {
-            return null;
-        }
     }
 
-    public static class TestConfigurableConverter implements Converter, Configurable {
-        public Map<String, ?> configs;
-
-        public ConfigDef config() {
-            return JsonConverterConfig.configDef();
-        }
-
-        @Override
-        public void configure(Map<String, ?> configs) {
-            this.configs = configs;
-            new JsonConverterConfig(configs); // requires the `converter.type` config be set
-        }
-
-        @Override
-        public void configure(Map<String, ?> configs, boolean isKey) {
-            this.configs = configs;
-        }
-
-        @Override
-        public byte[] fromConnectData(String topic, Schema schema, Object value) {
-            return new byte[0];
-        }
-
-        @Override
-        public SchemaAndValue toConnectData(String topic, byte[] value) {
-            return null;
-        }
-    }
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTestUtils.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTestUtils.java
index ed77018f2883b..084d865cc5eca 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTestUtils.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTestUtils.java
@@ -16,7 +16,7 @@
  */
 package org.apache.kafka.connect.runtime;
 
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.distributed.ExtendedAssignment;
 import org.apache.kafka.connect.runtime.distributed.ExtendedWorkerState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
@@ -70,6 +70,9 @@ public static ClusterConfigState clusterConfigState(long offset,
                 connectorConfigs(1, connectorNum),
                 connectorTargetStates(1, connectorNum, TargetState.STARTED),
                 taskConfigs(0, connectorNum, connectorNum * taskNum),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptySet(),
                 Collections.emptySet());
     }
 
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTransactionContextTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTransactionContextTest.java
new file mode 100644
index 0000000000000..3bc2b2155d1f1
--- /dev/null
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/WorkerTransactionContextTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.connect.runtime;
+
+import org.apache.kafka.connect.source.SourceRecord;
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class WorkerTransactionContextTest {
+
+    private static final SourceRecord RECORD = new SourceRecord(null, null, "t", null, 0, null, null);
+
+    private WorkerTransactionContext context = new WorkerTransactionContext();
+
+    @Test
+    public void shouldNotifyOfBatchCommit() {
+        context.commitTransaction();
+        assertFalse(context.shouldAbortBatch());
+        assertFalse(context.shouldAbortOn(RECORD));
+        assertFalse(context.shouldCommitOn(RECORD));
+        assertTrue(context.shouldCommitBatch());
+    }
+
+    @Test
+    public void shouldNotifyOfRecordCommit() {
+        context.commitTransaction(RECORD);
+        assertFalse(context.shouldAbortBatch());
+        assertFalse(context.shouldAbortOn(RECORD));
+        assertFalse(context.shouldCommitBatch());
+        assertTrue(context.shouldCommitOn(RECORD));
+    }
+
+    @Test
+    public void shouldNotifyOfBatchAbort() {
+        context.abortTransaction();
+        assertFalse(context.shouldAbortOn(RECORD));
+        assertFalse(context.shouldCommitOn(RECORD));
+        assertFalse(context.shouldCommitBatch());
+        assertTrue(context.shouldAbortBatch());
+    }
+
+    @Test
+    public void shouldNotifyOfRecordAbort() {
+        context.abortTransaction(RECORD);
+        assertFalse(context.shouldAbortBatch());
+        assertFalse(context.shouldCommitOn(RECORD));
+        assertFalse(context.shouldCommitBatch());
+        assertTrue(context.shouldAbortOn(RECORD));
+    }
+
+    @Test
+    public void shouldNotCommitBatchRepeatedly() {
+        context.commitTransaction();
+        assertTrue(context.shouldCommitBatch());
+        assertFalse(context.shouldCommitBatch());
+    }
+
+    @Test
+    public void shouldNotCommitRecordRepeatedly() {
+        context.commitTransaction(RECORD);
+        assertTrue(context.shouldCommitOn(RECORD));
+        assertFalse(context.shouldCommitOn(RECORD));
+    }
+
+    @Test
+    public void shouldNotAbortBatchRepeatedly() {
+        context.abortTransaction();
+        assertTrue(context.shouldAbortBatch());
+        assertFalse(context.shouldAbortBatch());
+    }
+
+    @Test
+    public void shouldNotAbortRecordRepeatedly() {
+        context.abortTransaction(RECORD);
+        assertTrue(context.shouldAbortOn(RECORD));
+        assertFalse(context.shouldAbortOn(RECORD));
+    }
+
+    @Test
+    public void shouldDisallowConflictingRequests() {
+        context.commitTransaction();
+        context.abortTransaction();
+        assertThrows(IllegalStateException.class, context::shouldCommitBatch);
+        assertThrows(IllegalStateException.class, context::shouldAbortBatch);
+
+        context = new WorkerTransactionContext();
+        context.commitTransaction(RECORD);
+        context.abortTransaction(RECORD);
+        assertThrows(IllegalStateException.class, () -> context.shouldCommitOn(RECORD));
+        assertThrows(IllegalStateException.class, () -> context.shouldAbortOn(RECORD));
+    }
+
+}
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/ConnectProtocolCompatibilityTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/ConnectProtocolCompatibilityTest.java
index 883574957dc14..fdb4c542f7d1b 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/ConnectProtocolCompatibilityTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/ConnectProtocolCompatibilityTest.java
@@ -16,32 +16,22 @@
  */
 package org.apache.kafka.connect.runtime.distributed;
 
-import org.apache.kafka.connect.runtime.TargetState;
-import org.apache.kafka.connect.storage.KafkaConfigBackingStore;
 import org.apache.kafka.connect.util.ConnectorTaskId;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Rule;
 import org.junit.Test;
-import org.mockito.Mock;
-import org.mockito.junit.MockitoJUnit;
-import org.mockito.junit.MockitoRule;
 
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
 
 import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1;
+import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.when;
 
 public class ConnectProtocolCompatibilityTest {
+    private static final String LEADER = "leader";
     private static final String LEADER_URL = "leaderUrl:8083";
+    private static final long CONFIG_OFFSET = 1;
 
     private String connectorId1 = "connector1";
     private String connectorId2 = "connector2";
@@ -51,95 +41,58 @@ public class ConnectProtocolCompatibilityTest {
     private ConnectorTaskId taskId2x0 = new ConnectorTaskId(connectorId2, 0);
     private ConnectorTaskId taskId3x0 = new ConnectorTaskId(connectorId3, 0);
 
-    @Rule
-    public MockitoRule rule = MockitoJUnit.rule();
-
-    @Mock
-    private KafkaConfigBackingStore configStorage;
-    private ClusterConfigState configState;
-
-    @Before
-    public void setup() {
-        configStorage = mock(KafkaConfigBackingStore.class);
-        configState = new ClusterConfigState(
-                1L,
-                null,
-                Collections.singletonMap(connectorId1, 1),
-                Collections.singletonMap(connectorId1, new HashMap<>()),
-                Collections.singletonMap(connectorId1, TargetState.STARTED),
-                Collections.singletonMap(taskId1x0, new HashMap<>()),
-                Collections.emptySet());
-    }
-
-    @After
-    public void teardown() {
-        verifyNoMoreInteractions(configStorage);
-    }
-
     @Test
     public void testEagerToEagerMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ExtendedWorkerState workerState = new ExtendedWorkerState(LEADER_URL, configStorage.snapshot().offset(), null);
+        ConnectProtocol.WorkerState workerState = emptyWorkerState();
         ByteBuffer metadata = ConnectProtocol.serializeMetadata(workerState);
         ConnectProtocol.WorkerState state = ConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
     public void testCoopToCoopMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ExtendedWorkerState workerState = new ExtendedWorkerState(LEADER_URL, configStorage.snapshot().offset(), null);
+        ExtendedWorkerState workerState = emptyExtendedWorkerState(CONNECT_PROTOCOL_V1);
         ByteBuffer metadata = IncrementalCooperativeConnectProtocol.serializeMetadata(workerState, false);
         ExtendedWorkerState state = IncrementalCooperativeConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
     public void testSessionedToCoopMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ExtendedWorkerState workerState = new ExtendedWorkerState(LEADER_URL, configStorage.snapshot().offset(), null);
+        ExtendedWorkerState workerState = emptyExtendedWorkerState(CONNECT_PROTOCOL_V2);
         ByteBuffer metadata = IncrementalCooperativeConnectProtocol.serializeMetadata(workerState, true);
         ExtendedWorkerState state = IncrementalCooperativeConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
     public void testSessionedToEagerMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ExtendedWorkerState workerState = new ExtendedWorkerState(LEADER_URL, configStorage.snapshot().offset(), null);
+        ExtendedWorkerState workerState = emptyExtendedWorkerState(CONNECT_PROTOCOL_V2);
         ByteBuffer metadata = IncrementalCooperativeConnectProtocol.serializeMetadata(workerState, true);
         ConnectProtocol.WorkerState state = ConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
     public void testCoopToEagerMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ExtendedWorkerState workerState = new ExtendedWorkerState(LEADER_URL, configStorage.snapshot().offset(), null);
+        ExtendedWorkerState workerState = emptyExtendedWorkerState(CONNECT_PROTOCOL_V1);
         ByteBuffer metadata = IncrementalCooperativeConnectProtocol.serializeMetadata(workerState, false);
         ConnectProtocol.WorkerState state = ConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
     public void testEagerToCoopMetadata() {
-        when(configStorage.snapshot()).thenReturn(configState);
-        ConnectProtocol.WorkerState workerState = new ConnectProtocol.WorkerState(LEADER_URL, configStorage.snapshot().offset());
+        ConnectProtocol.WorkerState workerState = emptyWorkerState();
         ByteBuffer metadata = ConnectProtocol.serializeMetadata(workerState);
         ConnectProtocol.WorkerState state = IncrementalCooperativeConnectProtocol.deserializeMetadata(metadata);
         assertEquals(LEADER_URL, state.url());
         assertEquals(1, state.offset());
-        verify(configStorage).snapshot();
     }
 
     @Test
@@ -176,7 +129,7 @@ public void testCoopToCoopAssignment() {
                 Arrays.asList(connectorId1, connectorId3), Arrays.asList(taskId2x0),
                 Collections.emptyList(), Collections.emptyList(), 0);
 
-        ByteBuffer leaderBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment);
+        ByteBuffer leaderBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment, false);
         ConnectProtocol.Assignment leaderAssignment = ConnectProtocol.deserializeAssignment(leaderBuf);
         assertFalse(leaderAssignment.failed());
         assertEquals("leader", leaderAssignment.leader());
@@ -235,7 +188,7 @@ public void testCoopToEagerAssignment() {
                 Arrays.asList(connectorId1, connectorId3), Arrays.asList(taskId2x0),
                 Collections.emptyList(), Collections.emptyList(), 0);
 
-        ByteBuffer leaderBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment);
+        ByteBuffer leaderBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment, false);
         ConnectProtocol.Assignment leaderAssignment = ConnectProtocol.deserializeAssignment(leaderBuf);
         assertFalse(leaderAssignment.failed());
         assertEquals("leader", leaderAssignment.leader());
@@ -248,7 +201,7 @@ public void testCoopToEagerAssignment() {
                 Arrays.asList(connectorId2), Arrays.asList(taskId1x0, taskId3x0),
                 Collections.emptyList(), Collections.emptyList(), 0);
 
-        ByteBuffer memberBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment2);
+        ByteBuffer memberBuf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment2, false);
         ConnectProtocol.Assignment memberAssignment = ConnectProtocol.deserializeAssignment(memberBuf);
         assertFalse(memberAssignment.failed());
         assertEquals("member", memberAssignment.leader());
@@ -257,4 +210,24 @@ public void testCoopToEagerAssignment() {
         assertEquals(Arrays.asList(taskId1x0, taskId3x0), memberAssignment.tasks());
     }
 
+    private ConnectProtocol.WorkerState emptyWorkerState() {
+        return new ConnectProtocol.WorkerState(LEADER_URL, CONFIG_OFFSET);
+    }
+
+    private ExtendedWorkerState emptyExtendedWorkerState(short protocolVersion) {
+        ExtendedAssignment assignment = new ExtendedAssignment(
+                protocolVersion,
+                ConnectProtocol.Assignment.NO_ERROR,
+                LEADER,
+                LEADER_URL,
+                CONFIG_OFFSET,
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                0
+        );
+        return new ExtendedWorkerState(LEADER_URL, CONFIG_OFFSET, assignment);
+    }
+
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedConfigTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedConfigTest.java
index e95232748b2e1..3996c9714eb93 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedConfigTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedConfigTest.java
@@ -17,20 +17,36 @@
 
 package org.apache.kafka.connect.runtime.distributed;
 
+import org.apache.kafka.clients.CommonClientConfigs;
 import org.apache.kafka.common.config.ConfigException;
 import org.junit.Test;
+import org.mockito.MockedStatic;
 
 import javax.crypto.KeyGenerator;
+import javax.crypto.Mac;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.GROUP_ID_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
 
 public class DistributedConfigTest {
 
@@ -52,13 +68,96 @@ public void shouldCreateKeyGeneratorWithDefaultSettings() {
         assertNotNull(config.getInternalRequestKeyGenerator());
     }
 
+    @Test
+    public void testDefaultAlgorithmsNotPresent() {
+        final String fakeKeyGenerationAlgorithm = "FakeKeyGenerationAlgorithm";
+        final String fakeMacAlgorithm = "FakeMacAlgorithm";
+
+        final KeyGenerator fakeKeyGenerator = mock(KeyGenerator.class);
+        final Mac fakeMac = mock(Mac.class);
+
+        Map<String, String> configs = configs();
+        configs.put(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, fakeKeyGenerationAlgorithm);
+        configs.put(DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG, fakeMacAlgorithm);
+        configs.put(DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, fakeMacAlgorithm);
+
+        try (
+                MockedStatic<KeyGenerator> keyGenerator = mockStatic(KeyGenerator.class);
+                MockedStatic<Mac> mac = mockStatic(Mac.class)
+        ) {
+            // Make it seem like the default key generation algorithm isn't available on this worker
+            keyGenerator.when(() -> KeyGenerator.getInstance(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT))
+                    .thenThrow(new NoSuchAlgorithmException());
+            // But the one specified in the worker config file is
+            keyGenerator.when(() -> KeyGenerator.getInstance(fakeKeyGenerationAlgorithm))
+                    .thenReturn(fakeKeyGenerator);
+
+            // And for the signature algorithm
+            mac.when(() -> Mac.getInstance(DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_DEFAULT))
+                    .thenThrow(new NoSuchAlgorithmException());
+            // Likewise for key verification algorithms
+            DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_DEFAULT.forEach(verificationAlgorithm ->
+                keyGenerator.when(() -> Mac.getInstance(verificationAlgorithm))
+                        .thenThrow(new NoSuchAlgorithmException())
+            );
+            mac.when(() -> Mac.getInstance(fakeMacAlgorithm))
+                    .thenReturn(fakeMac);
+
+            // Should succeed; even though the defaults aren't present, the manually-specified algorithms are valid
+            new DistributedConfig(configs);
+
+            // Should fail; the default key generation algorithm isn't present, and no override is specified
+            String removed = configs.remove(INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG);
+            assertThrows(ConfigException.class, () -> new DistributedConfig(configs));
+            configs.put(INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, removed);
+
+            // Should fail; the default key generation algorithm isn't present, and no override is specified
+            removed = configs.remove(INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG);
+            assertThrows(ConfigException.class, () -> new DistributedConfig(configs));
+            configs.put(INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG, removed);
+
+            // Should fail; the default key generation algorithm isn't present, and no override is specified
+            removed = configs.remove(INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG);
+            assertThrows(ConfigException.class, () -> new DistributedConfig(configs));
+            configs.put(INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, removed);
+        }
+    }
+
+    @Test
+    public void testSupportedMacAlgorithms() {
+        // These algorithms are required to be supported on JVMs ranging from at least Java 8 through Java 17; see
+        // https://docs.oracle.com/javase/8/docs/api/javax/crypto/Mac.html
+        // and https://docs.oracle.com/en/java/javase/17/docs/api/java.base/javax/crypto/Mac.html
+        testSupportedAlgorithms(
+                "Mac",
+                "HmacSHA1", "HmacSHA256"
+        );
+    }
+
+    @Test
+    public void testSupportedKeyGeneratorAlgorithms() {
+        // These algorithms are required to be supported on JVMs ranging from at least Java 8 through Java 17; see
+        // https://docs.oracle.com/javase/8/docs/api/javax/crypto/KeyGenerator.html
+        // and https://docs.oracle.com/en/java/javase/17/docs/api/java.base/javax/crypto/KeyGenerator.html
+        testSupportedAlgorithms(
+                "KeyGenerator",
+                "AES", "DESede", "HmacSHA1", "HmacSHA256"
+        );
+    }
+
+    private void testSupportedAlgorithms(String type, String... expectedAlgorithms) {
+        Set<String> supportedAlgorithms = DistributedConfig.supportedAlgorithms(type);
+        Set<String> unuspportedAlgorithms = new HashSet<>(Arrays.asList(expectedAlgorithms));
+        unuspportedAlgorithms.removeAll(supportedAlgorithms);
+        assertEquals(type + " algorithms were found that should be supported by this JVM but are not", Collections.emptySet(), unuspportedAlgorithms);
+    }
+
     @Test
     public void shouldCreateKeyGeneratorWithSpecificSettings() {
         final String algorithm = "HmacSHA1";
         Map<String, String> configs = configs();
         configs.put(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, algorithm);
         configs.put(DistributedConfig.INTER_WORKER_KEY_SIZE_CONFIG, "512");
-        configs.put(DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, algorithm);
         DistributedConfig config = new DistributedConfig(configs);
         KeyGenerator keyGenerator = config.getInternalRequestKeyGenerator();
         assertNotNull(keyGenerator);
@@ -74,13 +173,22 @@ public void shouldFailWithEmptyListOfVerificationAlgorithms() {
     }
 
     @Test
-    public void shouldFailIfKeyAlgorithmNotInVerificationAlgorithmsList() {
+    public void shouldFailIfSignatureAlgorithmNotInVerificationAlgorithmsList() {
         Map<String, String> configs = configs();
-        configs.put(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, "HmacSHA1");
+        configs.put(DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG, "HmacSHA1");
         configs.put(DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, "HmacSHA256");
         assertThrows(ConfigException.class, () -> new DistributedConfig(configs));
     }
 
+    @Test
+    public void shouldNotFailIfKeyAlgorithmNotInVerificationAlgorithmsList() {
+        Map<String, String> configs = configs();
+        configs.put(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_CONFIG, "HmacSHA1");
+        configs.put(DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG, "HmacSHA256");
+        configs.put(DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG, "HmacSHA256");
+        new DistributedConfig(configs);
+    }
+
     @Test
     public void shouldFailWithInvalidKeyAlgorithm() {
         Map<String, String> configs = configs();
@@ -294,4 +402,52 @@ public void shouldRemoveCompactionFromStatusTopicSettings() {
         assertEquals(expectedTopicSettings, actual);
         assertNotEquals(topicSettings, actual);
     }
+
+    @Test
+    public void testInvalidSecurityProtocol() {
+        Map<String, String> configs = configs();
+
+        configs.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc");
+        ConfigException ce = assertThrows(ConfigException.class,
+                () -> new DistributedConfig(configs));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
+
+    @Test
+    public void shouldIdentifyNeedForTransactionalLeader() {
+        Map<String, String> workerProps = configs();
+
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "disabled");
+        assertFalse(new DistributedConfig(workerProps).transactionalLeaderEnabled());
+
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        assertTrue(new DistributedConfig(workerProps).transactionalLeaderEnabled());
+
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        assertTrue(new DistributedConfig(workerProps).transactionalLeaderEnabled());
+    }
+
+    @Test
+    public void shouldConstructExpectedTransactionalId() {
+        Map<String, String> workerProps = configs();
+
+        workerProps.put(GROUP_ID_CONFIG, "why did i stay up all night writing unit tests");
+        assertEquals(
+                "connect-cluster-why did i stay up all night writing unit tests",
+                new DistributedConfig(workerProps).transactionalProducerId()
+        );
+
+        workerProps.put(GROUP_ID_CONFIG, "connect-cluster");
+        assertEquals(
+                "connect-cluster-connect-cluster",
+                new DistributedConfig(workerProps).transactionalProducerId()
+        );
+
+        workerProps.put(GROUP_ID_CONFIG, "\u2603");
+        assertEquals(
+                "connect-cluster-\u2603",
+                new DistributedConfig(workerProps).transactionalProducerId()
+        );
+    }
+
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java
index 6ddf04776bfda..3249412259e36 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/DistributedHerderTest.java
@@ -17,9 +17,11 @@
 package org.apache.kafka.connect.runtime.distributed;
 
 import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.KafkaException;
+import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.config.ConfigValue;
+import org.apache.kafka.common.errors.AuthorizationException;
 import org.apache.kafka.common.utils.MockTime;
-import org.apache.kafka.connect.connector.Connector;
 import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.connector.policy.NoneConnectorClientConfigOverridePolicy;
 import org.apache.kafka.connect.errors.AlreadyExistsException;
@@ -33,6 +35,7 @@
 import org.apache.kafka.connect.runtime.RestartRequest;
 import org.apache.kafka.connect.runtime.SessionKey;
 import org.apache.kafka.connect.runtime.SinkConnectorConfig;
+import org.apache.kafka.connect.runtime.SourceConnectorConfig;
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.TaskConfig;
 import org.apache.kafka.connect.runtime.TopicStatus;
@@ -44,6 +47,7 @@
 import org.apache.kafka.connect.runtime.isolation.PluginClassLoader;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
 import org.apache.kafka.connect.runtime.rest.InternalRequestSignature;
+import org.apache.kafka.connect.runtime.rest.RestClient;
 import org.apache.kafka.connect.runtime.rest.entities.ConfigInfos;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
 import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
@@ -52,14 +56,19 @@
 import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
 import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
 import org.apache.kafka.connect.sink.SinkConnector;
+import org.apache.kafka.connect.source.ConnectorTransactionBoundaries;
+import org.apache.kafka.connect.source.ExactlyOnceSupport;
 import org.apache.kafka.connect.source.SourceConnector;
 import org.apache.kafka.connect.source.SourceTask;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
 import org.apache.kafka.connect.storage.StatusBackingStore;
 import org.apache.kafka.connect.util.Callback;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.apache.kafka.connect.util.FutureCallback;
+import org.apache.kafka.connect.util.ThreadedTest;
 import org.easymock.Capture;
+import org.easymock.CaptureType;
 import org.easymock.EasyMock;
 import org.junit.After;
 import org.junit.Before;
@@ -82,32 +91,41 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import static java.util.Collections.singletonList;
 import static javax.ws.rs.core.Response.Status.FORBIDDEN;
+import static org.apache.kafka.connect.runtime.SourceConnectorConfig.ExactlyOnceSupportLevel.REQUIRED;
 import static org.apache.kafka.connect.runtime.distributed.ConnectProtocol.CONNECT_PROTOCOL_V0;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
 import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT;
 import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1;
 import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2;
+import static org.apache.kafka.connect.source.SourceTask.TransactionBoundary.CONNECTOR;
+import static org.easymock.EasyMock.anyLong;
 import static org.easymock.EasyMock.anyObject;
 import static org.easymock.EasyMock.capture;
 import static org.easymock.EasyMock.newCapture;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 @RunWith(PowerMockRunner.class)
-@PrepareForTest({DistributedHerder.class, Plugins.class})
+@PrepareForTest({DistributedHerder.class, Plugins.class, RestClient.class})
 @PowerMockIgnore({"javax.management.*", "javax.crypto.*"})
-public class DistributedHerderTest {
+public class DistributedHerderTest extends ThreadedTest {
     private static final Map<String, String> HERDER_CONFIG = new HashMap<>();
     static {
         HERDER_CONFIG.put(DistributedConfig.STATUS_STORAGE_TOPIC_CONFIG, "status-topic");
@@ -172,13 +190,13 @@ public class DistributedHerderTest {
     }
     private static final ClusterConfigState SNAPSHOT = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
     private static final ClusterConfigState SNAPSHOT_PAUSED_CONN1 = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.PAUSED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
     private static final ClusterConfigState SNAPSHOT_UPDATED_CONN1_CONFIG = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG_UPDATED), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
 
     private static final String WORKER_ID = "localhost:8083";
     private static final String KAFKA_CLUSTER_ID = "I4ZmrWqfT2e-upky_4fdPA";
@@ -201,6 +219,7 @@ public class DistributedHerderTest {
 
     private ConfigBackingStore.UpdateListener configUpdateListener;
     private WorkerRebalanceListener rebalanceListener;
+    private ExecutorService herderExecutor;
 
     private SinkConnectorConfig conn1SinkConfig;
     private SinkConnectorConfig conn1SinkConfigUpdated;
@@ -242,6 +261,10 @@ public void setUp() throws Exception {
     @After
     public void tearDown() {
         if (metrics != null) metrics.stop();
+        if (herderExecutor != null) {
+            herderExecutor.shutdownNow();
+            herderExecutor = null;
+        }
     }
 
     @Test
@@ -251,7 +274,7 @@ public void testJoinAssignment() throws Exception {
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         Capture<Callback<TargetState>> onStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), capture(onStart));
@@ -264,7 +287,7 @@ public void testJoinAssignment() throws Exception {
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         PowerMock.expectLastCall();
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -286,7 +309,7 @@ public void testRebalance() throws Exception {
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         Capture<Callback<TargetState>> onFirstStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), capture(onFirstStart));
@@ -298,7 +321,7 @@ public void testRebalance() throws Exception {
         PowerMock.expectLastCall();
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -347,7 +370,7 @@ public void testIncrementalCooperativeRebalanceForNewMember() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V1);
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
@@ -371,7 +394,7 @@ public void testIncrementalCooperativeRebalanceForNewMember() throws Exception {
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
 
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -442,9 +465,9 @@ public void testIncrementalCooperativeRebalanceWithDelay() throws Exception {
                 ConnectProtocol.Assignment.NO_ERROR, 1,
                 Collections.emptyList(), Arrays.asList(TASK2),
                 rebalanceDelay);
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
-        worker.startTask(EasyMock.eq(TASK2), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK2), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -476,7 +499,7 @@ public void testIncrementalCooperativeRebalanceWithDelay() throws Exception {
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
 
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -503,7 +526,7 @@ public void testRebalanceFailedConnector() throws Exception {
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         Capture<Callback<TargetState>> onFirstStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), capture(onFirstStart));
@@ -515,7 +538,7 @@ public void testRebalanceFailedConnector() throws Exception {
         PowerMock.expectLastCall();
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -574,7 +597,7 @@ public void revokeAndReassign(boolean incompleteRebalance) throws TimeoutExcepti
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         // The lists need to be mutable because assignments might be removed
         expectRebalance(configOffset, new ArrayList<>(singletonList(CONN1)), new ArrayList<>(singletonList(TASK1)));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         Capture<Callback<TargetState>> onFirstStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
             EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), capture(onFirstStart));
@@ -586,8 +609,8 @@ public void revokeAndReassign(boolean incompleteRebalance) throws TimeoutExcepti
         PowerMock.expectLastCall();
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
-            EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+                EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
@@ -600,7 +623,7 @@ public void revokeAndReassign(boolean incompleteRebalance) throws TimeoutExcepti
             configOffset++;
             expectRebalance(configOffset, Arrays.asList(), Arrays.asList());
             // give it the wrong snapshot, as if we're out of sync/can't reach the broker
-            expectPostRebalanceCatchup(SNAPSHOT);
+            expectConfigRefreshAndSnapshot(SNAPSHOT);
             member.requestRejoin();
             PowerMock.expectLastCall();
             // tick exits early because we failed, and doesn't do the poll at the end of the method
@@ -618,8 +641,8 @@ public void revokeAndReassign(boolean incompleteRebalance) throws TimeoutExcepti
             ClusterConfigState secondSnapshot = new ClusterConfigState(
                 configOffset, null, Collections.singletonMap(CONN1, 3),
                 Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-                TASK_CONFIGS_MAP, Collections.emptySet());
-            expectPostRebalanceCatchup(secondSnapshot);
+                TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
+            expectConfigRefreshAndSnapshot(secondSnapshot);
         }
         member.requestRejoin();
         PowerMock.expectLastCall();
@@ -683,8 +706,8 @@ public void testHaltCleansUpWorker() {
     public void testCreateConnector() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         member.wakeup();
         PowerMock.expectLastCall();
@@ -738,8 +761,8 @@ public void testCreateConnector() throws Exception {
     public void testCreateConnectorFailedValidation() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         HashMap<String, String> config = new HashMap<>(CONN2_CONFIG);
         config.remove(ConnectorConfig.NAME_CONFIG);
@@ -790,22 +813,261 @@ public void testCreateConnectorFailedValidation() throws Exception {
         PowerMock.verifyAll();
     }
 
-    @SuppressWarnings("unchecked")
     @Test
     public void testConnectorNameConflictsWithWorkerGroupId() {
         Map<String, String> config = new HashMap<>(CONN2_CONFIG);
         config.put(ConnectorConfig.NAME_CONFIG, "test-group");
 
-        Connector connectorMock = PowerMock.createMock(SinkConnector.class);
+        SinkConnector connectorMock = PowerMock.createMock(SinkConnector.class);
+
+        PowerMock.replayAll(connectorMock);
 
         // CONN2 creation should fail because the worker group id (connect-test-group) conflicts with
         // the consumer group id we would use for this sink
-        Map<String, ConfigValue> validatedConfigs =
-            herder.validateBasicConnectorConfig(connectorMock, ConnectorConfig.configDef(), config);
+        Map<String, ConfigValue> validatedConfigs = herder.validateSinkConnectorConfig(
+                connectorMock, SinkConnectorConfig.configDef(), config);
 
         ConfigValue nameConfig = validatedConfigs.get(ConnectorConfig.NAME_CONFIG);
-        assertNotNull(nameConfig.errorMessages());
-        assertFalse(nameConfig.errorMessages().isEmpty());
+        assertEquals(
+                Collections.singletonList("Consumer group for sink connector named test-group conflicts with Connect worker group connect-test-group"),
+                nameConfig.errorMessages());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidation() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, REQUIRED.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.exactlyOnceSupport(EasyMock.eq(config)))
+                .andReturn(ExactlyOnceSupport.SUPPORTED);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertEquals(Collections.emptyList(), errors);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidationOnUnsupportedConnector() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, REQUIRED.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.exactlyOnceSupport(EasyMock.eq(config)))
+                .andReturn(ExactlyOnceSupport.UNSUPPORTED);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertEquals(
+                Collections.singletonList("The connector does not support exactly-once delivery guarantees with the provided configuration."),
+                errors);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidationOnUnknownConnector() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, REQUIRED.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.exactlyOnceSupport(EasyMock.eq(config)))
+                .andReturn(null);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains("The connector does not implement the API required for preflight validation of exactly-once source support."));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidationHandlesConnectorErrorsGracefully() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, REQUIRED.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        String errorMessage = "time to add a new unit test :)";
+        EasyMock.expect(connectorMock.exactlyOnceSupport(EasyMock.eq(config)))
+                .andThrow(new NullPointerException(errorMessage));
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains(errorMessage));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidationWhenExactlyOnceNotEnabledOnWorker() {
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, REQUIRED.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.exactlyOnceSupport(EasyMock.eq(config)))
+                .andReturn(ExactlyOnceSupport.SUPPORTED);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertEquals(
+                Collections.singletonList("This worker does not have exactly-once source support enabled."),
+                errors);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExactlyOnceSourceSupportValidationHandlesInvalidValuesGracefully() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG, "invalid");
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.EXACTLY_ONCE_SUPPORT_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains("String must be one of (case insensitive): "));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConnectorTransactionBoundaryValidation() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG, CONNECTOR.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.canDefineTransactionBoundaries(EasyMock.eq(config)))
+                .andReturn(ConnectorTransactionBoundaries.SUPPORTED);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG).errorMessages();
+        assertEquals(Collections.emptyList(), errors);
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConnectorTransactionBoundaryValidationOnUnsupportedConnector() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG, CONNECTOR.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        EasyMock.expect(connectorMock.canDefineTransactionBoundaries(EasyMock.eq(config)))
+                .andReturn(ConnectorTransactionBoundaries.UNSUPPORTED);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains("The connector does not support connector-defined transaction boundaries with the given configuration."));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConnectorTransactionBoundaryValidationHandlesConnectorErrorsGracefully() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG, CONNECTOR.toString());
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+        String errorMessage = "Wait I thought we tested for this?";
+        EasyMock.expect(connectorMock.canDefineTransactionBoundaries(EasyMock.eq(config)))
+                .andThrow(new ConnectException(errorMessage));
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains(errorMessage));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConnectorTransactionBoundaryValidationHandlesInvalidValuesGracefully() {
+        herder = exactlyOnceHerder();
+        Map<String, String> config = new HashMap<>();
+        config.put(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG, "CONNECTOR.toString()");
+
+        SourceConnector connectorMock = PowerMock.createMock(SourceConnector.class);
+
+        PowerMock.replayAll(connectorMock);
+
+        Map<String, ConfigValue> validatedConfigs = herder.validateSourceConnectorConfig(
+                connectorMock, SourceConnectorConfig.configDef(), config);
+
+        List<String> errors = validatedConfigs.get(SourceConnectorConfig.TRANSACTION_BOUNDARY_CONFIG).errorMessages();
+        assertFalse(errors.isEmpty());
+        assertTrue(
+                "Error message did not contain expected text: " + errors.get(0),
+                errors.get(0).contains("String must be one of (case insensitive): "));
+        assertEquals(1, errors.size());
+
+        PowerMock.verifyAll();
     }
 
     @Test
@@ -822,8 +1084,8 @@ public void testCreateConnectorAlreadyExists() throws Exception {
             return null;
         });
 
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         member.wakeup();
         PowerMock.expectLastCall();
@@ -861,8 +1123,8 @@ public void testDestroyConnector() throws Exception {
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         // Start with one connector
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
-        expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         Capture<Callback<TargetState>> onStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), capture(onStart));
@@ -895,9 +1157,9 @@ public void testDestroyConnector() throws Exception {
         statusBackingStore.deleteTopic(EasyMock.eq(CONN1), EasyMock.eq(BAR_TOPIC));
         PowerMock.expectLastCall().times(2);
         expectRebalance(Arrays.asList(CONN1), Arrays.asList(TASK1),
-                ConnectProtocol.Assignment.NO_ERROR, 2,
-                Collections.emptyList(), Collections.emptyList(), 0);
-        expectPostRebalanceCatchup(ClusterConfigState.EMPTY);
+                ConnectProtocol.Assignment.NO_ERROR, 2, "leader", "leaderUrl",
+                Collections.emptyList(), Collections.emptyList(), 0, true);
+        expectConfigRefreshAndSnapshot(ClusterConfigState.EMPTY);
         member.requestRejoin();
         PowerMock.expectLastCall();
         PowerMock.replayAll();
@@ -925,8 +1187,8 @@ public void testRestartConnector() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
-        expectRebalance(1, singletonList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, singletonList(CONN1), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
         Capture<Callback<TargetState>> onFirstStart = newCapture();
@@ -978,8 +1240,8 @@ public void testRestartUnknownConnector() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1013,7 +1275,7 @@ public void testRestartConnectorRedirectToLeader() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1047,8 +1309,8 @@ public void testRestartConnectorRedirectToOwner() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1096,8 +1358,8 @@ public void testRestartConnectorAndTasksUnknownConnector() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1130,7 +1392,7 @@ public void testRestartConnectorAndTasksNotLeader() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1165,8 +1427,8 @@ public void testRestartConnectorAndTasksUnknownStatus() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1205,8 +1467,8 @@ public void testRestartConnectorAndTasksSuccess() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1289,27 +1551,30 @@ public void testDoRestartConnectorAndTasksOnlyConnector() {
 
     @Test
     public void testDoRestartConnectorAndTasksOnlyTasks() {
-        ConnectorTaskId taskId = new ConnectorTaskId(CONN1, 0);
         RestartRequest restartRequest = new RestartRequest(CONN1, false, true);
         RestartPlan restartPlan = PowerMock.createMock(RestartPlan.class);
         EasyMock.expect(restartPlan.shouldRestartConnector()).andReturn(true).anyTimes();
         EasyMock.expect(restartPlan.shouldRestartTasks()).andReturn(true).anyTimes();
-        EasyMock.expect(restartPlan.taskIdsToRestart()).andReturn(Collections.singletonList(taskId)).anyTimes();
-        EasyMock.expect(restartPlan.restartTaskCount()).andReturn(1).anyTimes();
-        EasyMock.expect(restartPlan.totalTaskCount()).andReturn(1).anyTimes();
+        // The connector has three tasks
+        EasyMock.expect(restartPlan.taskIdsToRestart()).andReturn(Arrays.asList(TASK0, TASK1, TASK2)).anyTimes();
+        EasyMock.expect(restartPlan.restartTaskCount()).andReturn(3).anyTimes();
+        EasyMock.expect(restartPlan.totalTaskCount()).andReturn(3).anyTimes();
         EasyMock.expect(herder.buildRestartPlan(restartRequest)).andReturn(Optional.of(restartPlan)).anyTimes();
 
         herder.assignment = PowerMock.createMock(ExtendedAssignment.class);
         EasyMock.expect(herder.assignment.connectors()).andReturn(Collections.emptyList()).anyTimes();
-        EasyMock.expect(herder.assignment.tasks()).andReturn(Collections.singletonList(taskId)).anyTimes();
+        // But only one task is assigned to this worker
+        EasyMock.expect(herder.assignment.tasks()).andReturn(Collections.singletonList(TASK0)).anyTimes();
 
-        worker.stopAndAwaitTasks(Collections.singletonList(taskId));
+        herder.configState = SNAPSHOT;
+
+        worker.stopAndAwaitTasks(Collections.singletonList(TASK0));
         PowerMock.expectLastCall();
 
-        herder.onRestart(taskId);
+        herder.onRestart(TASK0);
         EasyMock.expectLastCall();
 
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.anyObject(TargetState.class));
         PowerMock.expectLastCall().andReturn(true);
 
@@ -1334,6 +1599,8 @@ public void testDoRestartConnectorAndTasksBoth() {
         EasyMock.expect(herder.assignment.connectors()).andReturn(Collections.singletonList(CONN1)).anyTimes();
         EasyMock.expect(herder.assignment.tasks()).andReturn(Collections.singletonList(taskId)).anyTimes();
 
+        herder.configState = SNAPSHOT;
+
         worker.stopAndAwaitConnector(CONN1);
         PowerMock.expectLastCall();
 
@@ -1351,7 +1618,7 @@ public void testDoRestartConnectorAndTasksBoth() {
         herder.onRestart(taskId);
         EasyMock.expectLastCall();
 
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.anyObject(TargetState.class));
         PowerMock.expectLastCall().andReturn(true);
 
@@ -1367,11 +1634,11 @@ public void testRestartTask() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), singletonList(TASK0));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), singletonList(TASK0), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
 
@@ -1385,7 +1652,7 @@ public void testRestartTask() throws Exception {
 
         worker.stopAndAwaitTask(TASK0);
         PowerMock.expectLastCall();
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
 
@@ -1406,7 +1673,7 @@ public void testRestartUnknownTask() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1453,7 +1720,7 @@ public void testRestartTaskRedirectToLeader() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1487,8 +1754,8 @@ public void testRestartTaskRedirectToOwner() throws Exception {
         // get the initial assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -1580,7 +1847,7 @@ public void testConnectorConfigUpdate() throws Exception {
 
         // join
         expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         Capture<Callback<TargetState>> onFirstStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
@@ -1647,7 +1914,7 @@ public void testConnectorPaused() throws Exception {
 
         // join
         expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         Capture<Callback<TargetState>> onStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
@@ -1707,7 +1974,7 @@ public void testConnectorResumed() throws Exception {
 
         // start with the connector paused
         expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT_PAUSED_CONN1);
+        expectConfigRefreshAndSnapshot(SNAPSHOT_PAUSED_CONN1);
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         Capture<Callback<TargetState>> onStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
@@ -1770,8 +2037,8 @@ public void testUnknownConnectorPaused() throws Exception {
 
         // join
         expectRebalance(1, Collections.emptyList(), singletonList(TASK0));
-        expectPostRebalanceCatchup(SNAPSHOT);
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -1809,8 +2076,8 @@ public void testConnectorPausedRunningTaskOnly() throws Exception {
 
         // join
         expectRebalance(1, Collections.emptyList(), singletonList(TASK0));
-        expectPostRebalanceCatchup(SNAPSHOT);
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -1857,8 +2124,8 @@ public void testConnectorResumedRunningTaskOnly() throws Exception {
 
         // join
         expectRebalance(1, Collections.emptyList(), singletonList(TASK0));
-        expectPostRebalanceCatchup(SNAPSHOT_PAUSED_CONN1);
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        expectConfigRefreshAndSnapshot(SNAPSHOT_PAUSED_CONN1);
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.PAUSED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -1927,7 +2194,7 @@ public void testTaskConfigAdded() {
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
                 ConnectProtocol.Assignment.NO_ERROR, 1, Collections.emptyList(),
                 Arrays.asList(TASK0));
-        worker.startTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK0), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -1949,18 +2216,18 @@ public void testJoinLeaderCatchUpFails() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, Collections.emptyList(),
-                Collections.emptyList());
+                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, "leader", "leaderUrl", Collections.emptyList(),
+                Collections.emptyList(), 0, true);
         // Reading to end of log times out
-        configBackingStore.refresh(EasyMock.anyLong(), EasyMock.anyObject(TimeUnit.class));
+        configBackingStore.refresh(anyLong(), EasyMock.anyObject(TimeUnit.class));
         EasyMock.expectLastCall().andThrow(new TimeoutException());
         member.maybeLeaveGroup(EasyMock.eq("taking too long to read the log"));
         EasyMock.expectLastCall();
         member.requestRejoin();
 
         // After backoff, restart the process and this time succeed
-        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         Capture<Callback<TargetState>> onStart = newCapture();
@@ -1974,7 +2241,7 @@ public void testJoinLeaderCatchUpFails() throws Exception {
         PowerMock.expectLastCall();
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
@@ -1982,7 +2249,7 @@ public void testJoinLeaderCatchUpFails() throws Exception {
         PowerMock.expectLastCall();
 
         // one more tick, to make sure we don't keep trying to read to the config topic unnecessarily
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -2017,8 +2284,8 @@ public void testJoinLeaderCatchUpRetriesForIncrementalCooperative() throws Excep
         // Join group and as leader fail to do assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V1);
-        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
@@ -2026,7 +2293,7 @@ public void testJoinLeaderCatchUpRetriesForIncrementalCooperative() throws Excep
         // The leader got its assignment
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
                 ConnectProtocol.Assignment.NO_ERROR,
-                1, Arrays.asList(CONN1), Arrays.asList(TASK1), 0);
+                1, "leader", "leaderUrl", Arrays.asList(CONN1), Arrays.asList(TASK1), 0, true);
 
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         Capture<Callback<TargetState>> onStart = newCapture();
@@ -2041,7 +2308,7 @@ public void testJoinLeaderCatchUpRetriesForIncrementalCooperative() throws Excep
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
 
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -2050,15 +2317,15 @@ public void testJoinLeaderCatchUpRetriesForIncrementalCooperative() throws Excep
         // Another rebalance is triggered but this time it fails to read to the max offset and
         // triggers a re-sync
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, Collections.emptyList(),
-                Collections.emptyList());
+                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, "leader", "leaderUrl",
+                Collections.emptyList(), Collections.emptyList(), 0, true);
 
         // The leader will retry a few times to read to the end of the config log
         int retries = 2;
         member.requestRejoin();
         for (int i = retries; i >= 0; --i) {
             // Reading to end of log times out
-            configBackingStore.refresh(EasyMock.anyLong(), EasyMock.anyObject(TimeUnit.class));
+            configBackingStore.refresh(anyLong(), EasyMock.anyObject(TimeUnit.class));
             EasyMock.expectLastCall().andThrow(new TimeoutException());
             member.maybeLeaveGroup(EasyMock.eq("taking too long to read the log"));
             EasyMock.expectLastCall();
@@ -2067,8 +2334,8 @@ public void testJoinLeaderCatchUpRetriesForIncrementalCooperative() throws Excep
         // After a few retries succeed to read the log to the end
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
                 ConnectProtocol.Assignment.NO_ERROR,
-                1, Arrays.asList(CONN1), Arrays.asList(TASK1), 0);
-        expectPostRebalanceCatchup(SNAPSHOT);
+                1, "leader", "leaderUrl", Arrays.asList(CONN1), Arrays.asList(TASK1), 0, true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -2108,16 +2375,16 @@ public void testJoinLeaderCatchUpFailsForIncrementalCooperative() throws Excepti
         // Join group and as leader fail to do assignment
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V1);
-        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1));
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Arrays.asList(CONN1), Arrays.asList(TASK1), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
 
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
         // The leader got its assignment
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.NO_ERROR,
-                1, Arrays.asList(CONN1), Arrays.asList(TASK1), 0);
+                ConnectProtocol.Assignment.NO_ERROR, 1,
+                "leader", "leaderUrl", Arrays.asList(CONN1), Arrays.asList(TASK1), 0, true);
 
         EasyMock.expect(worker.getPlugins()).andReturn(plugins);
         // and the new assignment started
@@ -2133,7 +2400,7 @@ public void testJoinLeaderCatchUpFailsForIncrementalCooperative() throws Excepti
         EasyMock.expect(worker.isRunning(CONN1)).andReturn(true);
         EasyMock.expect(worker.connectorTaskConfigs(CONN1, conn1SinkConfig)).andReturn(TASK_CONFIGS);
 
-        worker.startTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
+        worker.startSourceTask(EasyMock.eq(TASK1), EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject(),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED));
         PowerMock.expectLastCall().andReturn(true);
         member.poll(EasyMock.anyInt());
@@ -2142,15 +2409,15 @@ public void testJoinLeaderCatchUpFailsForIncrementalCooperative() throws Excepti
         // Another rebalance is triggered but this time it fails to read to the max offset and
         // triggers a re-sync
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, Collections.emptyList(),
-                Collections.emptyList());
+                ConnectProtocol.Assignment.CONFIG_MISMATCH, 1, "leader", "leaderUrl",
+                Collections.emptyList(), Collections.emptyList(), 0, true);
 
         // The leader will exhaust the retries while trying to read to the end of the config log
         int maxRetries = 5;
         member.requestRejoin();
         for (int i = maxRetries; i >= 0; --i) {
             // Reading to end of log times out
-            configBackingStore.refresh(EasyMock.anyLong(), EasyMock.anyObject(TimeUnit.class));
+            configBackingStore.refresh(anyLong(), EasyMock.anyObject(TimeUnit.class));
             EasyMock.expectLastCall().andThrow(new TimeoutException());
             member.maybeLeaveGroup(EasyMock.eq("taking too long to read the log"));
             EasyMock.expectLastCall();
@@ -2164,8 +2431,9 @@ public void testJoinLeaderCatchUpFailsForIncrementalCooperative() throws Excepti
         // The worker gets back the assignment that had given up
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
                 ConnectProtocol.Assignment.NO_ERROR,
-                1, Arrays.asList(CONN1), Arrays.asList(TASK1), 0);
-        expectPostRebalanceCatchup(SNAPSHOT);
+                1, "leader", "leaderUrl", Arrays.asList(CONN1), Arrays.asList(TASK1),
+                0, true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.poll(EasyMock.anyInt());
         PowerMock.expectLastCall();
 
@@ -2204,7 +2472,7 @@ public void testAccessors() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         EasyMock.expect(worker.getPlugins()).andReturn(plugins).anyTimes();
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
         EasyMock.expect(configBackingStore.snapshot()).andReturn(SNAPSHOT).times(2);
 
         WorkerConfigTransformer configTransformer = EasyMock.mock(WorkerConfigTransformer.class);
@@ -2213,9 +2481,9 @@ public void testAccessors() throws Exception {
         EasyMock.replay(configTransformer);
         ClusterConfigState snapshotWithTransform = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet(), configTransformer);
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet(), configTransformer);
 
-        expectPostRebalanceCatchup(snapshotWithTransform);
+        expectConfigRefreshAndSnapshot(snapshotWithTransform);
 
 
         member.wakeup();
@@ -2259,8 +2527,8 @@ public void testAccessors() throws Exception {
     @Test
     public void testPutConnectorConfig() throws Exception {
         EasyMock.expect(member.memberId()).andStubReturn("leader");
-        expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Arrays.asList(CONN1), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V0);
         Capture<Callback<TargetState>> onFirstStart = newCapture();
         worker.startConnector(EasyMock.eq(CONN1), EasyMock.anyObject(), EasyMock.anyObject(),
@@ -2356,7 +2624,7 @@ public void testKeyRotationWhenWorkerBecomesLeader() throws Exception {
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
 
         expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         // First rebalance: poll indefinitely as no key has been read yet, so expiration doesn't come into play
         member.poll(Long.MAX_VALUE);
         EasyMock.expectLastCall();
@@ -2365,13 +2633,13 @@ public void testKeyRotationWhenWorkerBecomesLeader() throws Exception {
         SessionKey initialKey = new SessionKey(EasyMock.mock(SecretKey.class), 0);
         ClusterConfigState snapshotWithKey =  new ClusterConfigState(2, initialKey, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
-        expectPostRebalanceCatchup(snapshotWithKey);
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
+        expectConfigRefreshAndSnapshot(snapshotWithKey);
         // Second rebalance: poll indefinitely as worker is follower, so expiration still doesn't come into play
         member.poll(Long.MAX_VALUE);
         EasyMock.expectLastCall();
 
-        expectRebalance(2, Collections.emptyList(), Collections.emptyList(), "member", MEMBER_URL);
+        expectRebalance(2, Collections.emptyList(), Collections.emptyList(), "member", MEMBER_URL, true);
         Capture<SessionKey> updatedKey = EasyMock.newCapture();
         configBackingStore.putSessionKey(EasyMock.capture(updatedKey));
         EasyMock.expectLastCall().andAnswer(() -> {
@@ -2400,15 +2668,15 @@ public void testKeyRotationDisabledWhenWorkerBecomesFollower() throws Exception
         EasyMock.expect(member.memberId()).andStubReturn("member");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
 
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), "member", MEMBER_URL);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), "member", MEMBER_URL, true);
         SecretKey initialSecretKey = EasyMock.mock(SecretKey.class);
         EasyMock.expect(initialSecretKey.getAlgorithm()).andReturn(DistributedConfig.INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT).anyTimes();
         EasyMock.expect(initialSecretKey.getEncoded()).andReturn(new byte[32]).anyTimes();
         SessionKey initialKey = new SessionKey(initialSecretKey, time.milliseconds());
         ClusterConfigState snapshotWithKey =  new ClusterConfigState(1, initialKey, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
-        expectPostRebalanceCatchup(snapshotWithKey);
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
+        expectConfigRefreshAndSnapshot(snapshotWithKey);
         // First rebalance: poll for a limited time as worker is leader and must wake up for key expiration
         Capture<Long> firstPollTimeout = EasyMock.newCapture();
         member.poll(EasyMock.captureLong(firstPollTimeout));
@@ -2539,8 +2807,8 @@ public void testFailedToWriteSessionKey() throws Exception {
         // session key to the config topic, and fail
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         configBackingStore.putSessionKey(anyObject(SessionKey.class));
         EasyMock.expectLastCall().andThrow(new ConnectException("Oh no!"));
 
@@ -2548,7 +2816,7 @@ public void testFailedToWriteSessionKey() throws Exception {
         // then ensure we're still active in the group
         // then try a second time to write a new session key,
         // then finally begin polling for group activity
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         member.ensureActive();
         PowerMock.expectLastCall();
         configBackingStore.putSessionKey(anyObject(SessionKey.class));
@@ -2572,7 +2840,7 @@ public void testFailedToReadBackNewlyWrittenSessionKey() throws Exception {
         SessionKey sessionKey = new SessionKey(secretKey, time.milliseconds());
         ClusterConfigState snapshotWithSessionKey = new ClusterConfigState(1, sessionKey, Collections.singletonMap(CONN1, 3),
             Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
-            TASK_CONFIGS_MAP, Collections.emptySet());
+            TASK_CONFIGS_MAP, Collections.emptyMap(), Collections.emptyMap(), Collections.emptySet(), Collections.emptySet());
 
         // First tick -- after joining the group, we try to write a new session key to
         // the config topic, and fail (in this case, we're trying to simulate that we've
@@ -2581,8 +2849,8 @@ public void testFailedToReadBackNewlyWrittenSessionKey() throws Exception {
         // to write the key)
         EasyMock.expect(member.memberId()).andStubReturn("leader");
         EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
-        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
-        expectPostRebalanceCatchup(SNAPSHOT);
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
         configBackingStore.putSessionKey(anyObject(SessionKey.class));
         EasyMock.expectLastCall().andThrow(new ConnectException("Oh no!"));
 
@@ -2591,7 +2859,7 @@ public void testFailedToReadBackNewlyWrittenSessionKey() throws Exception {
         // then ensure we're still active in the group
         // then finally begin polling for group activity
         // Importantly, we do not try to write a new session key this time around
-        configBackingStore.refresh(EasyMock.anyLong(), EasyMock.anyObject(TimeUnit.class));
+        configBackingStore.refresh(anyLong(), EasyMock.anyObject(TimeUnit.class));
         EasyMock.expectLastCall().andAnswer(() -> {
             configUpdateListener.onSessionKeyUpdate(sessionKey);
             return null;
@@ -2610,6 +2878,562 @@ public void testFailedToReadBackNewlyWrittenSessionKey() throws Exception {
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testFenceZombiesInvalidSignature() {
+        // Don't have to run the whole gamut of scenarios (invalid signature, missing signature, earlier protocol that doesn't require signatures)
+        // since the task config tests cover that pretty well. One sanity check to ensure that this method is guarded should be sufficient.
+        Callback<Void> taskConfigCb = EasyMock.mock(Callback.class);
+        Capture<Throwable> errorCapture = Capture.newInstance();
+        taskConfigCb.onCompletion(capture(errorCapture), EasyMock.eq(null));
+        EasyMock.expectLastCall().once();
+
+        EasyMock.expect(member.currentProtocolVersion()).andReturn(CONNECT_PROTOCOL_V2).anyTimes();
+
+        InternalRequestSignature signature = EasyMock.mock(InternalRequestSignature.class);
+        EasyMock.expect(signature.keyAlgorithm()).andReturn("HmacSHA256").anyTimes();
+        EasyMock.expect(signature.isValid(EasyMock.anyObject())).andReturn(false).anyTimes();
+
+        PowerMock.replayAll(taskConfigCb, signature);
+
+        herder.fenceZombieSourceTasks(CONN1, taskConfigCb, signature);
+
+        PowerMock.verifyAll();
+        assertTrue(errorCapture.getValue() instanceof ConnectRestException);
+        assertEquals(FORBIDDEN.getStatusCode(), ((ConnectRestException) errorCapture.getValue()).statusCode());
+    }
+
+    @Test
+    public void testTaskRequestedZombieFencingForwardedToLeader() throws Exception {
+        testTaskRequestedZombieFencingForwardingToLeader(true);
+    }
+
+    @Test
+    public void testTaskRequestedZombieFencingFailedForwardToLeader() throws Exception {
+        testTaskRequestedZombieFencingForwardingToLeader(false);
+    }
+
+    private void testTaskRequestedZombieFencingForwardingToLeader(boolean succeed) throws Exception {
+        expectHerderStartup();
+        ExecutorService forwardRequestExecutor = EasyMock.mock(ExecutorService.class);
+        herder.forwardRequestExecutor = forwardRequestExecutor;
+
+        EasyMock.expect(member.memberId()).andStubReturn("member");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList());
+
+        expectAnyTicks();
+
+        member.wakeup();
+        EasyMock.expectLastCall();
+
+        PowerMock.mockStatic(RestClient.class);
+
+        org.easymock.IExpectationSetters<RestClient.HttpResponse<Object>> expectRequest = EasyMock.expect(
+                RestClient.httpRequest(
+                        anyObject(), EasyMock.eq("PUT"), EasyMock.isNull(), EasyMock.isNull(), EasyMock.isNull(), anyObject(), anyObject(), anyObject()
+                ));
+        if (succeed) {
+            expectRequest.andReturn(null);
+        } else {
+            expectRequest.andThrow(new ConnectRestException(409, "Rebalance :("));
+        }
+
+        Capture<Runnable> forwardRequest = EasyMock.newCapture();
+        forwardRequestExecutor.execute(EasyMock.capture(forwardRequest));
+        EasyMock.expectLastCall().andAnswer(() -> {
+            forwardRequest.getValue().run();
+            return null;
+        });
+
+        expectHerderShutdown(true);
+        forwardRequestExecutor.shutdown();
+        EasyMock.expectLastCall();
+        EasyMock.expect(forwardRequestExecutor.awaitTermination(anyLong(), anyObject())).andReturn(true);
+
+        PowerMock.replayAll(forwardRequestExecutor);
+
+
+        startBackgroundHerder();
+
+        FutureCallback<Void> fencing = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(TASK1, fencing);
+
+        if (!succeed) {
+            ExecutionException fencingException =
+                    assertThrows(ExecutionException.class, () -> fencing.get(10, TimeUnit.SECONDS));
+            assertTrue(fencingException.getCause() instanceof ConnectException);
+        } else {
+            fencing.get(10, TimeUnit.SECONDS);
+        }
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testExternalZombieFencingRequestForAlreadyFencedConnector() throws Exception {
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                expectNewSessionKey(),
+                TASK_CONFIGS_MAP,
+                Collections.singletonMap(CONN1, 12),
+                Collections.singletonMap(CONN1, 5),
+                Collections.emptySet()
+        );
+        testExternalZombieFencingRequestThatRequiresNoPhysicalFencing(configState, false);
+    }
+
+    @Test
+    public void testExternalZombieFencingRequestForSingleTaskConnector() throws Exception {
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                expectNewSessionKey(),
+                Collections.singletonMap(TASK1, TASK_CONFIG),
+                Collections.singletonMap(CONN1, 1),
+                Collections.singletonMap(CONN1, 5),
+                Collections.singleton(CONN1)
+        );
+        testExternalZombieFencingRequestThatRequiresNoPhysicalFencing(configState, true);
+    }
+
+    @Test
+    public void testExternalZombieFencingRequestForFreshConnector() throws Exception {
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                expectNewSessionKey(),
+                TASK_CONFIGS_MAP,
+                Collections.emptyMap(),
+                Collections.singletonMap(CONN1, 5),
+                Collections.singleton(CONN1)
+        );
+        testExternalZombieFencingRequestThatRequiresNoPhysicalFencing(configState, true);
+    }
+
+    private void testExternalZombieFencingRequestThatRequiresNoPhysicalFencing(
+            ClusterConfigState configState, boolean expectTaskCountRecord
+    ) throws Exception {
+        expectHerderStartup();
+
+        EasyMock.expect(member.memberId()).andStubReturn("leader");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+
+        expectAnyTicks();
+
+        member.wakeup();
+        EasyMock.expectLastCall().anyTimes();
+
+        expectConfigRefreshAndSnapshot(configState);
+
+        if (expectTaskCountRecord) {
+            configBackingStore.putTaskCountRecord(CONN1, 1);
+            EasyMock.expectLastCall();
+        }
+
+        expectHerderShutdown(false);
+
+        PowerMock.replayAll();
+
+
+        startBackgroundHerder();
+
+        FutureCallback<Void> fencing = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(CONN1, fencing);
+
+        fencing.get(10, TimeUnit.SECONDS);
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    /**
+     * Tests zombie fencing that completes extremely quickly, and causes all callback-related logic to be invoked
+     * effectively as soon as it's put into place. This is not likely to occur in practice, but the test is valuable all the
+     * same especially since it may shed light on potential deadlocks when the unlikely-but-not-impossible happens.
+     */
+    @Test
+    public void testExternalZombieFencingRequestImmediateCompletion() throws Exception {
+        expectHerderStartup();
+        EasyMock.expect(member.memberId()).andStubReturn("leader");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        SessionKey sessionKey = expectNewSessionKey();
+
+        expectAnyTicks();
+
+        member.wakeup();
+        EasyMock.expectLastCall();
+
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                sessionKey,
+                TASK_CONFIGS_MAP,
+                Collections.singletonMap(CONN1, 2),
+                Collections.singletonMap(CONN1, 5),
+                Collections.singleton(CONN1)
+        );
+        expectConfigRefreshAndSnapshot(configState);
+
+        // The future returned by Worker::fenceZombies
+        KafkaFuture<Void> workerFencingFuture = EasyMock.mock(KafkaFuture.class);
+        // The future tracked by the herder (which tracks the fencing performed by the worker and the possible followup write to the config topic) 
+        KafkaFuture<Void> herderFencingFuture = EasyMock.mock(KafkaFuture.class);
+
+        // Immediately invoke callbacks that the herder sets up for when the worker fencing and writes to the config topic have completed
+        for (int i = 0; i < 2; i++) {
+            Capture<KafkaFuture.BiConsumer<Void, Throwable>> herderFencingCallback = EasyMock.newCapture();
+            EasyMock.expect(herderFencingFuture.whenComplete(EasyMock.capture(herderFencingCallback))).andAnswer(() -> {
+                herderFencingCallback.getValue().accept(null, null);
+                return null;
+            });
+        }
+
+        Capture<KafkaFuture.BaseFunction<Void, Void>> fencingFollowup = EasyMock.newCapture();
+        EasyMock.expect(workerFencingFuture.thenApply(EasyMock.capture(fencingFollowup))).andAnswer(() -> {
+            fencingFollowup.getValue().apply(null);
+            return herderFencingFuture;
+        });
+        EasyMock.expect(worker.fenceZombies(EasyMock.eq(CONN1), EasyMock.eq(2), EasyMock.eq(CONN1_CONFIG)))
+                .andReturn(workerFencingFuture);
+
+        expectConfigRefreshAndSnapshot(configState);
+
+        configBackingStore.putTaskCountRecord(CONN1, 1);
+        EasyMock.expectLastCall();
+
+        expectHerderShutdown(true);
+
+        PowerMock.replayAll(workerFencingFuture, herderFencingFuture);
+
+
+        startBackgroundHerder();
+
+        FutureCallback<Void> fencing = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(CONN1, fencing);
+
+        fencing.get(10, TimeUnit.SECONDS);
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    /**
+     * The herder tries to perform a round of fencing, but fails synchronously while invoking Worker::fenceZombies
+     */
+    @Test
+    public void testExternalZombieFencingRequestSynchronousFailure() throws Exception {
+        expectHerderStartup();
+        EasyMock.expect(member.memberId()).andStubReturn("leader");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        SessionKey sessionKey = expectNewSessionKey();
+
+        expectAnyTicks();
+
+        member.wakeup();
+        EasyMock.expectLastCall();
+
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                sessionKey,
+                TASK_CONFIGS_MAP,
+                Collections.singletonMap(CONN1, 2),
+                Collections.singletonMap(CONN1, 5),
+                Collections.singleton(CONN1)
+        );
+        expectConfigRefreshAndSnapshot(configState);
+
+        Exception fencingException = new KafkaException("whoops!");
+        EasyMock.expect(worker.fenceZombies(EasyMock.eq(CONN1), EasyMock.eq(2), EasyMock.eq(CONN1_CONFIG)))
+                .andThrow(fencingException);
+
+        expectHerderShutdown(true);
+
+        PowerMock.replayAll();
+
+
+        startBackgroundHerder();
+
+        FutureCallback<Void> fencing = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(CONN1, fencing);
+
+        ExecutionException exception = assertThrows(ExecutionException.class, () -> fencing.get(10, TimeUnit.SECONDS));
+        assertEquals(fencingException, exception.getCause());
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    /**
+     * The herder tries to perform a round of fencing and is able to retrieve a future from worker::fenceZombies, but the attempt
+     * fails at a later point.
+     */
+    @Test
+    public void testExternalZombieFencingRequestAsynchronousFailure() throws Exception {
+        expectHerderStartup();
+        EasyMock.expect(member.memberId()).andStubReturn("leader");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        SessionKey sessionKey = expectNewSessionKey();
+
+        expectAnyTicks();
+
+        member.wakeup();
+        EasyMock.expectLastCall();
+
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                sessionKey,
+                TASK_CONFIGS_MAP,
+                Collections.singletonMap(CONN1, 2),
+                Collections.singletonMap(CONN1, 5),
+                Collections.singleton(CONN1)
+        );
+        expectConfigRefreshAndSnapshot(configState);
+
+        // The future returned by Worker::fenceZombies
+        KafkaFuture<Void> workerFencingFuture = EasyMock.mock(KafkaFuture.class);
+        // The future tracked by the herder (which tracks the fencing performed by the worker and the possible followup write to the config topic) 
+        KafkaFuture<Void> herderFencingFuture = EasyMock.mock(KafkaFuture.class);
+        // The callbacks that the herder has accrued for outstanding fencing futures
+        Capture<KafkaFuture.BiConsumer<Void, Throwable>> herderFencingCallbacks = EasyMock.newCapture(CaptureType.ALL);
+
+        EasyMock.expect(worker.fenceZombies(EasyMock.eq(CONN1), EasyMock.eq(2), EasyMock.eq(CONN1_CONFIG)))
+                .andReturn(workerFencingFuture);
+
+        EasyMock.expect(workerFencingFuture.thenApply(EasyMock.<KafkaFuture.BaseFunction<Void, Void>>anyObject()))
+                .andReturn(herderFencingFuture);
+
+        CountDownLatch callbacksInstalled = new CountDownLatch(2);
+        for (int i = 0; i < 2; i++) {
+            EasyMock.expect(herderFencingFuture.whenComplete(EasyMock.capture(herderFencingCallbacks))).andAnswer(() -> {
+                callbacksInstalled.countDown();
+                return null;
+            });
+        }
+
+        expectHerderShutdown(true);
+
+        PowerMock.replayAll(workerFencingFuture, herderFencingFuture);
+
+
+        startBackgroundHerder();
+
+        FutureCallback<Void> fencing = new FutureCallback<>();
+        herder.fenceZombieSourceTasks(CONN1, fencing);
+
+        assertTrue(callbacksInstalled.await(10, TimeUnit.SECONDS));
+
+        Exception fencingException = new AuthorizationException("you didn't say the magic word");
+        herderFencingCallbacks.getValues().forEach(cb -> cb.accept(null, fencingException));
+
+        ExecutionException exception = assertThrows(ExecutionException.class, () -> fencing.get(10, TimeUnit.SECONDS));
+        assertTrue(exception.getCause() instanceof ConnectException);
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    /**
+     * Issues multiple rapid fencing requests for a handful of connectors, each of which takes a little while to complete.
+     * This mimics what might happen when a few connectors are reconfigured in quick succession and each task for the
+     * connector needs to hit the leader with a fencing request during its preflight check.
+     */
+    @Test
+    public void testExternalZombieFencingRequestDelayedCompletion() throws Exception {
+        final String conn3 = "SourceC";
+        final Map<String, Integer> tasksPerConnector = new HashMap<>();
+        tasksPerConnector.put(CONN1, 5);
+        tasksPerConnector.put(CONN2, 3);
+        tasksPerConnector.put(conn3, 12);
+
+        expectHerderStartup();
+        EasyMock.expect(member.memberId()).andStubReturn("leader");
+        EasyMock.expect(member.currentProtocolVersion()).andStubReturn(CONNECT_PROTOCOL_V2);
+        expectConfigRefreshAndSnapshot(SNAPSHOT);
+
+        expectRebalance(1, Collections.emptyList(), Collections.emptyList(), true);
+        SessionKey sessionKey = expectNewSessionKey();
+
+        expectAnyTicks();
+
+        // We invoke the herder's fenceZombies method repeatedly, which adds a new request to the queue.
+        // If the queue is empty, the member is woken up; however, if two or more requests are issued in rapid
+        // succession, the member won't be woken up. We allow the member to be woken up any number of times
+        // here since it's not critical to the testing logic and it's difficult to mock things in order to lead to an
+        // exact number of wakeups.
+        member.wakeup();
+        EasyMock.expectLastCall().anyTimes();
+
+        Map<String, Integer> taskCountRecords = new HashMap<>();
+        taskCountRecords.put(CONN1, 2);
+        taskCountRecords.put(CONN2, 3);
+        taskCountRecords.put(conn3, 5);
+        Map<String, Integer> taskConfigGenerations = new HashMap<>();
+        taskConfigGenerations.put(CONN1, 3);
+        taskConfigGenerations.put(CONN2, 4);
+        taskConfigGenerations.put(conn3, 2);
+        Set<String> pendingFencing = new HashSet<>(Arrays.asList(CONN1, CONN2, conn3));
+        ClusterConfigState configState = exactlyOnceSnapshot(
+                sessionKey,
+                TASK_CONFIGS_MAP,
+                taskCountRecords,
+                taskConfigGenerations,
+                pendingFencing,
+                tasksPerConnector
+        );
+        tasksPerConnector.keySet().forEach(c -> expectConfigRefreshAndSnapshot(configState));
+
+        // The callbacks that the herder has accrued for outstanding fencing futures, which will be completed after
+        // a successful round of fencing and a task record write to the config topic
+        Map<String, Capture<KafkaFuture.BiConsumer<Void, Throwable>>> herderFencingCallbacks = new HashMap<>();
+        // The callbacks that the herder has installed for after a successful round of zombie fencing, but before writing
+        // a task record to the config topic
+        Map<String, Capture<KafkaFuture.BaseFunction<Void, Void>>> workerFencingFollowups = new HashMap<>();
+
+        Map<String, CountDownLatch> callbacksInstalled = new HashMap<>();
+        tasksPerConnector.forEach((connector, numStackedRequests) -> {
+            // The future returned by Worker::fenceZombies
+            KafkaFuture<Void> workerFencingFuture = EasyMock.mock(KafkaFuture.class);
+            // The future tracked by the herder (which tracks the fencing performed by the worker and the possible followup write to the config topic) 
+            KafkaFuture<Void> herderFencingFuture = EasyMock.mock(KafkaFuture.class);
+
+            Capture<KafkaFuture.BiConsumer<Void, Throwable>> herderFencingCallback = EasyMock.newCapture(CaptureType.ALL);
+            herderFencingCallbacks.put(connector, herderFencingCallback);
+
+            // Don't immediately invoke callbacks that the herder sets up for when the worker fencing and writes to the config topic have completed
+            // Instead, wait for them to be installed, then invoke them explicitly after the fact on a thread separate from the herder's tick thread
+            EasyMock.expect(herderFencingFuture.whenComplete(EasyMock.capture(herderFencingCallback)))
+                    .andReturn(null)
+                    .times(numStackedRequests + 1);
+
+            Capture<KafkaFuture.BaseFunction<Void, Void>> fencingFollowup = EasyMock.newCapture();
+            CountDownLatch callbackInstalled = new CountDownLatch(1);
+            workerFencingFollowups.put(connector, fencingFollowup);
+            callbacksInstalled.put(connector, callbackInstalled);
+            EasyMock.expect(workerFencingFuture.thenApply(EasyMock.capture(fencingFollowup))).andAnswer(() -> {
+                callbackInstalled.countDown();
+                return herderFencingFuture;
+            });
+
+            // We should only perform a single physical zombie fencing; all the subsequent requests should be stacked onto the first one
+            EasyMock.expect(worker.fenceZombies(
+                    EasyMock.eq(connector), EasyMock.eq(taskCountRecords.get(connector)), EasyMock.anyObject())
+            ).andReturn(workerFencingFuture);
+
+            for (int i = 0; i < numStackedRequests; i++) {
+                expectConfigRefreshAndSnapshot(configState);
+            }
+
+            PowerMock.replay(workerFencingFuture, herderFencingFuture);
+        });
+
+        tasksPerConnector.forEach((connector, taskCount) -> {
+            configBackingStore.putTaskCountRecord(connector, taskCount);
+            EasyMock.expectLastCall();
+        });
+
+        expectHerderShutdown(false);
+
+        PowerMock.replayAll();
+
+
+        startBackgroundHerder();
+
+        List<FutureCallback<Void>> stackedFencingRequests = new ArrayList<>();
+        tasksPerConnector.forEach((connector, numStackedRequests) -> {
+            List<FutureCallback<Void>> connectorFencingRequests = IntStream.range(0, numStackedRequests)
+                    .mapToObj(i -> new FutureCallback<Void>())
+                    .collect(Collectors.toList());
+
+            connectorFencingRequests.forEach(fencing ->
+                    herder.fenceZombieSourceTasks(connector, fencing)
+            );
+
+            stackedFencingRequests.addAll(connectorFencingRequests);
+        });
+
+        callbacksInstalled.forEach((connector, latch) -> {
+            try {
+                assertTrue(latch.await(10, TimeUnit.SECONDS));
+                workerFencingFollowups.get(connector).getValue().apply(null);
+                herderFencingCallbacks.get(connector).getValues().forEach(cb -> cb.accept(null, null));
+            } catch (InterruptedException e) {
+                fail("Unexpectedly interrupted");
+            }
+        });
+
+        for (FutureCallback<Void> fencing : stackedFencingRequests) {
+            fencing.get(10, TimeUnit.SECONDS);
+        }
+
+        stopBackgroundHerder();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testVerifyTaskGeneration() {
+        Map<String, Integer> taskConfigGenerations = new HashMap<>();
+        herder.configState = new ClusterConfigState(1, null, Collections.singletonMap(CONN1, 3),
+                Collections.singletonMap(CONN1, CONN1_CONFIG), Collections.singletonMap(CONN1, TargetState.STARTED),
+                TASK_CONFIGS_MAP, Collections.emptyMap(), taskConfigGenerations, Collections.emptySet(), Collections.emptySet());
+
+        Callback<Void> verifyCallback = EasyMock.mock(Callback.class);
+        for (int i = 0; i < 5; i++) {
+            verifyCallback.onCompletion(null, null);
+            EasyMock.expectLastCall();
+        }
+
+        PowerMock.replayAll();
+
+        herder.assignment = new ExtendedAssignment(
+                (short) 2, (short) 0, "leader", "leaderUrl", 0,
+                Collections.emptySet(), Collections.singleton(TASK1),
+                Collections.emptySet(), Collections.emptySet(), 0);
+
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 0, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 1, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 2, verifyCallback));
+
+        taskConfigGenerations.put(CONN1, 0);
+        herder.verifyTaskGenerationAndOwnership(TASK1, 0, verifyCallback);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 1, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 2, verifyCallback));
+
+        taskConfigGenerations.put(CONN1, 1);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 0, verifyCallback));
+        herder.verifyTaskGenerationAndOwnership(TASK1, 1, verifyCallback);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 2, verifyCallback));
+
+        taskConfigGenerations.put(CONN1, 2);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 0, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 1, verifyCallback));
+        herder.verifyTaskGenerationAndOwnership(TASK1, 2, verifyCallback);
+
+        taskConfigGenerations.put(CONN1, 3);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 0, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 1, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(TASK1, 2, verifyCallback));
+
+        ConnectorTaskId unassignedTask = new ConnectorTaskId(CONN2, 0);
+        taskConfigGenerations.put(unassignedTask.connector(), 1);
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(unassignedTask, 0, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(unassignedTask, 1, verifyCallback));
+        assertThrows(ConnectException.class, () -> herder.verifyTaskGenerationAndOwnership(unassignedTask, 2, verifyCallback));
+
+        PowerMock.verifyAll();
+    }
+
     @Test
     public void testKeyExceptionDetection() {
         assertFalse(herder.isPossibleExpiredKeyException(
@@ -2659,16 +3483,23 @@ public void testHerderStopServicesClosesUponShutdown() {
     private void expectRebalance(final long offset,
                                  final List<String> assignedConnectors,
                                  final List<ConnectorTaskId> assignedTasks) {
-        expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.NO_ERROR, offset, assignedConnectors, assignedTasks, 0);
+        expectRebalance(offset, assignedConnectors, assignedTasks, false);
     }
 
     private void expectRebalance(final long offset,
                                  final List<String> assignedConnectors,
                                  final List<ConnectorTaskId> assignedTasks,
-                                 String leader, String leaderUrl) {
+                                 final boolean isLeader) {
+
+        expectRebalance(Collections.emptyList(), Collections.emptyList(),
+                ConnectProtocol.Assignment.NO_ERROR, offset, "leader", "leaderUrl", assignedConnectors, assignedTasks, 0, isLeader);
+    }
+
+    private void expectRebalance(final long offset,
+                                 final List<String> assignedConnectors, final List<ConnectorTaskId> assignedTasks,
+                                 String leader, String leaderUrl, boolean isLeader) {
         expectRebalance(Collections.emptyList(), Collections.emptyList(),
-                ConnectProtocol.Assignment.NO_ERROR, offset, leader, leaderUrl, assignedConnectors, assignedTasks, 0);
+                ConnectProtocol.Assignment.NO_ERROR, offset, leader, leaderUrl, assignedConnectors, assignedTasks, 0, isLeader);
     }
 
     // Handles common initial part of rebalance callback. Does not handle instantiation of connectors and tasks.
@@ -2680,7 +3511,6 @@ private void expectRebalance(final Collection<String> revokedConnectors,
                                  final List<ConnectorTaskId> assignedTasks) {
         expectRebalance(revokedConnectors, revokedTasks, error, offset, assignedConnectors, assignedTasks, 0);
     }
-
     // Handles common initial part of rebalance callback. Does not handle instantiation of connectors and tasks.
     private void expectRebalance(final Collection<String> revokedConnectors,
                                  final List<ConnectorTaskId> revokedTasks,
@@ -2689,7 +3519,7 @@ private void expectRebalance(final Collection<String> revokedConnectors,
                                  final List<String> assignedConnectors,
                                  final List<ConnectorTaskId> assignedTasks,
                                  int delay) {
-        expectRebalance(revokedConnectors, revokedTasks, error, offset, "leader", "leaderUrl", assignedConnectors, assignedTasks, delay);
+        expectRebalance(revokedConnectors, revokedTasks, error, offset, "leader", "leaderUrl", assignedConnectors, assignedTasks, delay, false);
     }
 
     // Handles common initial part of rebalance callback. Does not handle instantiation of connectors and tasks.
@@ -2701,7 +3531,8 @@ private void expectRebalance(final Collection<String> revokedConnectors,
                                  String leaderUrl,
                                  final List<String> assignedConnectors,
                                  final List<ConnectorTaskId> assignedTasks,
-                                 int delay) {
+                                 int delay,
+                                 boolean isLeader) {
         member.ensureActive();
         PowerMock.expectLastCall().andAnswer(() -> {
             ExtendedAssignment assignment;
@@ -2725,6 +3556,11 @@ private void expectRebalance(final Collection<String> revokedConnectors,
             return null;
         });
 
+        if (isLeader) {
+            configBackingStore.claimWritePrivileges();
+            EasyMock.expectLastCall();
+        }
+
         if (!revokedConnectors.isEmpty()) {
             for (String connector : revokedConnectors) {
                 worker.stopAndAwaitConnector(connector);
@@ -2746,10 +3582,111 @@ private void expectRebalance(final Collection<String> revokedConnectors,
         PowerMock.expectLastCall();
     }
 
-    private void expectPostRebalanceCatchup(final ClusterConfigState readToEndSnapshot) throws TimeoutException {
-        configBackingStore.refresh(EasyMock.anyLong(), EasyMock.anyObject(TimeUnit.class));
+    private ClusterConfigState exactlyOnceSnapshot(
+            SessionKey sessionKey,
+            Map<ConnectorTaskId, Map<String, String>> taskConfigs,
+            Map<String, Integer> taskCountRecords,
+            Map<String, Integer> taskConfigGenerations,
+            Set<String> pendingFencing) {
+
+        Set<String> connectors = new HashSet<>();
+        connectors.addAll(taskCountRecords.keySet());
+        connectors.addAll(taskConfigGenerations.keySet());
+        connectors.addAll(pendingFencing);
+        Map<String, Integer> taskCounts = connectors.stream()
+                .collect(Collectors.toMap(Function.identity(), c -> 1));
+
+        return exactlyOnceSnapshot(sessionKey, taskConfigs, taskCountRecords, taskConfigGenerations, pendingFencing, taskCounts);
+    }
+
+    private ClusterConfigState exactlyOnceSnapshot(
+            SessionKey sessionKey,
+            Map<ConnectorTaskId, Map<String, String>> taskConfigs,
+            Map<String, Integer> taskCountRecords,
+            Map<String, Integer> taskConfigGenerations,
+            Set<String> pendingFencing,
+            Map<String, Integer> taskCounts) {
+
+        Set<String> connectors = new HashSet<>();
+        connectors.addAll(taskCounts.keySet());
+        connectors.addAll(taskCountRecords.keySet());
+        connectors.addAll(taskConfigGenerations.keySet());
+        connectors.addAll(pendingFencing);
+
+        Map<String, Map<String, String>> connectorConfigs = connectors.stream()
+                .collect(Collectors.toMap(Function.identity(), c -> CONN1_CONFIG));
+
+        return new ClusterConfigState(1, sessionKey, taskCounts,
+                connectorConfigs, Collections.singletonMap(CONN1, TargetState.STARTED),
+                taskConfigs, taskCountRecords, taskConfigGenerations, pendingFencing, Collections.emptySet());
+    }
+
+    private void expectAnyTicks() {
+        member.ensureActive();
+        EasyMock.expectLastCall().anyTimes();
+        member.poll(EasyMock.anyInt());
+        PowerMock.expectLastCall().anyTimes();
+    }
+
+    private SessionKey expectNewSessionKey() {
+        SecretKey secretKey = EasyMock.niceMock(SecretKey.class);
+        EasyMock.expect(secretKey.getAlgorithm()).andReturn(INTER_WORKER_KEY_GENERATION_ALGORITHM_DEFAULT).anyTimes();
+        EasyMock.expect(secretKey.getEncoded()).andReturn(new byte[32]).anyTimes();
+        SessionKey sessionKey = new SessionKey(secretKey, time.milliseconds() + TimeUnit.DAYS.toMillis(1));
+        configBackingStore.putSessionKey(anyObject(SessionKey.class));
+        EasyMock.expectLastCall().andAnswer(() -> {
+            configUpdateListener.onSessionKeyUpdate(sessionKey);
+            return null;
+        });
+        EasyMock.replay(secretKey);
+        return sessionKey;
+    }
+
+    private void expectConfigRefreshAndSnapshot(final ClusterConfigState readToEndSnapshot) {
+        try {
+            configBackingStore.refresh(anyLong(), EasyMock.anyObject(TimeUnit.class));
+            EasyMock.expectLastCall();
+            EasyMock.expect(configBackingStore.snapshot()).andReturn(readToEndSnapshot);
+        } catch (TimeoutException e) {
+            fail("Mocked method should not throw checked exception");
+        }
+    }
+
+    private void startBackgroundHerder() {
+        herderExecutor = Executors.newSingleThreadExecutor();
+        herderExecutor.submit(herder);
+    }
+
+    private void stopBackgroundHerder() throws Exception {
+        herder.stop();
+        herderExecutor.shutdown();
+        herderExecutor.awaitTermination(10, TimeUnit.SECONDS);
+    }
+
+    private void expectHerderStartup() {
+        worker.start();
+        EasyMock.expectLastCall();
+        statusBackingStore.start();
+        EasyMock.expectLastCall();
+        configBackingStore.start();
+        EasyMock.expectLastCall();
+    }
+
+    private void expectHerderShutdown(boolean wakeup) {
+        if (wakeup) {
+            member.wakeup();
+            EasyMock.expectLastCall();
+        }
+        EasyMock.expect(worker.connectorNames()).andReturn(Collections.emptySet());
+        EasyMock.expect(worker.taskIds()).andReturn(Collections.emptySet());
+        member.stop();
+        EasyMock.expectLastCall();
+        statusBackingStore.stop();
+        EasyMock.expectLastCall();
+        configBackingStore.stop();
+        EasyMock.expectLastCall();
+        worker.stop();
         EasyMock.expectLastCall();
-        EasyMock.expect(configBackingStore.snapshot()).andReturn(readToEndSnapshot);
     }
 
     private void assertStatistics(int expectedEpoch, int completedRebalances, double rebalanceTime, double millisSinceLastRebalance) {
@@ -2850,4 +3787,14 @@ private abstract class BogusSourceConnector extends SourceConnector {
     private abstract class BogusSourceTask extends SourceTask {
     }
 
+    private DistributedHerder exactlyOnceHerder() {
+        Map<String, String> config = new HashMap<>(HERDER_CONFIG);
+        config.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        return PowerMock.createPartialMock(DistributedHerder.class,
+                new String[]{"connectorTypeForClass", "updateDeletedConnectorStatus", "updateDeletedTaskStatus", "validateConnectorConfig"},
+                new DistributedConfig(config), worker, WORKER_ID, KAFKA_CLUSTER_ID,
+                statusBackingStore, configBackingStore, member, MEMBER_URL, metrics, time, noneConnectorClientConfigOverridePolicy,
+                new AutoCloseable[0]);
+    }
+
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignorTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignorTest.java
index 0fe153132eb93..ed825312096f7 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignorTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/IncrementalCooperativeAssignorTest.java
@@ -17,167 +17,105 @@
 package org.apache.kafka.connect.runtime.distributed;
 
 import org.apache.kafka.clients.consumer.internals.RequestFuture;
+import org.apache.kafka.common.message.JoinGroupResponseData;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.connect.runtime.TargetState;
 import org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.ConnectorsAndTasks;
+import org.apache.kafka.connect.util.ConnectUtils;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.util.ConnectorTaskId;
-import org.junit.After;
 import org.junit.Before;
-import org.junit.Rule;
 import org.junit.Test;
-import org.mockito.ArgumentCaptor;
-import org.mockito.Captor;
-import org.mockito.Mock;
-import org.mockito.Mockito;
-import org.mockito.junit.MockitoJUnit;
-import org.mockito.junit.MockitoRule;
-
-import java.util.AbstractMap.SimpleEntry;
+
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
-import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1;
-import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2;
+import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeAssignor.ClusterAssignment;
 import static org.apache.kafka.connect.runtime.distributed.WorkerCoordinator.WorkerLoad;
-import static org.hamcrest.CoreMatchers.hasItems;
-import static org.hamcrest.CoreMatchers.is;
-import static org.hamcrest.MatcherAssert.assertThat;
+import static org.apache.kafka.connect.util.ConnectUtils.transformValues;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
-import static org.junit.runners.Parameterized.Parameter;
-import static org.junit.runners.Parameterized.Parameters;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.Mockito.doReturn;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.times;
+import static org.mockito.ArgumentMatchers.notNull;
+import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
 import static org.mockito.Mockito.when;
 
 public class IncrementalCooperativeAssignorTest {
-    @Rule
-    public MockitoRule rule = MockitoJUnit.rule();
-
-    @Mock
-    private WorkerCoordinator coordinator;
-
-    @Captor
-    ArgumentCaptor<Map<String, ExtendedAssignment>> assignmentsCapture;
-
-    @Parameters
-    public static Iterable<?> mode() {
-        return Arrays.asList(new Object[][] {{CONNECT_PROTOCOL_V1, CONNECT_PROTOCOL_V2}});
-    }
 
-    @Parameter
-    public short protocolVersion;
+    // Offset isn't used in most tests but is required for creating a config snapshot object,
+    // so just use some arbitrary constant for that
+    private static final long CONFIG_OFFSET = 618;
 
-    private ClusterConfigState configState;
-    private Map<String, ExtendedWorkerState> memberConfigs;
-    private Map<String, ExtendedWorkerState> expectedMemberConfigs;
-    private long offset;
-    private String leader;
-    private String leaderUrl;
+    private Map<String, Integer> connectors;
     private Time time;
     private int rebalanceDelay;
     private IncrementalCooperativeAssignor assignor;
-    private int rebalanceNum;
-    Map<String, ExtendedAssignment> assignments;
-    Map<String, ExtendedAssignment> returnedAssignments;
+    private int generationId;
+    private ClusterAssignment returnedAssignments;
+    private Map<String, ConnectorsAndTasks> memberAssignments;
 
     @Before
     public void setup() {
-        leader = "worker1";
-        leaderUrl = expectedLeaderUrl(leader);
-        offset = 10;
-        configState = clusterConfigState(offset, 2, 4);
-        memberConfigs = memberConfigs(leader, offset, 1, 1);
+        generationId = 1000;
         time = Time.SYSTEM;
         rebalanceDelay = DistributedConfig.SCHEDULED_REBALANCE_MAX_DELAY_MS_DEFAULT;
-        assignments = new HashMap<>();
+        connectors = new HashMap<>();
+        addNewConnector("connector1", 4);
+        addNewConnector("connector2", 4);
+        memberAssignments = new HashMap<>();
+        addNewEmptyWorkers("worker1");
         initAssignor();
     }
 
-    @After
-    public void teardown() {
-        verifyNoMoreInteractions(coordinator);
-    }
-
     public void initAssignor() {
-        assignor = Mockito.spy(new IncrementalCooperativeAssignor(
-                new LogContext(),
-                time,
-                rebalanceDelay));
-        assignor.previousGenerationId = 1000;
+        assignor = new IncrementalCooperativeAssignor(new LogContext(), time, rebalanceDelay);
+        assignor.previousGenerationId = generationId;
     }
 
     @Test
     public void testTaskAssignmentWhenWorkerJoins() {
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 1 worker and 2 connectors configured but not yet assigned
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1");
+        assertConnectorAllocations(2);
+        assertTaskAllocations(8);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with a second worker joining and all connectors running on previous worker
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 1, 4, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(0, 1);
+        assertTaskAllocations(0, 4);
 
         // Third assignment after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 4, 0, 0, "worker1", "worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // A fourth rebalance should not change assignments
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertEmptyAssignment();
     }
 
     @Test
@@ -186,69 +124,40 @@ public void testTaskAssignmentWhenWorkerLeavesPermanently() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 2 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with only one worker remaining in the group. The worker that left the
         // group was a follower. No re-assignments take place immediately and the count
         // down for the rebalance delay starts
-        applyAssignments(returnedAssignments);
-        assignments.remove("worker2");
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(rebalanceDelay, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1");
+        removeWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(rebalanceDelay);
+        assertWorkers("worker1");
+        assertEmptyAssignment();
 
         time.sleep(rebalanceDelay / 2);
 
         // Third (incidental) assignment with still only one worker in the group. Max delay has not
         // been reached yet
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(rebalanceDelay / 2, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1");
+        performStandardRebalance();
+        assertDelay(rebalanceDelay / 2);
+        assertEmptyAssignment();
 
         time.sleep(rebalanceDelay / 2 + 1);
 
         // Fourth assignment after delay expired
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 4, 0, 0, "worker1");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(2);
+        assertTaskAllocations(8);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -257,86 +166,51 @@ public void testTaskAssignmentWhenWorkerBounces() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 2 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with only one worker remaining in the group. The worker that left the
         // group was a follower. No re-assignments take place immediately and the count
         // down for the rebalance delay starts
-        applyAssignments(returnedAssignments);
-        assignments.remove("worker2");
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(rebalanceDelay, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1");
+        removeWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(rebalanceDelay);
+        assertWorkers("worker1");
+        assertEmptyAssignment();
 
         time.sleep(rebalanceDelay / 2);
 
         // Third (incidental) assignment with still only one worker in the group. Max delay has not
         // been reached yet
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(rebalanceDelay / 2, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1");
+        performStandardRebalance();
+        assertDelay(rebalanceDelay / 2);
+        assertEmptyAssignment();
 
         time.sleep(rebalanceDelay / 4);
 
         // Fourth assignment with the second worker returning before the delay expires
         // Since the delay is still active, lost assignments are not reassigned yet
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(rebalanceDelay / 4, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(rebalanceDelay / 4);
+        assertWorkers("worker1", "worker2");
+        assertEmptyAssignment();
 
         time.sleep(rebalanceDelay / 4);
 
         // Fifth assignment with the same two workers. The delay has expired, so the lost
         // assignments ought to be assigned to the worker that has appeared as returned.
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 4, 0, 0, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -345,59 +219,34 @@ public void testTaskAssignmentWhenLeaderLeavesPermanently() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 3 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        memberConfigs.put("worker3", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2", "worker3");
+        addNewEmptyWorkers("worker2", "worker3");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2", "worker3");
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(2, 3, 3);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with two workers remaining in the group. The worker that left the
         // group was the leader. The new leader has no previous assignments and is not tracking a
         // delay upon a leader's exit
-        applyAssignments(returnedAssignments);
-        assignments.remove("worker1");
-        leader = "worker2";
-        leaderUrl = expectedLeaderUrl(leader);
-        memberConfigs = memberConfigs(leader, offset, assignments);
+        removeWorkers("worker1");
         // The fact that the leader bounces means that the assignor starts from a clean slate
         initAssignor();
 
         // Capture needs to be reset to point to the new assignor
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 3, 0, 0, "worker2", "worker3");
-
-        // Third (incidental) assignment with still only one worker in the group.
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker2", "worker3");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker2", "worker3");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
+
+        // Third (incidental) assignment with still only two workers in the group.
+        performStandardRebalance();
+        assertDelay(0);
+        assertEmptyAssignment();
     }
 
     @Test
@@ -406,74 +255,46 @@ public void testTaskAssignmentWhenLeaderBounces() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 3 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        memberConfigs.put("worker3", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2", "worker3");
+        addNewEmptyWorkers("worker2", "worker3");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2", "worker3");
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(2, 3, 3);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with two workers remaining in the group. The worker that left the
         // group was the leader. The new leader has no previous assignments and is not tracking a
         // delay upon a leader's exit
-        applyAssignments(returnedAssignments);
-        assignments.remove("worker1");
-        leader = "worker2";
-        leaderUrl = expectedLeaderUrl(leader);
-        memberConfigs = memberConfigs(leader, offset, assignments);
+        removeWorkers("worker1");
         // The fact that the leader bounces means that the assignor starts from a clean slate
         initAssignor();
 
         // Capture needs to be reset to point to the new assignor
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 3, 0, 0, "worker2", "worker3");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker2", "worker3");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Third assignment with the previous leader returning as a follower. In this case, the
         // arrival of the previous leader is treated as an arrival of a new worker. Reassignment
         // happens immediately, first with a revocation
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        memberConfigs.put("worker1", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2", "worker3");
+        addNewEmptyWorkers("worker1");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2", "worker3");
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(0, 3, 3);
 
         // Fourth assignment after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 2, 0, 0, "worker1", "worker2", "worker3");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(2, 3, 3);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -482,44 +303,23 @@ public void testTaskAssignmentWhenFirstAssignmentAttemptFails() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doThrow(new RuntimeException("Unable to send computed assignment with SyncGroupRequest"))
-                .when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 2 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        try {
-            expectGeneration();
-            assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        } catch (RuntimeException e) {
-            RequestFuture.failure(e);
-        }
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
+        addNewEmptyWorkers("worker2");
+        performFailedRebalance();
         // This was the assignment that should have been sent, but didn't make it all the way
-        assertDelay(0, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(0, 0);
+        assertTaskAllocations(0, 0);
 
         // Second assignment happens with members returning the same assignments (memberConfigs)
         // as the first time. The assignor detects that the number of members did not change and
         // avoids the rebalance delay, treating the lost assignments as new assignments.
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -528,60 +328,38 @@ public void testTaskAssignmentWhenSubsequentAssignmentAttemptFails() {
         time = new MockTime();
         initAssignor();
 
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 2 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
-
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doThrow(new RuntimeException("Unable to send computed assignment with SyncGroupRequest"))
-                .when(assignor).serializeAssignments(assignmentsCapture.capture());
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment triggered by a third worker joining. The computed assignment should
         // revoke tasks from the existing group. But the assignment won't be correctly delivered.
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        memberConfigs.put("worker3", new ExtendedWorkerState(leaderUrl, offset, null));
-        try {
-            expectGeneration();
-            assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        } catch (RuntimeException e) {
-            RequestFuture.failure(e);
-        }
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
+        addNewEmptyWorkers("worker3");
+        performFailedRebalance();
         // This was the assignment that should have been sent, but didn't make it all the way
-        assertDelay(0, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2", "worker3");
+        assertDelay(0);
+        assertWorkers("worker1", "worker2", "worker3");
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(0, 4, 4);
 
         // Third assignment happens with members returning the same assignments (memberConfigs)
         // as the first time.
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertDelay(0, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2", "worker3");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(0, 3, 3);
+
+        // Fourth assignment after revocations
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(2, 3, 3);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -590,96 +368,61 @@ public void testTaskAssignmentWhenSubsequentAssignmentAttemptFailsOutsideTheAssi
         time = new MockTime();
         initAssignor();
 
-        expectGeneration();
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 2 workers and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment triggered by a third worker joining. The computed assignment should
         // revoke tasks from the existing group. But the assignment won't be correctly delivered
         // and sync group with fail on the leader worker.
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        memberConfigs.put("worker3", new ExtendedWorkerState(leaderUrl, offset, null));
-        when(coordinator.generationId())
-                .thenReturn(assignor.previousGenerationId + 1)
-                .thenReturn(assignor.previousGenerationId + 1);
-        when(coordinator.lastCompletedGenerationId()).thenReturn(assignor.previousGenerationId - 1);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
+        addNewEmptyWorkers("worker3");
+        performFailedRebalance();
         // This was the assignment that should have been sent, but didn't make it all the way
-        assertDelay(0, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2", "worker3");
+        assertDelay(0);
+        assertWorkers("worker1", "worker2", "worker3");
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(0, 4, 4);
 
         // Third assignment happens with members returning the same assignments (memberConfigs)
         // as the first time.
-        when(coordinator.lastCompletedGenerationId()).thenReturn(assignor.previousGenerationId - 1);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertDelay(0, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2", "worker3");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performRebalanceWithMismatchedGeneration();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(0, 3, 3);
+
+        // Fourth assignment after revocations
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1, 1);
+        assertTaskAllocations(2, 3, 3);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
     public void testTaskAssignmentWhenConnectorsAreDeleted() {
-        configState = clusterConfigState(offset, 3, 4);
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
+        addNewConnector("connector3", 4);
 
         // First assignment with 1 worker and 2 connectors configured but not yet assigned
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, null));
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(3, 12, 0, 0, "worker1", "worker2");
+        addNewEmptyWorkers("worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(1, 2);
+        assertTaskAllocations(6, 6);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with an updated config state that reflects removal of a connector
-        configState = clusterConfigState(offset + 1, 2, 4);
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        expectGeneration();
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 1, 4, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        removeConnector("connector3");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
     }
 
     @Test
@@ -747,7 +490,6 @@ public void testAssignConnectorsWhenImbalanced() {
 
     @Test
     public void testLostAssignmentHandlingWhenWorkerBounces() {
-        // Customize assignor for this test case
         time = new MockTime();
         initAssignor();
 
@@ -759,62 +501,58 @@ public void testLostAssignmentHandlingWhenWorkerBounces() {
         configuredAssignment.put("worker0", workerLoad("worker0", 0, 2, 0, 4));
         configuredAssignment.put("worker1", workerLoad("worker1", 2, 2, 4, 4));
         configuredAssignment.put("worker2", workerLoad("worker2", 4, 2, 8, 4));
-        memberConfigs = memberConfigs(leader, offset, 0, 2);
 
         ConnectorsAndTasks newSubmissions = new ConnectorsAndTasks.Builder().build();
 
         // No lost assignments
         assignor.handleLostAssignments(new ConnectorsAndTasks.Builder().build(),
                 newSubmissions,
-                new ArrayList<>(configuredAssignment.values()),
-                memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
-        String flakyWorker = "worker1";
-        WorkerLoad lostLoad = workerLoad(flakyWorker, 2, 2, 4, 4);
-        memberConfigs.remove(flakyWorker);
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
 
+        String flakyWorker = "worker1";
+        WorkerLoad lostLoad = configuredAssignment.remove(flakyWorker);
         ConnectorsAndTasks lostAssignments = new ConnectorsAndTasks.Builder()
                 .withCopies(lostLoad.connectors(), lostLoad.tasks()).build();
 
         // Lost assignments detected - No candidate worker has appeared yet (worker with no assignments)
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay / 2);
         rebalanceDelay /= 2;
 
         // A new worker (probably returning worker) has joined
         configuredAssignment.put(flakyWorker, new WorkerLoad.Builder(flakyWorker).build());
-        memberConfigs.put(flakyWorker, new ExtendedWorkerState(leaderUrl, offset, null));
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.singleton(flakyWorker),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay);
 
         // The new worker has still no assignments
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
         assertTrue("Wrong assignment of lost connectors",
                 configuredAssignment.getOrDefault(flakyWorker, new WorkerLoad.Builder(flakyWorker).build())
@@ -824,9 +562,9 @@ public void testLostAssignmentHandlingWhenWorkerBounces() {
                 configuredAssignment.getOrDefault(flakyWorker, new WorkerLoad.Builder(flakyWorker).build())
                         .tasks()
                         .containsAll(lostAssignments.tasks()));
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
     }
@@ -845,66 +583,63 @@ public void testLostAssignmentHandlingWhenWorkerLeavesPermanently() {
         configuredAssignment.put("worker0", workerLoad("worker0", 0, 2, 0, 4));
         configuredAssignment.put("worker1", workerLoad("worker1", 2, 2, 4, 4));
         configuredAssignment.put("worker2", workerLoad("worker2", 4, 2, 8, 4));
-        memberConfigs = memberConfigs(leader, offset, 0, 2);
 
         ConnectorsAndTasks newSubmissions = new ConnectorsAndTasks.Builder().build();
 
         // No lost assignments
         assignor.handleLostAssignments(new ConnectorsAndTasks.Builder().build(),
                 newSubmissions,
-                new ArrayList<>(configuredAssignment.values()),
-                memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
-        String removedWorker = "worker1";
-        WorkerLoad lostLoad = workerLoad(removedWorker, 2, 2, 4, 4);
-        memberConfigs.remove(removedWorker);
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
 
+        String removedWorker = "worker1";
+        WorkerLoad lostLoad = configuredAssignment.remove(removedWorker);
         ConnectorsAndTasks lostAssignments = new ConnectorsAndTasks.Builder()
                 .withCopies(lostLoad.connectors(), lostLoad.tasks()).build();
 
         // Lost assignments detected - No candidate worker has appeared yet (worker with no assignments)
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(memberAssignments.keySet());
         time.sleep(rebalanceDelay / 2);
         rebalanceDelay /= 2;
 
         // No new worker has joined
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
         time.sleep(rebalanceDelay);
 
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
         assertTrue("Wrong assignment of lost connectors",
                 newSubmissions.connectors().containsAll(lostAssignments.connectors()));
         assertTrue("Wrong assignment of lost tasks",
                 newSubmissions.tasks().containsAll(lostAssignments.tasks()));
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
     }
@@ -923,62 +658,58 @@ public void testLostAssignmentHandlingWithMoreThanOneCandidates() {
         configuredAssignment.put("worker0", workerLoad("worker0", 0, 2, 0, 4));
         configuredAssignment.put("worker1", workerLoad("worker1", 2, 2, 4, 4));
         configuredAssignment.put("worker2", workerLoad("worker2", 4, 2, 8, 4));
-        memberConfigs = memberConfigs(leader, offset, 0, 2);
 
         ConnectorsAndTasks newSubmissions = new ConnectorsAndTasks.Builder().build();
 
         // No lost assignments
         assignor.handleLostAssignments(new ConnectorsAndTasks.Builder().build(),
                 newSubmissions,
-                new ArrayList<>(configuredAssignment.values()),
-                memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
-        String flakyWorker = "worker1";
-        WorkerLoad lostLoad = workerLoad(flakyWorker, 2, 2, 4, 4);
-        memberConfigs.remove(flakyWorker);
-        String newWorker = "worker3";
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
 
+        String flakyWorker = "worker1";
+        WorkerLoad lostLoad = configuredAssignment.remove(flakyWorker);
         ConnectorsAndTasks lostAssignments = new ConnectorsAndTasks.Builder()
                 .withCopies(lostLoad.connectors(), lostLoad.tasks()).build();
 
-        // Lost assignments detected - A new worker also has joined that is not the returning worker
+        String newWorker = "worker3";
         configuredAssignment.put(newWorker, new WorkerLoad.Builder(newWorker).build());
-        memberConfigs.put(newWorker, new ExtendedWorkerState(leaderUrl, offset, null));
+
+        // Lost assignments detected - A new worker also has joined that is not the returning worker
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.singleton(newWorker),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay / 2);
         rebalanceDelay /= 2;
 
         // Now two new workers have joined
         configuredAssignment.put(flakyWorker, new WorkerLoad.Builder(flakyWorker).build());
-        memberConfigs.put(flakyWorker, new ExtendedWorkerState(leaderUrl, offset, null));
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
         Set<String> expectedWorkers = new HashSet<>();
         expectedWorkers.addAll(Arrays.asList(newWorker, flakyWorker));
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 expectedWorkers,
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay);
 
         // The new workers have new assignments, other than the lost ones
@@ -987,7 +718,7 @@ public void testLostAssignmentHandlingWithMoreThanOneCandidates() {
         // we don't reflect these new assignments in memberConfigs currently because they are not
         // used in handleLostAssignments method
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
         // both the newWorkers would need to be considered for re assignment of connectors and tasks
         List<String> listOfConnectorsInLast2Workers = new ArrayList<>();
@@ -1004,9 +735,9 @@ public void testLostAssignmentHandlingWithMoreThanOneCandidates() {
             listOfConnectorsInLast2Workers.containsAll(lostAssignments.connectors()));
         assertTrue("Wrong assignment of lost tasks",
             listOfTasksInLast2Workers.containsAll(lostAssignments.tasks()));
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
             Collections.emptySet(),
-            is(assignor.candidateWorkersForReassignment));
+            assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
     }
@@ -1025,223 +756,308 @@ public void testLostAssignmentHandlingWhenWorkerBouncesBackButFinallyLeaves() {
         configuredAssignment.put("worker0", workerLoad("worker0", 0, 2, 0, 4));
         configuredAssignment.put("worker1", workerLoad("worker1", 2, 2, 4, 4));
         configuredAssignment.put("worker2", workerLoad("worker2", 4, 2, 8, 4));
-        memberConfigs = memberConfigs(leader, offset, 0, 2);
 
         ConnectorsAndTasks newSubmissions = new ConnectorsAndTasks.Builder().build();
 
         // No lost assignments
         assignor.handleLostAssignments(new ConnectorsAndTasks.Builder().build(),
                 newSubmissions,
-                new ArrayList<>(configuredAssignment.values()),
-                memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
-        String veryFlakyWorker = "worker1";
-        WorkerLoad lostLoad = workerLoad(veryFlakyWorker, 2, 2, 4, 4);
-        memberConfigs.remove(veryFlakyWorker);
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
 
+        String veryFlakyWorker = "worker1";
+        WorkerLoad lostLoad = configuredAssignment.remove(veryFlakyWorker);
         ConnectorsAndTasks lostAssignments = new ConnectorsAndTasks.Builder()
                 .withCopies(lostLoad.connectors(), lostLoad.tasks()).build();
 
         // Lost assignments detected - No candidate worker has appeared yet (worker with no assignments)
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay / 2);
         rebalanceDelay /= 2;
 
         // A new worker (probably returning worker) has joined
         configuredAssignment.put(veryFlakyWorker, new WorkerLoad.Builder(veryFlakyWorker).build());
-        memberConfigs.put(veryFlakyWorker, new ExtendedWorkerState(leaderUrl, offset, null));
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.singleton(veryFlakyWorker),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(time.milliseconds() + rebalanceDelay, assignor.scheduledRebalance);
         assertEquals(rebalanceDelay, assignor.delay);
 
-        assignor.previousMembers = new HashSet<>(memberConfigs.keySet());
+        assignor.previousMembers = new HashSet<>(configuredAssignment.keySet());
         time.sleep(rebalanceDelay);
 
         // The returning worker leaves permanently after joining briefly during the delay
         configuredAssignment.remove(veryFlakyWorker);
-        memberConfigs.remove(veryFlakyWorker);
         assignor.handleLostAssignments(lostAssignments, newSubmissions,
-                new ArrayList<>(configuredAssignment.values()), memberConfigs);
+                new ArrayList<>(configuredAssignment.values()));
 
         assertTrue("Wrong assignment of lost connectors",
                 newSubmissions.connectors().containsAll(lostAssignments.connectors()));
         assertTrue("Wrong assignment of lost tasks",
                 newSubmissions.tasks().containsAll(lostAssignments.tasks()));
-        assertThat("Wrong set of workers for reassignments",
+        assertEquals("Wrong set of workers for reassignments",
                 Collections.emptySet(),
-                is(assignor.candidateWorkersForReassignment));
+                assignor.candidateWorkersForReassignment);
         assertEquals(0, assignor.scheduledRebalance);
         assertEquals(0, assignor.delay);
     }
 
     @Test
     public void testTaskAssignmentWhenTasksDuplicatedInWorkerAssignment() {
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 1 worker and 2 connectors configured but not yet assigned
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1");
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1");
+        assertConnectorAllocations(2);
+        assertTaskAllocations(8);
+        assertBalancedAndCompleteAllocation();
 
         // Second assignment with a second worker with duplicate assignment joining and all connectors running on previous worker
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        ExtendedAssignment duplicatedWorkerAssignment = newExpandableAssignment();
-        duplicatedWorkerAssignment.connectors().addAll(newConnectors(1, 2));
-        duplicatedWorkerAssignment.tasks().addAll(newTasks("connector1", 0, 4));
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, duplicatedWorkerAssignment));
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 2, 8, "worker1", "worker2");
+        addNewWorker("worker2", newConnectors(1, 2), newTasks("connector1", 0, 4));
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(0, 1);
+        assertTaskAllocations(0, 4);
 
         // Third assignment after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(1, 4, 0, 2, "worker1", "worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(2, 4);
 
         // fourth rebalance after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 2, 0, 0, "worker1", "worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(1, 1);
+        assertTaskAllocations(4, 4);
+        assertBalancedAndCompleteAllocation();
 
         // Fifth rebalance should not change assignments
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertEmptyAssignment();
     }
 
     @Test
     public void testDuplicatedAssignmentHandleWhenTheDuplicatedAssignmentsDeleted() {
-        when(coordinator.configSnapshot()).thenReturn(configState);
-        doReturn(Collections.EMPTY_MAP).when(assignor).serializeAssignments(assignmentsCapture.capture());
-
         // First assignment with 1 worker and 2 connectors configured but not yet assigned
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(2, 8, 0, 0, "worker1");
-
-        //delete connector1
-        configState = clusterConfigState(offset, 2, 1, 4);
-        when(coordinator.configSnapshot()).thenReturn(configState);
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1");
+        assertConnectorAllocations(2);
+        assertTaskAllocations(8);
+        assertBalancedAndCompleteAllocation();
+
+        // Delete connector1
+        removeConnector("connector1");
 
         // Second assignment with a second worker with duplicate assignment joining and the duplicated assignment is deleted at the same time
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        ExtendedAssignment duplicatedWorkerAssignment = newExpandableAssignment();
-        duplicatedWorkerAssignment.connectors().addAll(newConnectors(1, 2));
-        duplicatedWorkerAssignment.tasks().addAll(newTasks("connector1", 0, 4));
-        memberConfigs.put("worker2", new ExtendedWorkerState(leaderUrl, offset, duplicatedWorkerAssignment));
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 2, 8, "worker1", "worker2");
+        addNewWorker("worker2", newConnectors(1, 2), newTasks("connector1", 0, 4));
+        performStandardRebalance();
+        assertDelay(0);
+        assertWorkers("worker1", "worker2");
+        assertConnectorAllocations(0, 1);
+        assertTaskAllocations(0, 4);
 
         // Third assignment after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 2, "worker1", "worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1);
+        assertTaskAllocations(0, 2);
 
         // fourth rebalance after revocations
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 2, 0, 0, "worker1", "worker2");
+        performStandardRebalance();
+        assertDelay(0);
+        assertConnectorAllocations(0, 1);
+        assertTaskAllocations(2, 2);
+        assertBalancedAndCompleteAllocation();
 
         // Fifth rebalance should not change assignments
-        applyAssignments(returnedAssignments);
-        memberConfigs = memberConfigs(leader, offset, assignments);
-        assignor.performTaskAssignment(leader, offset, memberConfigs, coordinator, protocolVersion);
-        ++rebalanceNum;
-        returnedAssignments = assignmentsCapture.getValue();
-        assertDelay(0, returnedAssignments);
-        expectedMemberConfigs = memberConfigs(leader, offset, returnedAssignments);
-        assertNoReassignments(memberConfigs, expectedMemberConfigs);
-        assertAssignment(0, 0, 0, 0, "worker1", "worker2");
-
-        verify(coordinator, times(rebalanceNum)).configSnapshot();
-        verify(coordinator, times(rebalanceNum)).leaderState(any());
-        verify(coordinator, times(2 * rebalanceNum)).generationId();
-        verify(coordinator, times(rebalanceNum)).memberId();
-        verify(coordinator, times(rebalanceNum)).lastCompletedGenerationId();
+        performStandardRebalance();
+        assertDelay(0);
+        assertEmptyAssignment();
+    }
+
+    @Test
+    public void testLeaderStateUpdated() {
+        // Sanity test to make sure that the coordinator's leader state is actually updated after a rebalance
+        connectors.clear();
+        String leader = "followMe";
+        Map<String, ExtendedWorkerState> workerStates = new HashMap<>();
+        workerStates.put(leader, new ExtendedWorkerState("followMe:618", CONFIG_OFFSET, ExtendedAssignment.empty()));
+        WorkerCoordinator coordinator = mock(WorkerCoordinator.class);
+        when(coordinator.configSnapshot()).thenReturn(configState());
+        assignor.performTaskAssignment(
+                leader,
+                CONFIG_OFFSET,
+                workerStates,
+                coordinator,
+                IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2
+        );
+        verify(coordinator).leaderState(notNull());
+    }
+
+    @Test
+    public void testProtocolV1() {
+        // Sanity test to make sure that the right protocol is chosen during the assignment
+        connectors.clear();
+        String leader = "followMe";
+        List<JoinGroupResponseData.JoinGroupResponseMember> memberMetadata = new ArrayList<>();
+        ExtendedAssignment leaderAssignment = new ExtendedAssignment(
+                IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1,
+                ConnectProtocol.Assignment.NO_ERROR,
+                leader,
+                "followMe:618",
+                CONFIG_OFFSET,
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                0
+        );
+        ExtendedWorkerState leaderState = new ExtendedWorkerState("followMe:618", CONFIG_OFFSET, leaderAssignment);
+        JoinGroupResponseData.JoinGroupResponseMember leaderMetadata = new JoinGroupResponseData.JoinGroupResponseMember()
+                .setMemberId(leader)
+                .setMetadata(IncrementalCooperativeConnectProtocol.serializeMetadata(leaderState, false).array());
+        memberMetadata.add(leaderMetadata);
+        WorkerCoordinator coordinator = mock(WorkerCoordinator.class);
+        when(coordinator.configSnapshot()).thenReturn(configState());
+        Map<String, ByteBuffer> serializedAssignments = assignor.performAssignment(
+                leader,
+                ConnectProtocolCompatibility.COMPATIBLE.protocol(),
+                memberMetadata,
+                coordinator
+        );
+        serializedAssignments.forEach((worker, serializedAssignment) -> {
+            ExtendedAssignment assignment = IncrementalCooperativeConnectProtocol.deserializeAssignment(serializedAssignment);
+            assertEquals(
+                    "Incorrect protocol version in assignment for worker " + worker,
+                    IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1,
+                    assignment.version()
+            );
+        });
+    }
+
+    @Test
+    public void testProtocolV2() {
+        // Sanity test to make sure that the right protocol is chosen during the assignment
+        connectors.clear();
+        String leader = "followMe";
+        List<JoinGroupResponseData.JoinGroupResponseMember> memberMetadata = new ArrayList<>();
+        ExtendedAssignment leaderAssignment = new ExtendedAssignment(
+                IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2,
+                ConnectProtocol.Assignment.NO_ERROR,
+                leader,
+                "followMe:618",
+                CONFIG_OFFSET,
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                Collections.emptySet(),
+                0
+        );
+        ExtendedWorkerState leaderState = new ExtendedWorkerState("followMe:618", CONFIG_OFFSET, leaderAssignment);
+        JoinGroupResponseData.JoinGroupResponseMember leaderMetadata = new JoinGroupResponseData.JoinGroupResponseMember()
+                .setMemberId(leader)
+                .setMetadata(IncrementalCooperativeConnectProtocol.serializeMetadata(leaderState, true).array());
+        memberMetadata.add(leaderMetadata);
+        WorkerCoordinator coordinator = mock(WorkerCoordinator.class);
+        when(coordinator.configSnapshot()).thenReturn(configState());
+        Map<String, ByteBuffer> serializedAssignments = assignor.performAssignment(
+                leader,
+                ConnectProtocolCompatibility.SESSIONED.protocol(),
+                memberMetadata,
+                coordinator
+        );
+        serializedAssignments.forEach((worker, serializedAssignment) -> {
+            ExtendedAssignment assignment = IncrementalCooperativeConnectProtocol.deserializeAssignment(serializedAssignment);
+            assertEquals(
+                    "Incorrect protocol version in assignment for worker " + worker,
+                    IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2,
+                    assignment.version()
+            );
+        });
+    }
+
+    private void performStandardRebalance() {
+        performRebalance(false, false);
+    }
+
+    private void performFailedRebalance() {
+        performRebalance(true, false);
+    }
+
+    private void performRebalanceWithMismatchedGeneration() {
+        performRebalance(false, true);
+    }
+
+    private void performRebalance(boolean assignmentFailure, boolean generationMismatch) {
+        generationId++;
+        int lastCompletedGenerationId = generationMismatch ? generationId - 2 : generationId - 1;
+        try {
+            Map<String, ConnectorsAndTasks> memberAssignmentsCopy = new HashMap<>(memberAssignments);
+            returnedAssignments = assignor.performTaskAssignment(configState(), lastCompletedGenerationId, generationId, memberAssignmentsCopy);
+        } catch (RuntimeException e) {
+            if (assignmentFailure) {
+                RequestFuture.failure(e);
+            } else {
+                throw e;
+            }
+        }
+        assertNoRedundantAssignments();
+        if (!assignmentFailure) {
+            applyAssignments();
+        }
+    }
+
+    private void addNewEmptyWorkers(String... workers) {
+        for (String worker : workers) {
+            addNewWorker(worker, Collections.emptyList(), Collections.emptyList());
+        }
+    }
+
+    private void addNewWorker(String worker, List<String> connectors, List<ConnectorTaskId> tasks) {
+        ConnectorsAndTasks assignment = new ConnectorsAndTasks.Builder().withCopies(connectors, tasks).build();
+        assertNull(
+                "Worker " + worker + " already exists",
+                memberAssignments.put(worker, assignment)
+        );
+    }
+
+    private void removeWorkers(String... workers) {
+        for (String worker : workers) {
+            assertNotNull(
+                    "Worker " + worker + " does not exist",
+                    memberAssignments.remove(worker)
+            );
+        }
     }
 
-    private WorkerLoad emptyWorkerLoad(String worker) {
+    private static WorkerLoad emptyWorkerLoad(String worker) {
         return new WorkerLoad.Builder(worker).build();
     }
 
-    private WorkerLoad workerLoad(String worker, int connectorStart, int connectorNum,
+    private static WorkerLoad workerLoad(String worker, int connectorStart, int connectorNum,
                                   int taskStart, int taskNum) {
         return new WorkerLoad.Builder(worker).with(
                 newConnectors(connectorStart, connectorStart + connectorNum),
@@ -1264,206 +1080,231 @@ private static List<ConnectorTaskId> newTasks(String connectorName, int start, i
                 .collect(Collectors.toList());
     }
 
-    private static ClusterConfigState clusterConfigState(long offset,
-                                                         int connectorNum,
-                                                         int taskNum) {
-        return clusterConfigState(offset, 1, connectorNum, taskNum);
+    private void addNewConnector(String connector, int taskCount) {
+        assertNull(
+                "Connector " + connector + " already exists",
+                connectors.put(connector, taskCount)
+        );
     }
 
-    private static ClusterConfigState clusterConfigState(long offset,
-                                                         int connectorStart,
-                                                         int connectorNum,
-                                                         int taskNum) {
-        int connectorNumEnd = connectorStart + connectorNum - 1;
-        return new ClusterConfigState(
-                offset,
-                null,
-                connectorTaskCounts(connectorStart, connectorNumEnd, taskNum),
-                connectorConfigs(connectorStart, connectorNumEnd),
-                connectorTargetStates(connectorStart, connectorNumEnd, TargetState.STARTED),
-                taskConfigs(0, connectorNum, connectorNum * taskNum),
-                Collections.emptySet());
+    private void removeConnector(String connector) {
+        assertNotNull(
+                "Connector " + connector + " does not exist",
+                connectors.remove(connector)
+        );
     }
 
-    private static Map<String, ExtendedWorkerState> memberConfigs(String givenLeader,
-                                                                  long givenOffset,
-                                                                  Map<String, ExtendedAssignment> givenAssignments) {
-        return givenAssignments.entrySet().stream()
+    private ClusterConfigState configState() {
+        Map<String, Integer> taskCounts = new HashMap<>(connectors);
+        Map<String, Map<String, String>> connectorConfigs = transformValues(taskCounts, c -> Collections.emptyMap());
+        Map<String, TargetState> targetStates = transformValues(taskCounts, c -> TargetState.STARTED);
+        Map<ConnectorTaskId, Map<String, String>> taskConfigs = taskCounts.entrySet().stream()
+                .flatMap(e -> IntStream.range(0, e.getValue()).mapToObj(i -> new ConnectorTaskId(e.getKey(), i)))
                 .collect(Collectors.toMap(
-                    Map.Entry::getKey,
-                    e -> new ExtendedWorkerState(expectedLeaderUrl(givenLeader), givenOffset, e.getValue())));
+                        Function.identity(),
+                        connectorTaskId -> Collections.emptyMap()
+                ));
+        return new ClusterConfigState(
+                CONFIG_OFFSET,
+                null,
+                taskCounts,
+                connectorConfigs,
+                targetStates,
+                taskConfigs,
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptySet(),
+                Collections.emptySet());
     }
 
-    private static Map<String, ExtendedWorkerState> memberConfigs(String givenLeader,
-                                                                  long givenOffset,
-                                                                  int start,
-                                                                  int connectorNum) {
-        return IntStream.range(start, connectorNum + 1)
-                .mapToObj(i -> new SimpleEntry<>("worker" + i, new ExtendedWorkerState(expectedLeaderUrl(givenLeader), givenOffset, null)))
-                .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
+    private void applyAssignments() {
+        returnedAssignments.allWorkers().forEach(worker -> {
+            ConnectorsAndTasks workerAssignment = memberAssignments.computeIfAbsent(worker, ignored -> new ConnectorsAndTasks.Builder().build());
+
+            workerAssignment.connectors().removeAll(returnedAssignments.newlyRevokedConnectors(worker));
+            workerAssignment.connectors().addAll(returnedAssignments.newlyAssignedConnectors(worker));
+            workerAssignment.tasks().removeAll(returnedAssignments.newlyRevokedTasks(worker));
+            workerAssignment.tasks().addAll(returnedAssignments.newlyAssignedTasks(worker));
+
+            assertEquals(
+                    "Complete connector assignment for worker " + worker + " does not match expectations " +
+                            "based on prior assignment and new revocations and assignments",
+                    workerAssignment.connectors(),
+                    returnedAssignments.allAssignedConnectors().get(worker)
+            );
+            assertEquals(
+                    "Complete task assignment for worker " + worker + " does not match expectations " +
+                            "based on prior assignment and new revocations and assignments",
+                    workerAssignment.tasks(),
+                    returnedAssignments.allAssignedTasks().get(worker)
+            );
+        });
     }
 
-    private static Map<String, Integer> connectorTaskCounts(int start,
-                                                            int connectorNum,
-                                                            int taskCounts) {
-        return IntStream.range(start, connectorNum + 1)
-                .mapToObj(i -> new SimpleEntry<>("connector" + i, taskCounts))
-                .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
+    private void assertEmptyAssignment() {
+        assertEquals(
+                "No connectors should have been newly assigned during this round",
+                Collections.emptyList(),
+                ConnectUtils.combineCollections(returnedAssignments.newlyAssignedConnectors().values())
+        );
+        assertEquals(
+                "No tasks should have been newly assigned during this round",
+                Collections.emptyList(),
+                ConnectUtils.combineCollections(returnedAssignments.newlyAssignedTasks().values())
+        );
+        assertEquals(
+                "No connectors should have been revoked during this round",
+                Collections.emptyList(),
+                ConnectUtils.combineCollections(returnedAssignments.newlyRevokedConnectors().values())
+        );
+        assertEquals(
+                "No tasks should have been revoked during this round",
+                Collections.emptyList(),
+                ConnectUtils.combineCollections(returnedAssignments.newlyRevokedTasks().values())
+        );
     }
 
-    private static Map<String, Map<String, String>> connectorConfigs(int start, int connectorNum) {
-        return IntStream.range(start, connectorNum + 1)
-                .mapToObj(i -> new SimpleEntry<>("connector" + i, new HashMap<String, String>()))
-                .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
+    private void assertWorkers(String... workers) {
+        assertEquals(
+                "Wrong set of workers",
+                new HashSet<>(Arrays.asList(workers)),
+                returnedAssignments.allWorkers()
+        );
     }
 
-    private static Map<String, TargetState> connectorTargetStates(int start,
-                                                                  int connectorNum,
-                                                                  TargetState state) {
-        return IntStream.range(start, connectorNum + 1)
-                .mapToObj(i -> new SimpleEntry<>("connector" + i, state))
-                .collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
+    /**
+     * Assert that the connector counts for each worker in the cluster match the expected counts.
+     * For example, calling {@code assertConnectorAllocations(0, 0, 2, 3)} ensures that there are two
+     * workers in the cluster that are assigned no connectors, one worker that is assigned two connectors,
+     * and one worker that is assigned three connectors.
+     */
+    private void assertConnectorAllocations(int... connectorCounts) {
+        assertAllocations("connectors", ConnectorsAndTasks::connectors, connectorCounts);
     }
 
-    private static Map<ConnectorTaskId, Map<String, String>> taskConfigs(int start,
-                                                                         int connectorNum,
-                                                                         int taskNum) {
-        return IntStream.range(start, taskNum + 1)
-                .mapToObj(i -> new SimpleEntry<>(
-                        new ConnectorTaskId("connector" + i / connectorNum + 1, i),
-                        new HashMap<String, String>())
-                ).collect(Collectors.toMap(SimpleEntry::getKey, SimpleEntry::getValue));
+    /**
+     * Assert that the task counts for each worker in the cluster match the expected counts.
+     * For example, calling {@code assertTaskAllocations(0, 0, 2, 3)} ensures that there are two
+     * workers in the cluster that are assigned no tasks, one worker that is assigned two tasks,
+     * and one worker that is assigned three tasks.
+     */
+    private void assertTaskAllocations(int... taskCounts) {
+        assertAllocations("tasks", ConnectorsAndTasks::tasks, taskCounts);
     }
 
-    private void applyAssignments(Map<String, ExtendedAssignment> newAssignments) {
-        newAssignments.forEach((k, v) -> {
-            assignments.computeIfAbsent(k, noop -> newExpandableAssignment())
-                    .connectors()
-                    .removeAll(v.revokedConnectors());
-            assignments.computeIfAbsent(k, noop -> newExpandableAssignment())
-                    .connectors()
-                    .addAll(v.connectors());
-            assignments.computeIfAbsent(k, noop -> newExpandableAssignment())
-                    .tasks()
-                    .removeAll(v.revokedTasks());
-            assignments.computeIfAbsent(k, noop -> newExpandableAssignment())
-                    .tasks()
-                    .addAll(v.tasks());
-        });
+    private void assertAllocations(String allocated, Function<ConnectorsAndTasks, ? extends Collection<?>> allocation, int... rawExpectedAllocations) {
+        List<Integer> expectedAllocations = IntStream.of(rawExpectedAllocations)
+                .boxed()
+                .sorted()
+                .collect(Collectors.toList());
+        List<Integer> actualAllocations = allocations(allocation);
+        assertEquals(
+                "Allocation of assigned " + allocated + " across cluster does not match expected counts",
+                expectedAllocations,
+                actualAllocations
+        );
     }
 
-    private ExtendedAssignment newExpandableAssignment() {
-        return new ExtendedAssignment(
-                protocolVersion,
-                ConnectProtocol.Assignment.NO_ERROR,
-                leader,
-                leaderUrl,
-                offset,
-                new ArrayList<>(),
-                new ArrayList<>(),
-                new ArrayList<>(),
-                new ArrayList<>(),
-                0);
+    private List<Integer> allocations(Function<ConnectorsAndTasks, ? extends Collection<?>> allocation) {
+        return memberAssignments.values().stream()
+                .map(allocation)
+                .map(Collection::size)
+                .sorted()
+                .collect(Collectors.toList());
     }
 
-    private static String expectedLeaderUrl(String givenLeader) {
-        return "http://" + givenLeader + ":8083";
+    private void assertDelay(int expectedDelay) {
+        assertEquals(
+                "Wrong rebalance delay",
+                expectedDelay,
+                assignor.delay
+        );
     }
 
-    private void assertAssignment(int connectorNum, int taskNum,
-                                  int revokedConnectorNum, int revokedTaskNum,
-                                  String... workers) {
-        assertAssignment(leader, connectorNum, taskNum, revokedConnectorNum, revokedTaskNum, workers);
-    }
+    /**
+     * Ensure that no connectors or tasks that were already assigned during the previous round are newly assigned in this round,
+     * and that each newly-assigned connector and task is only assigned to a single worker.
+     */
+    private void assertNoRedundantAssignments() {
+        List<String> existingConnectors = ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::connectors);
+        List<String> newConnectors = ConnectUtils.combineCollections(returnedAssignments.newlyAssignedConnectors().values());
+        List<ConnectorTaskId> existingTasks = ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::tasks);
+        List<ConnectorTaskId> newTasks = ConnectUtils.combineCollections(returnedAssignments.newlyAssignedTasks().values());
+
+        assertNoDuplicates(
+                newConnectors,
+                "Connectors should be unique in assignments but duplicates were found; the set of newly-assigned connectors is " + newConnectors
+        );
+        assertNoDuplicates(
+                newTasks,
+                "Tasks should be unique in assignments but duplicates were found; the set of newly-assigned tasks is " + newTasks
+        );
 
-    private void assertAssignment(String expectedLeader, int connectorNum, int taskNum,
-                                  int revokedConnectorNum, int revokedTaskNum,
-                                  String... workers) {
-        assertThat("Wrong number of workers",
-                expectedMemberConfigs.keySet().size(),
-                is(workers.length));
-        assertThat("Wrong set of workers",
-                new ArrayList<>(expectedMemberConfigs.keySet()), hasItems(workers));
-        assertThat("Wrong number of assigned connectors",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().connectors().size()).reduce(0, Integer::sum),
-                is(connectorNum));
-        assertThat("Wrong number of assigned tasks",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().tasks().size()).reduce(0, Integer::sum),
-                is(taskNum));
-        assertThat("Wrong number of revoked connectors",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().revokedConnectors().size()).reduce(0, Integer::sum),
-                is(revokedConnectorNum));
-        assertThat("Wrong number of revoked tasks",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().revokedTasks().size()).reduce(0, Integer::sum),
-                is(revokedTaskNum));
-        assertThat("Wrong leader in assignments",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().leader()).distinct().collect(Collectors.joining(", ")),
-                is(expectedLeader));
-        assertThat("Wrong leaderUrl in assignments",
-                expectedMemberConfigs.values().stream().map(v -> v.assignment().leaderUrl()).distinct().collect(Collectors.joining(", ")),
-                is(expectedLeaderUrl(expectedLeader)));
+        existingConnectors.retainAll(newConnectors);
+        assertEquals("Found connectors in new assignment that already exist in current assignment",
+                Collections.emptyList(),
+                existingConnectors);
+        existingTasks.retainAll(newTasks);
+        assertEquals("Found tasks in new assignment that already exist in current assignment",
+                Collections.emptyList(),
+                existingConnectors);
     }
 
-    private void assertDelay(int expectedDelay, Map<String, ExtendedAssignment> newAssignments) {
-        newAssignments.values().stream()
-                .forEach(a -> assertEquals(
-                        "Wrong rebalance delay in " + a, expectedDelay, a.delay()));
+    private void assertBalancedAndCompleteAllocation() {
+        assertBalancedAllocation();
+        assertCompleteAllocation();
     }
 
-    private void assertNoReassignments(Map<String, ExtendedWorkerState> existingAssignments,
-                                       Map<String, ExtendedWorkerState> newAssignments) {
-        assertNoDuplicateInAssignment(existingAssignments);
-        assertNoDuplicateInAssignment(newAssignments);
+    private void assertBalancedAllocation() {
+        List<Integer> connectorCounts = allocations(ConnectorsAndTasks::connectors);
+        List<Integer> taskCounts = allocations(ConnectorsAndTasks::tasks);
 
-        List<String> existingConnectors = existingAssignments.values().stream()
-                .flatMap(a -> a.assignment().connectors().stream())
-                .collect(Collectors.toList());
-        List<String> newConnectors = newAssignments.values().stream()
-                .flatMap(a -> a.assignment().connectors().stream())
-                .collect(Collectors.toList());
-
-        List<ConnectorTaskId> existingTasks = existingAssignments.values().stream()
-                .flatMap(a -> a.assignment().tasks().stream())
-                .collect(Collectors.toList());
+        int minConnectors = connectorCounts.get(0);
+        int maxConnectors = connectorCounts.get(connectorCounts.size() - 1);
 
-        List<ConnectorTaskId> newTasks = newAssignments.values().stream()
-                .flatMap(a -> a.assignment().tasks().stream())
-                .collect(Collectors.toList());
+        int minTasks = taskCounts.get(0);
+        int maxTasks = taskCounts.get(taskCounts.size() - 1);
 
-        existingConnectors.retainAll(newConnectors);
-        assertThat("Found connectors in new assignment that already exist in current assignment",
-                Collections.emptyList(),
-                is(existingConnectors));
-        existingTasks.retainAll(newTasks);
-        assertThat("Found tasks in new assignment that already exist in current assignment",
-                Collections.emptyList(),
-                is(existingConnectors));
+        assertTrue(
+                "Assignments are imbalanced. The spread of connectors across each worker is: " + connectorCounts,
+                maxConnectors - minConnectors <= 1
+        );
+        assertTrue(
+                "Assignments are imbalanced. The spread of tasks across each worker is: " + taskCounts,
+                maxTasks - minTasks <= 1
+        );
     }
 
-    private void assertNoDuplicateInAssignment(Map<String, ExtendedWorkerState> existingAssignment) {
-        List<String> existingConnectors = existingAssignment.values().stream()
-                .flatMap(a -> a.assignment().connectors().stream())
-                .collect(Collectors.toList());
-        Set<String> existingUniqueConnectors = new HashSet<>(existingConnectors);
-        existingConnectors.removeAll(existingUniqueConnectors);
-        assertThat("Connectors should be unique in assignments but duplicates where found",
-                Collections.emptyList(),
-                is(existingConnectors));
-
-        List<ConnectorTaskId> existingTasks = existingAssignment.values().stream()
-                .flatMap(a -> a.assignment().tasks().stream())
-                .collect(Collectors.toList());
-        Set<ConnectorTaskId> existingUniqueTasks = new HashSet<>(existingTasks);
-        existingTasks.removeAll(existingUniqueTasks);
-        assertThat("Tasks should be unique in assignments but duplicates where found",
-                Collections.emptyList(),
-                is(existingTasks));
+    private void assertCompleteAllocation() {
+        List<String> allAssignedConnectors = ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::connectors);
+        assertEquals(
+                "The set of connectors assigned across the cluster does not match the set of connectors in the config topic",
+                connectors.keySet(),
+                new HashSet<>(allAssignedConnectors)
+        );
+
+        Map<String, List<ConnectorTaskId>> allAssignedTasks = ConnectUtils.combineCollections(memberAssignments.values(), ConnectorsAndTasks::tasks)
+                .stream()
+                .collect(Collectors.groupingBy(ConnectorTaskId::connector, Collectors.toList()));
+
+        connectors.forEach((connector, taskCount) -> {
+            Set<ConnectorTaskId> expectedTasks = IntStream.range(0, taskCount)
+                    .mapToObj(i -> new ConnectorTaskId(connector, i))
+                    .collect(Collectors.toSet());
+            assertEquals(
+                    "The set of tasks assigned across the cluster for connector " + connector + " does not match the set of tasks in the config topic",
+                    expectedTasks,
+                    new HashSet<>(allAssignedTasks.get(connector))
+            );
+        });
     }
 
-    private void expectGeneration() {
-        when(coordinator.generationId())
-                .thenReturn(assignor.previousGenerationId + 1)
-                .thenReturn(assignor.previousGenerationId + 1);
-        when(coordinator.lastCompletedGenerationId()).thenReturn(assignor.previousGenerationId);
+    private static <T> void assertNoDuplicates(List<T> collection, String assertionMessage) {
+        assertEquals(
+                assertionMessage,
+                new HashSet<>(collection).size(),
+                collection.size()
+        );
     }
+
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorIncrementalTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorIncrementalTest.java
index 35ba6249d7455..f8cf14200ca41 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorIncrementalTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorIncrementalTest.java
@@ -26,6 +26,7 @@
 import org.apache.kafka.common.requests.RequestTestUtils;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.KafkaConfigBackingStore;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.junit.After;
@@ -215,7 +216,7 @@ public void testMetadataWithExistingAssignment() {
                 CONNECT_PROTOCOL_V1, ExtendedAssignment.NO_ERROR, leaderId, leaderUrl, configState1.offset(),
                 Collections.singletonList(connectorId1), Arrays.asList(taskId1x0, taskId2x0),
                 Collections.emptyList(), Collections.emptyList(), 0);
-        ByteBuffer buf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment);
+        ByteBuffer buf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment, false);
         // Using onJoinComplete to register the protocol selection decided by the broker
         // coordinator as well as an existing previous assignment that the call to metadata will
         // include with v1 but not with v0
@@ -246,7 +247,7 @@ public void testMetadataWithExistingAssignmentButOlderProtocolSelection() {
                 CONNECT_PROTOCOL_V1, ExtendedAssignment.NO_ERROR, leaderId, leaderUrl, configState1.offset(),
                 Collections.singletonList(connectorId1), Arrays.asList(taskId1x0, taskId2x0),
                 Collections.emptyList(), Collections.emptyList(), 0);
-        ByteBuffer buf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment);
+        ByteBuffer buf = IncrementalCooperativeConnectProtocol.serializeAssignment(assignment, false);
         // Using onJoinComplete to register the protocol selection decided by the broker
         // coordinator as well as an existing previous assignment that the call to metadata will
         // include with v1 but not with v0
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java
index 60fbe37ad36ab..c3715aa3028ec 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerCoordinatorTest.java
@@ -36,6 +36,7 @@
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.runtime.TargetState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.storage.KafkaConfigBackingStore;
 import org.apache.kafka.connect.util.ConnectorTaskId;
 import org.easymock.EasyMock;
@@ -156,6 +157,9 @@ public void setup() {
                 Collections.singletonMap(connectorId1, new HashMap<>()),
                 Collections.singletonMap(connectorId1, TargetState.STARTED),
                 Collections.singletonMap(taskId1x0, new HashMap<>()),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptySet(),
                 Collections.emptySet()
         );
 
@@ -179,6 +183,9 @@ public void setup() {
                 configState2ConnectorConfigs,
                 configState2TargetStates,
                 configState2TaskConfigs,
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptySet(),
                 Collections.emptySet()
         );
 
@@ -205,6 +212,9 @@ public void setup() {
                 configStateSingleTaskConnectorsConnectorConfigs,
                 configStateSingleTaskConnectorsTargetStates,
                 configStateSingleTaskConnectorsTaskConfigs,
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptySet(),
                 Collections.emptySet()
         );
     }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMemberTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMemberTest.java
index 05cd01734fef8..563d71dbed685 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMemberTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/distributed/WorkerGroupMemberTest.java
@@ -25,16 +25,12 @@
 import org.apache.kafka.connect.runtime.MockConnectMetrics;
 import org.apache.kafka.connect.runtime.WorkerConfig;
 import org.apache.kafka.connect.storage.ConfigBackingStore;
-import org.apache.kafka.connect.storage.StatusBackingStore;
 import org.apache.kafka.connect.util.ConnectUtils;
-import org.easymock.EasyMock;
 import org.junit.Test;
 import org.junit.runner.RunWith;
-import org.powermock.api.easymock.PowerMock;
-import org.powermock.api.easymock.annotation.Mock;
-import org.powermock.core.classloader.annotations.PowerMockIgnore;
-import org.powermock.core.classloader.annotations.PrepareForTest;
-import org.powermock.modules.junit4.PowerMockRunner;
+import org.mockito.Mock;
+import org.mockito.MockedStatic;
+import org.mockito.junit.MockitoJUnitRunner;
 
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
@@ -45,15 +41,13 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mockStatic;
 
-@RunWith(PowerMockRunner.class)
-@PrepareForTest({ConnectUtils.class})
-@PowerMockIgnore({"javax.management.*", "javax.crypto.*"})
+@RunWith(MockitoJUnitRunner.StrictStubs.class)
 public class WorkerGroupMemberTest {
     @Mock
     private ConfigBackingStore configBackingStore;
-    @Mock
-    private StatusBackingStore statusBackingStore;
 
     @Test
     public void testMetrics() throws Exception {
@@ -72,10 +66,11 @@ public void testMetrics() throws Exception {
 
         LogContext logContext = new LogContext("[Worker clientId=client-1 + groupId= group-1]");
 
-        expectClusterId();
-
-        member = new WorkerGroupMember(config, "", configBackingStore,
-        null, Time.SYSTEM, "client-1", logContext);
+        try (MockedStatic<ConnectUtils> utilities = mockStatic(ConnectUtils.class)) {
+            utilities.when(() -> ConnectUtils.lookupKafkaClusterId(any())).thenReturn("cluster-1");
+            member = new WorkerGroupMember(config, "", configBackingStore, null, Time.SYSTEM, "client-1", logContext);
+            utilities.verify(() -> ConnectUtils.lookupKafkaClusterId(any()));
+        }     
 
         boolean entered = false;
         for (MetricsReporter reporter : member.metrics().reporters()) {
@@ -94,10 +89,4 @@ public void testMetrics() throws Exception {
         //verify metric exists with correct prefix
         assertNotNull(server.getObjectInstance(new ObjectName("kafka.connect:type=grp1,client-id=client-1")));
     }
-    private void expectClusterId() {
-        PowerMock.mockStaticPartial(ConnectUtils.class, "lookupKafkaClusterId");
-        EasyMock.expect(ConnectUtils.lookupKafkaClusterId(EasyMock.anyObject())).andReturn("cluster-1").anyTimes();
-        PowerMock.replay(ConnectUtils.class);
-    }
-
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/health/ConnectClusterStateImplTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/health/ConnectClusterStateImplTest.java
index 58eb5a9e97222..33af4ad3d220c 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/health/ConnectClusterStateImplTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/health/ConnectClusterStateImplTest.java
@@ -19,13 +19,12 @@
 import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.connect.runtime.Herder;
 import org.apache.kafka.connect.util.Callback;
-import org.easymock.Capture;
-import org.easymock.EasyMock;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
-import org.powermock.api.easymock.annotation.Mock;
-import org.powermock.modules.junit4.PowerMockRunner;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.junit.MockitoJUnitRunner;
 
 import java.util.Arrays;
 import java.util.Collection;
@@ -37,8 +36,10 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotSame;
 import static org.junit.Assert.assertThrows;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doAnswer;
 
-@RunWith(PowerMockRunner.class)
+@RunWith(MockitoJUnitRunner.class)
 public class ConnectClusterStateImplTest {
     protected static final String KAFKA_CLUSTER_ID = "franzwashere";
 
@@ -60,13 +61,13 @@ public void setUp() {
     
     @Test
     public void connectors() {
-        Capture<Callback<Collection<String>>> callback = EasyMock.newCapture();
-        herder.connectors(EasyMock.capture(callback));
-        EasyMock.expectLastCall().andAnswer(() -> {
+        @SuppressWarnings("unchecked")
+        ArgumentCaptor<Callback<Collection<String>>> callback = ArgumentCaptor.forClass(Callback.class);
+        doAnswer(invocation -> {
             callback.getValue().onCompletion(null, expectedConnectors);
             return null;
-        });
-        EasyMock.replay(herder);
+        }).when(herder).connectors(callback.capture());
+
         assertEquals(expectedConnectors, connectClusterState.connectors());
     }
 
@@ -74,14 +75,16 @@ public void connectors() {
     public void connectorConfig() {
         final String connName = "sink6";
         final Map<String, String> expectedConfig = Collections.singletonMap("key", "value");
-        Capture<Callback<Map<String, String>>> callback = EasyMock.newCapture();
-        herder.connectorConfig(EasyMock.eq(connName), EasyMock.capture(callback));
-        EasyMock.expectLastCall().andAnswer(() -> {
+
+        @SuppressWarnings("unchecked")
+        ArgumentCaptor<Callback<Map<String, String>>> callback = ArgumentCaptor.forClass(Callback.class);
+        doAnswer(invocation -> {
             callback.getValue().onCompletion(null, expectedConfig);
             return null;
-        });
-        EasyMock.replay(herder);
+        }).when(herder).connectorConfig(eq(connName), callback.capture());
+
         Map<String, String> actualConfig = connectClusterState.connectorConfig(connName);
+
         assertEquals(expectedConfig, actualConfig);
         assertNotSame(
             "Config should be copied in order to avoid mutation by REST extensions",
@@ -97,14 +100,14 @@ public void kafkaClusterId() {
 
     @Test
     public void connectorsFailure() {
-        Capture<Callback<Collection<String>>> callback = EasyMock.newCapture();
-        herder.connectors(EasyMock.capture(callback));
-        EasyMock.expectLastCall().andAnswer(() -> {
+        @SuppressWarnings("unchecked")
+        ArgumentCaptor<Callback<Collection<String>>> callback = ArgumentCaptor.forClass(Callback.class);
+        doAnswer(invocation -> {
             Throwable timeout = new TimeoutException();
             callback.getValue().onCompletion(timeout, null);
             return null;
-        });
-        EasyMock.replay(herder);
+        }).when(herder).connectors(callback.capture());
+
         assertThrows(ConnectException.class, connectClusterState::connectors);
     }
 }
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/RestClientTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/RestClientTest.java
new file mode 100644
index 0000000000000..4eb9ada941511
--- /dev/null
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/RestClientTest.java
@@ -0,0 +1,252 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.connect.runtime.rest;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.kafka.connect.runtime.rest.entities.ErrorMessage;
+import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
+import org.eclipse.jetty.client.HttpClient;
+import org.eclipse.jetty.client.api.ContentResponse;
+import org.eclipse.jetty.client.api.Request;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.runners.Enclosed;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.mockito.Mock;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoJUnitRunner;
+import org.mockito.junit.MockitoRule;
+
+import javax.crypto.SecretKey;
+import javax.ws.rs.core.Response;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Objects;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeoutException;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(Enclosed.class)
+public class RestClientTest {
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+    private static final TypeReference<TestDTO> TEST_TYPE = new TypeReference<TestDTO>() {
+    };
+    private static final SecretKey MOCK_SECRET_KEY = getMockSecretKey();
+
+    private static void assertIsInternalServerError(ConnectRestException e) {
+        assertEquals(Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), e.statusCode());
+        assertEquals(Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), e.errorCode());
+    }
+
+    private static SecretKey getMockSecretKey() {
+        SecretKey mockKey = mock(SecretKey.class);
+        when(mockKey.getFormat()).thenReturn("RAW"); // supported format by
+        when(mockKey.getEncoded()).thenReturn("SomeKey".getBytes(StandardCharsets.UTF_8));
+        return mockKey;
+    }
+
+    private static RestClient.HttpResponse<TestDTO> httpRequest(HttpClient httpClient, String requestSignatureAlgorithm) {
+        return RestClient.httpRequest(
+                httpClient,
+                "https://localhost:1234/api/endpoint",
+                "GET",
+                null,
+                new TestDTO("requestBodyData"),
+                TEST_TYPE,
+                MOCK_SECRET_KEY,
+                requestSignatureAlgorithm);
+    }
+
+    private static RestClient.HttpResponse<TestDTO> httpRequest(HttpClient httpClient) {
+        String validRequestSignatureAlgorithm = "HmacSHA1";
+        return httpRequest(httpClient, validRequestSignatureAlgorithm);
+    }
+
+
+    @RunWith(Parameterized.class)
+    public static class RequestFailureParameterizedTest {
+
+        @Rule
+        public MockitoRule initRule = MockitoJUnit.rule();
+
+        @Mock
+        private HttpClient httpClient;
+
+        @Parameterized.Parameter
+        public Throwable requestException;
+        
+        @Parameterized.Parameters
+        public static Collection<Object[]> requestExceptions() {
+            return Arrays.asList(new Object[][]{
+                    {new InterruptedException()},
+                    {new ExecutionException(null)},
+                    {new TimeoutException()}
+            });
+        }
+
+        private static Request buildThrowingMockRequest(Throwable t) throws ExecutionException, InterruptedException, TimeoutException {
+            Request req = mock(Request.class);
+            when(req.header(anyString(), anyString())).thenReturn(req);
+            when(req.send()).thenThrow(t);
+            return req;
+        }
+
+        @Test
+        public void testFailureDuringRequestCausesInternalServerError() throws Exception {
+            Request request = buildThrowingMockRequest(requestException);
+            when(httpClient.newRequest(anyString())).thenReturn(request);
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient));
+            assertIsInternalServerError(e);
+            assertEquals(requestException, e.getCause());
+        }
+    }
+
+
+    @RunWith(MockitoJUnitRunner.class)
+    public static class Tests {
+        @Mock
+        private HttpClient httpClient;
+
+        private static String toJsonString(Object obj) {
+            try {
+                return OBJECT_MAPPER.writeValueAsString(obj);
+            } catch (JsonProcessingException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        private void setupHttpClient(int responseCode, String responseJsonString) throws Exception {
+            Request req = mock(Request.class);
+            ContentResponse resp = mock(ContentResponse.class);
+            when(resp.getStatus()).thenReturn(responseCode);
+            when(resp.getContentAsString()).thenReturn(responseJsonString);
+            when(req.send()).thenReturn(resp);
+            when(req.header(anyString(), anyString())).thenReturn(req);
+            when(httpClient.newRequest(anyString())).thenReturn(req);
+        }
+
+        @Test
+        public void testSuccess() throws Exception {
+            int statusCode = Response.Status.OK.getStatusCode();
+            TestDTO expectedResponse = new TestDTO("someContent");
+            setupHttpClient(statusCode, toJsonString(expectedResponse));
+
+            RestClient.HttpResponse<TestDTO> httpResp = httpRequest(httpClient);
+            assertEquals(statusCode, httpResp.status());
+            assertEquals(expectedResponse, httpResp.body());
+        }
+
+        @Test
+        public void testNoContent() throws Exception {
+            int statusCode = Response.Status.NO_CONTENT.getStatusCode();
+            setupHttpClient(statusCode, null);
+
+            RestClient.HttpResponse<TestDTO> httpResp = httpRequest(httpClient);
+            assertEquals(statusCode, httpResp.status());
+            assertNull(httpResp.body());
+        }
+
+        @Test
+        public void testStatusCodeAndErrorMessagePreserved() throws Exception {
+            int statusCode = Response.Status.CONFLICT.getStatusCode();
+            ErrorMessage errorMsg = new ErrorMessage(Response.Status.GONE.getStatusCode(), "Some Error Message");
+            setupHttpClient(statusCode, toJsonString(errorMsg));
+
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient));
+            assertEquals(statusCode, e.statusCode());
+            assertEquals(errorMsg.errorCode(), e.errorCode());
+            assertEquals(errorMsg.message(), e.getMessage());
+        }
+
+        @Test
+        public void testUnexpectedHttpResponseCausesInternalServerError() throws Exception {
+            int statusCode = Response.Status.NOT_MODIFIED.getStatusCode(); // never thrown explicitly -
+            // should be treated as an unexpected error and translated into 500 INTERNAL_SERVER_ERROR
+
+            setupHttpClient(statusCode, null);
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient));
+            assertIsInternalServerError(e);
+        }
+
+        @Test
+        public void testRuntimeExceptionCausesInternalServerError() {
+            when(httpClient.newRequest(anyString())).thenThrow(new RuntimeException());
+
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient));
+            assertIsInternalServerError(e);
+        }
+
+        @Test
+        public void testRequestSignatureFailureCausesInternalServerError() throws Exception {
+            setupHttpClient(0, null);
+
+            String invalidRequestSignatureAlgorithm = "Foo";
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient, invalidRequestSignatureAlgorithm));
+            assertIsInternalServerError(e);
+        }
+
+        @Test
+        public void testIOExceptionCausesInternalServerError() throws Exception {
+            String invalidJsonString = "Invalid";
+            setupHttpClient(201, invalidJsonString);
+
+            ConnectRestException e = assertThrows(ConnectRestException.class, () -> httpRequest(httpClient));
+            assertIsInternalServerError(e);
+        }
+    }
+
+
+    private static class TestDTO {
+        private final String content;
+
+        @JsonCreator
+        private TestDTO(@JsonProperty(value = "content") String content) {
+            this.content = content;
+        }
+
+        public String getContent() {
+            return content;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            TestDTO testDTO = (TestDTO) o;
+            return content.equals(testDTO.content);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(content);
+        }
+    }
+}
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResourceTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResourceTest.java
index 3c5fe92d8b561..ba89a21c89fef 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResourceTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/rest/resources/ConnectorsResourceTest.java
@@ -848,6 +848,70 @@ public void testRestartConnectorAndTasksRequestAccepted() throws Throwable {
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testFenceZombiesNoInternalRequestSignature() throws Throwable {
+        final Capture<Callback<Void>> cb = Capture.newInstance();
+        herder.fenceZombieSourceTasks(EasyMock.eq(CONNECTOR_NAME), EasyMock.capture(cb), EasyMock.anyObject(InternalRequestSignature.class));
+        expectAndCallbackResult(cb, null);
+
+        PowerMock.replayAll();
+
+        connectorsResource.fenceZombies(CONNECTOR_NAME, NULL_HEADERS, FORWARD, serializeAsBytes(null));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFenceZombiesWithInternalRequestSignature() throws Throwable {
+        final String signatureAlgorithm = "HmacSHA256";
+        final String encodedSignature = "Kv1/OSsxzdVIwvZ4e30avyRIVrngDfhzVUm/kAZEKc4=";
+
+        final Capture<Callback<Void>> cb = Capture.newInstance();
+        final Capture<InternalRequestSignature> signatureCapture = Capture.newInstance();
+        herder.fenceZombieSourceTasks(EasyMock.eq(CONNECTOR_NAME), EasyMock.capture(cb), EasyMock.capture(signatureCapture));
+        expectAndCallbackResult(cb, null);
+
+        HttpHeaders headers = EasyMock.mock(HttpHeaders.class);
+        EasyMock.expect(headers.getHeaderString(InternalRequestSignature.SIGNATURE_ALGORITHM_HEADER))
+                .andReturn(signatureAlgorithm)
+                .once();
+        EasyMock.expect(headers.getHeaderString(InternalRequestSignature.SIGNATURE_HEADER))
+                .andReturn(encodedSignature)
+                .once();
+
+        PowerMock.replayAll(headers);
+
+        connectorsResource.fenceZombies(CONNECTOR_NAME, headers, FORWARD, serializeAsBytes(null));
+
+        PowerMock.verifyAll();
+        InternalRequestSignature expectedSignature = new InternalRequestSignature(
+                serializeAsBytes(null),
+                Mac.getInstance(signatureAlgorithm),
+                Base64.getDecoder().decode(encodedSignature)
+        );
+        assertEquals(
+                expectedSignature,
+                signatureCapture.getValue()
+        );
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFenceZombiesConnectorNotFound() throws Throwable {
+        final Capture<Callback<Void>> cb = Capture.newInstance();
+        herder.fenceZombieSourceTasks(EasyMock.eq(CONNECTOR_NAME), EasyMock.capture(cb), EasyMock.anyObject(InternalRequestSignature.class));
+
+        expectAndCallbackException(cb, new NotFoundException("not found"));
+
+        PowerMock.replayAll();
+
+        assertThrows(NotFoundException.class,
+                () -> connectorsResource.fenceZombies(CONNECTOR_NAME, NULL_HEADERS, FORWARD, serializeAsBytes(null)));
+
+        PowerMock.verifyAll();
+    }
+
     @Test
     public void testRestartConnectorNotFound() {
         final Capture<Callback<Void>> cb = Capture.newInstance();
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java
index f5ee4ccd310d7..ddc030ada4c9f 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/runtime/standalone/StandaloneHerderTest.java
@@ -41,7 +41,7 @@
 import org.apache.kafka.connect.runtime.Worker;
 import org.apache.kafka.connect.runtime.WorkerConfigTransformer;
 import org.apache.kafka.connect.runtime.WorkerConnector;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
+import org.apache.kafka.connect.storage.ClusterConfigState;
 import org.apache.kafka.connect.runtime.isolation.DelegatingClassLoader;
 import org.apache.kafka.connect.runtime.isolation.PluginClassLoader;
 import org.apache.kafka.connect.runtime.isolation.Plugins;
@@ -364,9 +364,12 @@ public void testRestartTask() throws Exception {
                 Collections.singletonMap(CONNECTOR_NAME, connectorConfig),
                 Collections.singletonMap(CONNECTOR_NAME, TargetState.STARTED),
                 Collections.singletonMap(taskId, taskConfig(SourceSink.SOURCE)),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                new HashSet<>(),
                 new HashSet<>(),
                 transformer);
-        worker.startTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SOURCE), herder, TargetState.STARTED);
+        worker.startSourceTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SOURCE), herder, TargetState.STARTED);
         EasyMock.expectLastCall().andReturn(true);
 
         PowerMock.replayAll();
@@ -402,9 +405,12 @@ public void testRestartTaskFailureOnStart() throws Exception {
                 Collections.singletonMap(CONNECTOR_NAME, connectorConfig),
                 Collections.singletonMap(CONNECTOR_NAME, TargetState.STARTED),
                 Collections.singletonMap(new ConnectorTaskId(CONNECTOR_NAME, 0), taskConfig(SourceSink.SOURCE)),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                new HashSet<>(),
                 new HashSet<>(),
                 transformer);
-        worker.startTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SOURCE), herder, TargetState.STARTED);
+        worker.startSourceTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SOURCE), herder, TargetState.STARTED);
         EasyMock.expectLastCall().andReturn(false);
 
         PowerMock.replayAll();
@@ -572,9 +578,12 @@ public void testRestartConnectorAndTasksOnlyTasks() throws Exception {
                 Collections.singletonMap(CONNECTOR_NAME, connectorConfig),
                 Collections.singletonMap(CONNECTOR_NAME, TargetState.STARTED),
                 Collections.singletonMap(taskId, taskConfig(SourceSink.SINK)),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                new HashSet<>(),
                 new HashSet<>(),
                 transformer);
-        worker.startTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SINK), herder, TargetState.STARTED);
+        worker.startSinkTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SINK), herder, TargetState.STARTED);
         EasyMock.expectLastCall().andReturn(true);
         PowerMock.replayAll();
 
@@ -635,9 +644,12 @@ public void testRestartConnectorAndTasksBoth() throws Exception {
                 Collections.singletonMap(CONNECTOR_NAME, connectorConfig),
                 Collections.singletonMap(CONNECTOR_NAME, TargetState.STARTED),
                 Collections.singletonMap(taskId, taskConfig(SourceSink.SINK)),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                new HashSet<>(),
                 new HashSet<>(),
                 transformer);
-        worker.startTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SINK), herder, TargetState.STARTED);
+        worker.startSinkTask(taskId, configState, connectorConfig, taskConfig(SourceSink.SINK), herder, TargetState.STARTED);
         EasyMock.expectLastCall().andReturn(true);
         PowerMock.replayAll();
 
@@ -878,7 +890,6 @@ private void expectAdd(SourceSink sourceSink) {
         Capture<Callback<TargetState>> onStart = EasyMock.newCapture();
         worker.startConnector(EasyMock.eq(CONNECTOR_NAME), EasyMock.eq(connectorProps), EasyMock.anyObject(HerderConnectorContext.class),
                 EasyMock.eq(herder), EasyMock.eq(TargetState.STARTED), EasyMock.capture(onStart));
-        // EasyMock.expectLastCall().andReturn(true);
         EasyMock.expectLastCall().andAnswer(() -> {
             onStart.getValue().onCompletion(null, TargetState.STARTED);
             return true;
@@ -902,9 +913,16 @@ private void expectAdd(SourceSink sourceSink) {
                 Collections.singletonMap(CONNECTOR_NAME, connectorConfig(sourceSink)),
                 Collections.singletonMap(CONNECTOR_NAME, TargetState.STARTED),
                 Collections.singletonMap(new ConnectorTaskId(CONNECTOR_NAME, 0), generatedTaskProps),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                new HashSet<>(),
                 new HashSet<>(),
                 transformer);
-        worker.startTask(new ConnectorTaskId(CONNECTOR_NAME, 0), configState, connectorConfig(sourceSink), generatedTaskProps, herder, TargetState.STARTED);
+        if (sourceSink.equals(SourceSink.SOURCE)) {
+            worker.startSourceTask(new ConnectorTaskId(CONNECTOR_NAME, 0), configState, connectorConfig(sourceSink), generatedTaskProps, herder, TargetState.STARTED);
+        } else {
+            worker.startSinkTask(new ConnectorTaskId(CONNECTOR_NAME, 0), configState, connectorConfig(sourceSink), generatedTaskProps, herder, TargetState.STARTED);
+        }
         EasyMock.expectLastCall().andReturn(true);
 
         EasyMock.expect(herder.connectorTypeForClass(BogusSourceConnector.class.getName()))
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java
index 726b4ccbbf90d..b374f8f5d2f79 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaConfigBackingStoreTest.java
@@ -20,7 +20,10 @@
 import org.apache.kafka.clients.admin.NewTopic;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.IsolationLevel;
+import org.apache.kafka.common.errors.ProducerFencedException;
 import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.common.config.ConfigException;
@@ -30,7 +33,6 @@
 import org.apache.kafka.connect.data.Struct;
 import org.apache.kafka.connect.runtime.RestartRequest;
 import org.apache.kafka.connect.runtime.TargetState;
-import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
 import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
 import org.apache.kafka.connect.util.Callback;
 import org.apache.kafka.connect.util.ConnectUtils;
@@ -50,22 +52,30 @@
 import org.powermock.modules.junit4.PowerMockRunner;
 import org.powermock.reflect.Whitebox;
 
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
 
+import static org.apache.kafka.clients.consumer.ConsumerConfig.ISOLATION_LEVEL_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.TRANSACTIONAL_ID_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.GROUP_ID_CONFIG;
 import static org.apache.kafka.connect.storage.KafkaConfigBackingStore.INCLUDE_TASKS_FIELD_NAME;
 import static org.apache.kafka.connect.storage.KafkaConfigBackingStore.ONLY_FAILED_FIELD_NAME;
 import static org.apache.kafka.connect.storage.KafkaConfigBackingStore.RESTART_KEY;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
@@ -95,6 +105,7 @@ public class KafkaConfigBackingStoreTest {
     private static final List<String> CONNECTOR_CONFIG_KEYS = Arrays.asList("connector-connector1", "connector-connector2");
     private static final List<String> COMMIT_TASKS_CONFIG_KEYS = Arrays.asList("commit-connector1", "commit-connector2");
     private static final List<String> TARGET_STATE_KEYS =  Arrays.asList("target-state-connector1", "target-state-connector2");
+    private static final List<String> CONNECTOR_TASK_COUNT_RECORD_KEYS = Arrays.asList("tasks-fencing-connector1", "tasks-fencing-connector2");
 
     private static final String CONNECTOR_1_NAME = "connector1";
     private static final String CONNECTOR_2_NAME = "connector2";
@@ -123,6 +134,10 @@ public class KafkaConfigBackingStoreTest {
             new Struct(KafkaConfigBackingStore.TASK_CONFIGURATION_V0).put("properties", SAMPLE_CONFIGS.get(0)),
             new Struct(KafkaConfigBackingStore.TASK_CONFIGURATION_V0).put("properties", SAMPLE_CONFIGS.get(1))
     );
+    private static final List<Struct> CONNECTOR_TASK_COUNT_RECORD_STRUCTS = Arrays.asList(
+            new Struct(KafkaConfigBackingStore.TASK_COUNT_RECORD_V0).put("task-count", 6),
+            new Struct(KafkaConfigBackingStore.TASK_COUNT_RECORD_V0).put("task-count", 9)
+    );
     private static final Struct TARGET_STATE_PAUSED = new Struct(KafkaConfigBackingStore.TARGET_STATE_V0).put("state", "PAUSED");
 
     private static final Struct TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR
@@ -151,6 +166,8 @@ public class KafkaConfigBackingStoreTest {
     private ConfigBackingStore.UpdateListener configUpdateListener;
     @Mock
     KafkaBasedLog<String, byte[]> storeLog;
+    @Mock
+    Producer<String, byte[]> fencableProducer;
     private KafkaConfigBackingStore configStorage;
 
     private Capture<String> capturedTopic = EasyMock.newCapture();
@@ -162,15 +179,22 @@ public class KafkaConfigBackingStoreTest {
 
     private long logOffset = 0;
 
+    private void createStore(DistributedConfig config, KafkaBasedLog<String, byte[]> storeLog) {
+        configStorage = PowerMock.createPartialMock(
+                KafkaConfigBackingStore.class,
+                new String[]{"createKafkaBasedLog", "createFencableProducer"},
+                converter, config, null);
+        Whitebox.setInternalState(configStorage, "configLog", storeLog);
+        configStorage.setUpdateListener(configUpdateListener);
+    }
+
     @Before
     public void setUp() {
         PowerMock.mockStaticPartial(ConnectUtils.class, "lookupKafkaClusterId");
         EasyMock.expect(ConnectUtils.lookupKafkaClusterId(EasyMock.anyObject())).andReturn("test-cluster").anyTimes();
         PowerMock.replay(ConnectUtils.class);
 
-        configStorage = PowerMock.createPartialMock(KafkaConfigBackingStore.class, new String[]{"createKafkaBasedLog"}, converter, DEFAULT_DISTRIBUTED_CONFIG, null);
-        Whitebox.setInternalState(configStorage, "configLog", storeLog);
-        configStorage.setUpdateListener(configUpdateListener);
+        createStore(DEFAULT_DISTRIBUTED_CONFIG, storeLog);
     }
 
     @Test
@@ -203,6 +227,32 @@ public void testStartStop() throws Exception {
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testSnapshotCannotMutateInternalState() throws Exception {
+        expectConfigure();
+        expectStart(Collections.emptyList(), Collections.emptyMap());
+        expectPartitionCount(1);
+        PowerMock.replayAll();
+
+        Map<String, String> settings = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        settings.put("config.storage.min.insync.replicas", "3");
+        settings.put("config.storage.max.message.bytes", "1001");
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, new DistributedConfig(settings));
+
+        configStorage.start();
+        ClusterConfigState snapshot = configStorage.snapshot();
+        assertNotSame(snapshot.connectorTaskCounts, configStorage.connectorTaskCounts);
+        assertNotSame(snapshot.connectorConfigs, configStorage.connectorConfigs);
+        assertNotSame(snapshot.connectorTargetStates, configStorage.connectorTargetStates);
+        assertNotSame(snapshot.taskConfigs, configStorage.taskConfigs);
+        assertNotSame(snapshot.connectorTaskCountRecords, configStorage.connectorTaskCountRecords);
+        assertNotSame(snapshot.connectorTaskConfigGenerations, configStorage.connectorTaskConfigGenerations);
+        assertNotSame(snapshot.connectorsPendingFencing, configStorage.connectorsPendingFencing);
+        assertNotSame(snapshot.inconsistentConnectors, configStorage.inconsistent);
+
+        PowerMock.verifyAll();
+    }
+
     @Test
     public void testPutConnectorConfig() throws Exception {
         expectConfigure();
@@ -266,6 +316,174 @@ public void testPutConnectorConfig() throws Exception {
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testWritePrivileges() throws Exception {
+        // With exactly.once.source.support = preparing (or also, "enabled"), we need to use a transactional producer
+        // to write some types of messages to the config topic
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        expectConfigure();
+        expectStart(Collections.emptyList(), Collections.emptyMap());
+
+        // Try and fail to write a task count record to the config topic without write privileges
+        expectConvert(KafkaConfigBackingStore.TASK_COUNT_RECORD_V0, CONNECTOR_TASK_COUNT_RECORD_STRUCTS.get(0), CONFIGS_SERIALIZED.get(0));
+        // Claim write privileges
+        expectFencableProducer();
+        // And write the task count record successfully
+        expectConvert(KafkaConfigBackingStore.TASK_COUNT_RECORD_V0, CONNECTOR_TASK_COUNT_RECORD_STRUCTS.get(0), CONFIGS_SERIALIZED.get(0));
+        fencableProducer.beginTransaction();
+        EasyMock.expectLastCall();
+        EasyMock.expect(fencableProducer.send(EasyMock.anyObject())).andReturn(null);
+        fencableProducer.commitTransaction();
+        EasyMock.expectLastCall();
+        expectRead(CONNECTOR_TASK_COUNT_RECORD_KEYS.get(0), CONFIGS_SERIALIZED.get(0), CONNECTOR_TASK_COUNT_RECORD_STRUCTS.get(0));
+
+        // Try to write a connector config
+        expectConvert(KafkaConfigBackingStore.CONNECTOR_CONFIGURATION_V0, CONNECTOR_CONFIG_STRUCTS.get(0), CONFIGS_SERIALIZED.get(1));
+        fencableProducer.beginTransaction();
+        EasyMock.expectLastCall();
+        EasyMock.expect(fencableProducer.send(EasyMock.anyObject())).andReturn(null);
+        // Get fenced out
+        fencableProducer.commitTransaction();
+        EasyMock.expectLastCall().andThrow(new ProducerFencedException("Better luck next time"));
+        fencableProducer.close(Duration.ZERO);
+        EasyMock.expectLastCall();
+        // And fail when trying to write again without reclaiming write privileges
+        expectConvert(KafkaConfigBackingStore.CONNECTOR_CONFIGURATION_V0, CONNECTOR_CONFIG_STRUCTS.get(0), CONFIGS_SERIALIZED.get(1));
+
+        // In the meantime, write a target state (which doesn't require write privileges)
+        expectConvert(KafkaConfigBackingStore.TARGET_STATE_V0, TARGET_STATE_PAUSED, CONFIGS_SERIALIZED.get(1));
+        storeLog.send("target-state-" + CONNECTOR_IDS.get(1), CONFIGS_SERIALIZED.get(1));
+        PowerMock.expectLastCall();
+
+        // Reclaim write privileges
+        expectFencableProducer();
+        // And successfully write the config
+        expectConvert(KafkaConfigBackingStore.CONNECTOR_CONFIGURATION_V0, CONNECTOR_CONFIG_STRUCTS.get(0), CONFIGS_SERIALIZED.get(1));
+        fencableProducer.beginTransaction();
+        EasyMock.expectLastCall();
+        EasyMock.expect(fencableProducer.send(EasyMock.anyObject())).andReturn(null);
+        fencableProducer.commitTransaction();
+        EasyMock.expectLastCall();
+        expectConvertRead(CONNECTOR_CONFIG_KEYS.get(1), CONNECTOR_CONFIG_STRUCTS.get(0), CONFIGS_SERIALIZED.get(2));
+        configUpdateListener.onConnectorConfigUpdate(CONNECTOR_IDS.get(1));
+        EasyMock.expectLastCall();
+
+        expectPartitionCount(1);
+        expectStop();
+        fencableProducer.close(Duration.ZERO);
+        EasyMock.expectLastCall();
+
+        PowerMock.replayAll();
+
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, DEFAULT_DISTRIBUTED_CONFIG);
+        configStorage.start();
+
+        // Should fail the first time since we haven't claimed write privileges
+        assertThrows(IllegalStateException.class, () -> configStorage.putTaskCountRecord(CONNECTOR_IDS.get(0), 6));
+        // Should succeed now
+        configStorage.claimWritePrivileges();
+        configStorage.putTaskCountRecord(CONNECTOR_IDS.get(0), 6);
+
+        // Should fail again when we get fenced out
+        assertThrows(PrivilegedWriteException.class, () -> configStorage.putConnectorConfig(CONNECTOR_IDS.get(1), SAMPLE_CONFIGS.get(0)));
+        // Should fail if we retry without reclaiming write privileges
+        assertThrows(IllegalStateException.class, () -> configStorage.putConnectorConfig(CONNECTOR_IDS.get(1), SAMPLE_CONFIGS.get(0)));
+
+        // Should succeed even without write privileges (target states can be written by anyone)
+        configStorage.putTargetState(CONNECTOR_IDS.get(1), TargetState.PAUSED);
+
+        // Should succeed if we re-claim write privileges
+        configStorage.claimWritePrivileges();
+        configStorage.putConnectorConfig(CONNECTOR_IDS.get(1), SAMPLE_CONFIGS.get(0));
+
+        configStorage.stop();
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testTaskCountRecordsAndGenerations() throws Exception {
+        expectConfigure();
+        expectStart(Collections.emptyList(), Collections.emptyMap());
+
+        // Task configs should read to end, write to the log, read to end, write root, then read to end again
+        expectReadToEnd(new LinkedHashMap<>());
+        expectConvertWriteRead(
+                TASK_CONFIG_KEYS.get(0), KafkaConfigBackingStore.TASK_CONFIGURATION_V0, CONFIGS_SERIALIZED.get(0),
+                "properties", SAMPLE_CONFIGS.get(0));
+        expectConvertWriteRead(
+                TASK_CONFIG_KEYS.get(1), KafkaConfigBackingStore.TASK_CONFIGURATION_V0, CONFIGS_SERIALIZED.get(1),
+                "properties", SAMPLE_CONFIGS.get(1));
+        expectReadToEnd(new LinkedHashMap<>());
+        expectConvertWriteRead(
+                COMMIT_TASKS_CONFIG_KEYS.get(0), KafkaConfigBackingStore.CONNECTOR_TASKS_COMMIT_V0, CONFIGS_SERIALIZED.get(2),
+                "tasks", 2); // Starts with 0 tasks, after update has 2
+        // As soon as root is rewritten, we should see a callback notifying us that we reconfigured some tasks
+        configUpdateListener.onTaskConfigUpdate(Arrays.asList(TASK_IDS.get(0), TASK_IDS.get(1)));
+        EasyMock.expectLastCall();
+
+        // Records to be read by consumer as it reads to the end of the log
+        LinkedHashMap<String, byte[]> serializedConfigs = new LinkedHashMap<>();
+        serializedConfigs.put(TASK_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(0));
+        serializedConfigs.put(TASK_CONFIG_KEYS.get(1), CONFIGS_SERIALIZED.get(1));
+        serializedConfigs.put(COMMIT_TASKS_CONFIG_KEYS.get(0), CONFIGS_SERIALIZED.get(2));
+        expectReadToEnd(serializedConfigs);
+
+        // Task count records are read back after writing as well
+        expectConvertWriteRead(
+                CONNECTOR_TASK_COUNT_RECORD_KEYS.get(0), KafkaConfigBackingStore.TASK_COUNT_RECORD_V0, CONFIGS_SERIALIZED.get(3),
+                "task-count", 4);
+        serializedConfigs = new LinkedHashMap<>();
+        serializedConfigs.put(CONNECTOR_TASK_COUNT_RECORD_KEYS.get(0), CONFIGS_SERIALIZED.get(3));
+        expectReadToEnd(serializedConfigs);
+
+        expectPartitionCount(1);
+        expectStop();
+
+        PowerMock.replayAll();
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, DEFAULT_DISTRIBUTED_CONFIG);
+        configStorage.start();
+
+        // Bootstrap as if we had already added the connector, but no tasks had been added yet
+        whiteboxAddConnector(CONNECTOR_IDS.get(0), SAMPLE_CONFIGS.get(0), Collections.emptyList());
+
+        // Before anything is written
+        String connectorName = CONNECTOR_IDS.get(0);
+        ClusterConfigState configState = configStorage.snapshot();
+        assertFalse(configState.pendingFencing(connectorName));
+        assertNull(configState.taskCountRecord(connectorName));
+        assertNull(configState.taskConfigGeneration(connectorName));
+
+        // Writing task configs should block until all the writes have been performed and the root record update
+        // has completed
+        List<Map<String, String>> taskConfigs = Arrays.asList(SAMPLE_CONFIGS.get(0), SAMPLE_CONFIGS.get(1));
+        configStorage.putTaskConfigs("connector1", taskConfigs);
+
+        configState = configStorage.snapshot();
+        assertEquals(3, configState.offset());
+        assertTrue(configState.pendingFencing(connectorName));
+        assertNull(configState.taskCountRecord(connectorName));
+        assertEquals(0, (long) configState.taskConfigGeneration(connectorName));
+
+        configStorage.putTaskCountRecord(connectorName, 4);
+
+        configState = configStorage.snapshot();
+        assertEquals(4, configState.offset());
+        assertFalse(configState.pendingFencing(connectorName));
+        assertEquals(4, (long) configState.taskCountRecord(connectorName));
+        assertEquals(0, (long) configState.taskConfigGeneration(connectorName));
+
+        configStorage.stop();
+
+        PowerMock.verifyAll();
+    }
+
     @Test
     public void testPutTaskConfigs() throws Exception {
         expectConfigure();
@@ -683,30 +901,36 @@ public void testRestore() throws Exception {
         expectConfigure();
         // Overwrite each type at least once to ensure we see the latest data after loading
         List<ConsumerRecord<String, byte[]>> existingRecords = Arrays.asList(
-                new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
+                new ConsumerRecord<>(TOPIC, 0, 0, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_TASK_COUNT_RECORD_KEYS.get(0),
                         CONFIGS_SERIALIZED.get(0), new RecordHeaders(), Optional.empty()),
-                new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(0),
+                new ConsumerRecord<>(TOPIC, 0, 1, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
                         CONFIGS_SERIALIZED.get(1), new RecordHeaders(), Optional.empty()),
-                new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(1),
+                new ConsumerRecord<>(TOPIC, 0, 2, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(0),
                         CONFIGS_SERIALIZED.get(2), new RecordHeaders(), Optional.empty()),
-                new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
+                new ConsumerRecord<>(TOPIC, 0, 3, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(1),
                         CONFIGS_SERIALIZED.get(3), new RecordHeaders(), Optional.empty()),
-                new ConsumerRecord<>(TOPIC, 0, 4, 0L, TimestampType.CREATE_TIME, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0),
+                new ConsumerRecord<>(TOPIC, 0, 4, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
                         CONFIGS_SERIALIZED.get(4), new RecordHeaders(), Optional.empty()),
-                // Connector after root update should make it through, task update shouldn't
-                new ConsumerRecord<>(TOPIC, 0, 5, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
+                new ConsumerRecord<>(TOPIC, 0, 5, 0L, TimestampType.CREATE_TIME, 0, 0, COMMIT_TASKS_CONFIG_KEYS.get(0),
                         CONFIGS_SERIALIZED.get(5), new RecordHeaders(), Optional.empty()),
-                new ConsumerRecord<>(TOPIC, 0, 6, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(0),
-                        CONFIGS_SERIALIZED.get(6), new RecordHeaders(), Optional.empty()));
+                new ConsumerRecord<>(TOPIC, 0, 6, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_TASK_COUNT_RECORD_KEYS.get(1),
+                        CONFIGS_SERIALIZED.get(6), new RecordHeaders(), Optional.empty()),
+                // Connector after root update should make it through, task update shouldn't
+                new ConsumerRecord<>(TOPIC, 0, 7, 0L, TimestampType.CREATE_TIME, 0, 0, CONNECTOR_CONFIG_KEYS.get(0),
+                        CONFIGS_SERIALIZED.get(7), new RecordHeaders(), Optional.empty()),
+                new ConsumerRecord<>(TOPIC, 0, 8, 0L, TimestampType.CREATE_TIME, 0, 0, TASK_CONFIG_KEYS.get(0),
+                        CONFIGS_SERIALIZED.get(8), new RecordHeaders(), Optional.empty()));
         LinkedHashMap<byte[], Struct> deserialized = new LinkedHashMap<>();
-        deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_CONFIG_STRUCTS.get(0));
-        deserialized.put(CONFIGS_SERIALIZED.get(1), TASK_CONFIG_STRUCTS.get(0));
+        deserialized.put(CONFIGS_SERIALIZED.get(0), CONNECTOR_TASK_COUNT_RECORD_STRUCTS.get(0));
+        deserialized.put(CONFIGS_SERIALIZED.get(1), CONNECTOR_CONFIG_STRUCTS.get(0));
         deserialized.put(CONFIGS_SERIALIZED.get(2), TASK_CONFIG_STRUCTS.get(0));
-        deserialized.put(CONFIGS_SERIALIZED.get(3), CONNECTOR_CONFIG_STRUCTS.get(1));
-        deserialized.put(CONFIGS_SERIALIZED.get(4), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR);
-        deserialized.put(CONFIGS_SERIALIZED.get(5), CONNECTOR_CONFIG_STRUCTS.get(2));
-        deserialized.put(CONFIGS_SERIALIZED.get(6), TASK_CONFIG_STRUCTS.get(1));
-        logOffset = 7;
+        deserialized.put(CONFIGS_SERIALIZED.get(3), TASK_CONFIG_STRUCTS.get(0));
+        deserialized.put(CONFIGS_SERIALIZED.get(4), CONNECTOR_CONFIG_STRUCTS.get(1));
+        deserialized.put(CONFIGS_SERIALIZED.get(5), TASKS_COMMIT_STRUCT_TWO_TASK_CONNECTOR);
+        deserialized.put(CONFIGS_SERIALIZED.get(6), CONNECTOR_TASK_COUNT_RECORD_STRUCTS.get(1));
+        deserialized.put(CONFIGS_SERIALIZED.get(7), CONNECTOR_CONFIG_STRUCTS.get(2));
+        deserialized.put(CONFIGS_SERIALIZED.get(8), TASK_CONFIG_STRUCTS.get(1));
+        logOffset = 9;
         expectStart(existingRecords, deserialized);
         expectPartitionCount(1);
 
@@ -721,7 +945,7 @@ public void testRestore() throws Exception {
 
         // Should see a single connector and its config should be the last one seen anywhere in the log
         ClusterConfigState configState = configStorage.snapshot();
-        assertEquals(7, configState.offset()); // Should always be next to be read, even if uncommitted
+        assertEquals(logOffset, configState.offset()); // Should always be next to be read, even if uncommitted
         assertEquals(Arrays.asList(CONNECTOR_IDS.get(0)), new ArrayList<>(configState.connectors()));
         assertEquals(TargetState.STARTED, configState.targetState(CONNECTOR_IDS.get(0)));
         // CONNECTOR_CONFIG_STRUCTS[2] -> SAMPLE_CONFIGS[2]
@@ -731,7 +955,9 @@ public void testRestore() throws Exception {
         // Both TASK_CONFIG_STRUCTS[0] -> SAMPLE_CONFIGS[0]
         assertEquals(SAMPLE_CONFIGS.get(0), configState.taskConfig(TASK_IDS.get(0)));
         assertEquals(SAMPLE_CONFIGS.get(0), configState.taskConfig(TASK_IDS.get(1)));
+        assertEquals(9, (int) configState.taskCountRecord(CONNECTOR_IDS.get(1)));
         assertEquals(Collections.EMPTY_SET, configState.inconsistentConnectors());
+        assertEquals(Collections.singleton("connector1"), configState.connectorsPendingFencing);
 
         configStorage.stop();
 
@@ -1066,6 +1292,127 @@ public void testExceptionOnStartWhenConfigTopicHasMultiplePartitions() throws Ex
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testFencableProducerPropertiesInsertedByDefault() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        String groupId = "my-connect-cluster";
+        workerProps.put(GROUP_ID_CONFIG, groupId);
+        workerProps.remove(TRANSACTIONAL_ID_CONFIG);
+        workerProps.remove(ENABLE_IDEMPOTENCE_CONFIG);
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        PowerMock.replayAll();
+
+        Map<String, Object> fencableProducerProperties = configStorage.fencableProducerProps(config);
+        assertEquals("connect-cluster-" + groupId, fencableProducerProperties.get(TRANSACTIONAL_ID_CONFIG));
+        assertEquals("true", fencableProducerProperties.get(ENABLE_IDEMPOTENCE_CONFIG));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testFencableProducerPropertiesOverrideUserSuppliedValues() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        String groupId = "my-other-connect-cluster";
+        workerProps.put(GROUP_ID_CONFIG, groupId);
+        workerProps.put(TRANSACTIONAL_ID_CONFIG, "my-custom-transactional-id");
+        workerProps.put(ENABLE_IDEMPOTENCE_CONFIG, "false");
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        PowerMock.replayAll();
+
+        Map<String, Object> fencableProducerProperties = configStorage.fencableProducerProps(config);
+        assertEquals("connect-cluster-" + groupId, fencableProducerProperties.get(TRANSACTIONAL_ID_CONFIG));
+        assertEquals("true", fencableProducerProperties.get(ENABLE_IDEMPOTENCE_CONFIG));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesInsertedByDefaultWithExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        workerProps.remove(ISOLATION_LEVEL_CONFIG);
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        expectConfigure();
+        PowerMock.replayAll();
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, config);
+
+        assertEquals(
+                IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesOverrideUserSuppliedValuesWithExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        workerProps.put(ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT));
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        expectConfigure();
+        PowerMock.replayAll();
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, config);
+
+        assertEquals(
+                IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesNotInsertedByDefaultWithoutExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        workerProps.remove(ISOLATION_LEVEL_CONFIG);
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        expectConfigure();
+        PowerMock.replayAll();
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, config);
+
+        assertNull(capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesDoNotOverrideUserSuppliedValuesWithoutExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_CONFIG_STORAGE_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "preparing");
+        workerProps.put(ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT));
+        DistributedConfig config = new DistributedConfig(workerProps);
+        createStore(config, storeLog);
+
+        expectConfigure();
+        PowerMock.replayAll();
+
+        configStorage.setupAndCreateKafkaBasedLog(TOPIC, config);
+
+        assertEquals(
+                IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
     private void expectConfigure() throws Exception {
         PowerMock.expectPrivate(configStorage, "createKafkaBasedLog",
                 EasyMock.capture(capturedTopic), EasyMock.capture(capturedProducerProps),
@@ -1074,6 +1421,13 @@ private void expectConfigure() throws Exception {
                 .andReturn(storeLog);
     }
 
+    private void expectFencableProducer() throws Exception {
+        fencableProducer.initTransactions();
+        EasyMock.expectLastCall();
+        PowerMock.expectPrivate(configStorage, "createFencableProducer")
+                .andReturn(fencableProducer);
+    }
+
     private void expectPartitionCount(int partitionCount) {
         EasyMock.expect(storeLog.partitionCount())
                 .andReturn(partitionCount);
@@ -1116,6 +1470,11 @@ private void expectRead(final String key, final byte[] serializedValue, Struct d
         expectRead(serializedData, Collections.singletonMap(key, deserializedValue));
     }
 
+    private void expectConvert(Schema valueSchema, Struct valueStruct, byte[] serialized) {
+        EasyMock.expect(converter.fromConnectData(EasyMock.eq(TOPIC), EasyMock.eq(valueSchema), EasyMock.eq(valueStruct)))
+                .andReturn(serialized);
+    }
+
     // Expect a conversion & write to the underlying log, followed by a subsequent read when the data is consumed back
     // from the log. Validate the data that is captured when the conversion is performed matches the specified data
     // (by checking a single field's value)
@@ -1136,6 +1495,14 @@ private void expectConvertWriteRead(final String configKey, final Schema valueSc
                 });
     }
 
+    private void expectConvertRead(final String configKey, final Struct struct, final byte[] serialized) {
+        EasyMock.expect(converter.toConnectData(EasyMock.eq(TOPIC), EasyMock.aryEq(serialized)))
+                .andAnswer(() -> new SchemaAndValue(null, serialized == null ? null : structToMap(struct)));
+        LinkedHashMap<String, byte[]> recordsToRead = new LinkedHashMap<>();
+        recordsToRead.put(configKey, serialized);
+        expectReadToEnd(recordsToRead);
+    }
+
     // This map needs to maintain ordering
     private void expectReadToEnd(final LinkedHashMap<String, byte[]> serializedConfigs) {
         EasyMock.expect(storeLog.readToEnd())
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
index 2ab7c38a3b942..cf11230f3d20c 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/storage/KafkaOffsetBackingStoreTest.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.record.TimestampType;
@@ -46,6 +47,7 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.ExecutionException;
@@ -54,6 +56,8 @@
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Supplier;
 
+import static org.apache.kafka.clients.consumer.ConsumerConfig.ISOLATION_LEVEL_CONFIG;
+import static org.apache.kafka.connect.runtime.distributed.DistributedConfig.EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -384,6 +388,87 @@ public void testSetFailure() throws Exception {
         PowerMock.verifyAll();
     }
 
+    @Test
+    public void testConsumerPropertiesInsertedByDefaultWithExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        workerProps.remove(ISOLATION_LEVEL_CONFIG);
+        DistributedConfig config = new DistributedConfig(workerProps);
+
+        expectConfigure();
+        expectClusterId();
+        PowerMock.replayAll();
+
+        store.configure(config);
+
+        assertEquals(
+                IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesOverrideUserSuppliedValuesWithExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "enabled");
+        workerProps.put(ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT));
+        DistributedConfig config = new DistributedConfig(workerProps);
+
+        expectConfigure();
+        expectClusterId();
+        PowerMock.replayAll();
+
+        store.configure(config);
+
+        assertEquals(
+                IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesNotInsertedByDefaultWithoutExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "disabled");
+        workerProps.remove(ISOLATION_LEVEL_CONFIG);
+        DistributedConfig config = new DistributedConfig(workerProps);
+
+        expectConfigure();
+        expectClusterId();
+        PowerMock.replayAll();
+
+        store.configure(config);
+
+        assertNull(capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG));
+
+        PowerMock.verifyAll();
+    }
+
+    @Test
+    public void testConsumerPropertiesDoNotOverrideUserSuppliedValuesWithoutExactlyOnceSourceEnabled() throws Exception {
+        Map<String, String> workerProps = new HashMap<>(DEFAULT_PROPS);
+        workerProps.put(EXACTLY_ONCE_SOURCE_SUPPORT_CONFIG, "disabled");
+        workerProps.put(ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT));
+        DistributedConfig config = new DistributedConfig(workerProps);
+
+        expectConfigure();
+        expectClusterId();
+        PowerMock.replayAll();
+
+        store.configure(config);
+
+        assertEquals(
+                IsolationLevel.READ_UNCOMMITTED.name().toLowerCase(Locale.ROOT),
+                capturedConsumerProps.getValue().get(ISOLATION_LEVEL_CONFIG)
+        );
+
+        PowerMock.verifyAll();
+    }
+
     private void expectConfigure() throws Exception {
         PowerMock.expectPrivate(store, "createKafkaBasedLog", EasyMock.capture(capturedTopic), EasyMock.capture(capturedProducerProps),
                 EasyMock.capture(capturedConsumerProps), EasyMock.capture(capturedConsumedCallback),
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/util/TopicAdminTest.java b/connect/runtime/src/test/java/org/apache/kafka/connect/util/TopicAdminTest.java
index deea050d78bd1..cf611db9c0ffc 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/util/TopicAdminTest.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/util/TopicAdminTest.java
@@ -467,8 +467,31 @@ public void verifyingGettingTopicCleanupPolicies() {
         }
     }
 
+    /**
+     * TopicAdmin can be used to read the end offsets, but the admin client API used to do this was
+     * added to the broker in 0.11.0.0. This means that if Connect talks to older brokers,
+     * the admin client cannot be used to read end offsets, and will throw an UnsupportedVersionException.
+     */
+    @Test
+    public void retryEndOffsetsShouldRethrowUnknownVersionException() {
+        String topicName = "myTopic";
+        TopicPartition tp1 = new TopicPartition(topicName, 0);
+        Set<TopicPartition> tps = Collections.singleton(tp1);
+        Long offset = null; // response should use error
+        Cluster cluster = createCluster(1, topicName, 1);
+        try (AdminClientUnitTestEnv env = new AdminClientUnitTestEnv(new MockTime(), cluster)) {
+            env.kafkaClient().setNodeApiVersions(NodeApiVersions.create());
+            env.kafkaClient().prepareResponse(prepareMetadataResponse(cluster, Errors.NONE));
+            // Expect the admin client list offsets will throw unsupported version, simulating older brokers
+            env.kafkaClient().prepareResponse(listOffsetsResultWithUnsupportedVersion(tp1, offset));
+            TopicAdmin admin = new TopicAdmin(null, env.adminClient());
+            // The retryEndOffsets should catch and rethrow an unsupported version exception
+            assertThrows(UnsupportedVersionException.class, () -> admin.retryEndOffsets(tps, Duration.ofMillis(100), 1));
+        }
+    }
+
     @Test
-    public void retryEndOffsetsShouldThrowConnectException() {
+    public void retryEndOffsetsShouldWrapNonRetriableExceptionsWithConnectException() {
         String topicName = "myTopic";
         TopicPartition tp1 = new TopicPartition(topicName, 0);
         Set<TopicPartition> tps = Collections.singleton(tp1);
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectCluster.java b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectCluster.java
index adcde378bbe95..ccbf2c495d6fe 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectCluster.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectCluster.java
@@ -219,6 +219,16 @@ private void stopWorker(WorkerHandle worker) {
         }
     }
 
+    /**
+     * Set a new timeout for REST requests to each worker in the cluster. Useful if a request
+     * is expected to block, since the time spent awaiting that request can be reduced
+     * and test runtime bloat can be avoided.
+     * @param requestTimeoutMs the new timeout in milliseconds; must be positive
+     */
+    public void requestTimeout(long requestTimeoutMs) {
+        connectCluster.forEach(worker -> worker.requestTimeout(requestTimeoutMs));
+    }
+
     /**
      * Determine whether the Connect cluster has any workers running.
      *
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectClusterAssertions.java b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectClusterAssertions.java
index edd99c8042cc1..c026cb72903da 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectClusterAssertions.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedConnectClusterAssertions.java
@@ -44,9 +44,12 @@
 public class EmbeddedConnectClusterAssertions {
 
     private static final Logger log = LoggerFactory.getLogger(EmbeddedConnectClusterAssertions.class);
-    public static final long WORKER_SETUP_DURATION_MS = TimeUnit.SECONDS.toMillis(60);
+    public static final long WORKER_SETUP_DURATION_MS = TimeUnit.MINUTES.toMillis(5);
     public static final long VALIDATION_DURATION_MS = TimeUnit.SECONDS.toMillis(30);
-    public static final long CONNECTOR_SETUP_DURATION_MS = TimeUnit.SECONDS.toMillis(30);
+    public static final long CONNECTOR_SETUP_DURATION_MS = TimeUnit.MINUTES.toMillis(2);
+    // Creating a connector requires two rounds of rebalance; destroying one only requires one
+    // Assume it'll take ~half the time to destroy a connector as it does to create one
+    public static final long CONNECTOR_SHUTDOWN_DURATION_MS = TimeUnit.MINUTES.toMillis(1);
     private static final long CONNECT_INTERNAL_TOPIC_UPDATES_DURATION_MS = TimeUnit.SECONDS.toMillis(60);
 
     private final EmbeddedConnectCluster connect;
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedKafkaCluster.java b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedKafkaCluster.java
index f1a63a4615caa..5bbbc684c2ecb 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedKafkaCluster.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/EmbeddedKafkaCluster.java
@@ -26,14 +26,18 @@
 import org.apache.kafka.clients.admin.Admin;
 import org.apache.kafka.clients.admin.AdminClientConfig;
 import org.apache.kafka.clients.admin.DescribeTopicsResult;
+import org.apache.kafka.clients.admin.ListOffsetsOptions;
 import org.apache.kafka.clients.admin.NewTopic;
+import org.apache.kafka.clients.admin.OffsetSpec;
 import org.apache.kafka.clients.admin.TopicDescription;
+import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.consumer.ConsumerRecords;
 import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerConfig;
 import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.common.IsolationLevel;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
@@ -45,6 +49,7 @@
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.connect.errors.ConnectException;
 import org.apache.kafka.metadata.BrokerState;
 import org.slf4j.Logger;
@@ -55,9 +60,11 @@
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -66,6 +73,8 @@
 import java.util.UUID;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 
@@ -75,6 +84,9 @@
 import static org.apache.kafka.clients.consumer.ConsumerConfig.GROUP_ID_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG;
 import static org.apache.kafka.clients.consumer.ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG;
+import static org.junit.Assert.assertFalse;
 
 /**
  * Setup an embedded Kafka cluster with specified number of brokers and specified broker properties. To be used for
@@ -439,9 +451,23 @@ public Admin createAdminClient() {
      * @return a {@link ConsumerRecords} collection containing at least n records.
      */
     public ConsumerRecords<byte[], byte[]> consume(int n, long maxDuration, String... topics) {
+        return consume(n, maxDuration, Collections.emptyMap(), topics);
+    }
+
+    /**
+     * Consume at least n records in a given duration or throw an exception.
+     *
+     * @param n the number of expected records in this topic.
+     * @param maxDuration the max duration to wait for these records (in milliseconds).
+     * @param topics the topics to subscribe and consume records from.
+     * @param consumerProps overrides to the default properties the consumer is constructed with;
+     *                      may not be null
+     * @return a {@link ConsumerRecords} collection containing at least n records.
+     */
+    public ConsumerRecords<byte[], byte[]> consume(int n, long maxDuration, Map<String, Object> consumerProps, String... topics) {
         Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = new HashMap<>();
         int consumedRecords = 0;
-        try (KafkaConsumer<byte[], byte[]> consumer = createConsumerAndSubscribeTo(Collections.emptyMap(), topics)) {
+        try (KafkaConsumer<byte[], byte[]> consumer = createConsumerAndSubscribeTo(consumerProps, topics)) {
             final long startMillis = System.currentTimeMillis();
             long allowedDuration = maxDuration;
             while (allowedDuration > 0) {
@@ -466,6 +492,108 @@ public ConsumerRecords<byte[], byte[]> consume(int n, long maxDuration, String..
         throw new RuntimeException("Could not find enough records. found " + consumedRecords + ", expected " + n);
     }
 
+    /**
+     * Consume all currently-available records for the specified topics in a given duration, or throw an exception.
+     * @param maxDurationMs the max duration to wait for these records (in milliseconds).
+     * @param consumerProps overrides to the default properties the consumer is constructed with; may be null
+     * @param adminProps overrides to the default properties the admin used to query Kafka cluster metadata is constructed with; may be null
+     * @param topics the topics to consume from
+     * @return a {@link ConsumerRecords} collection containing the records for all partitions of the given topics
+     */
+    public ConsumerRecords<byte[], byte[]> consumeAll(
+            long maxDurationMs,
+            Map<String, Object> consumerProps,
+            Map<String, Object> adminProps,
+            String... topics
+    ) throws TimeoutException, InterruptedException, ExecutionException {
+        long endTimeMs = System.currentTimeMillis() + maxDurationMs;
+
+        Consumer<byte[], byte[]> consumer = createConsumer(consumerProps != null ? consumerProps : Collections.emptyMap());
+        Admin admin = createAdminClient(Utils.mkObjectProperties(adminProps != null ? adminProps : Collections.emptyMap()));
+
+        long remainingTimeMs = endTimeMs - System.currentTimeMillis();
+        Set<TopicPartition> topicPartitions = listPartitions(remainingTimeMs, admin, Arrays.asList(topics));
+
+        remainingTimeMs = endTimeMs - System.currentTimeMillis();
+        Map<TopicPartition, Long> endOffsets = readEndOffsets(remainingTimeMs, admin, topicPartitions);
+
+        Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = topicPartitions.stream()
+                .collect(Collectors.toMap(
+                        Function.identity(),
+                        tp -> new ArrayList<>()
+                ));
+        consumer.assign(topicPartitions);
+
+        while (!endOffsets.isEmpty()) {
+            Iterator<Map.Entry<TopicPartition, Long>> it = endOffsets.entrySet().iterator();
+            while (it.hasNext()) {
+                Map.Entry<TopicPartition, Long> entry = it.next();
+                TopicPartition topicPartition = entry.getKey();
+                long endOffset = entry.getValue();
+                long lastConsumedOffset = consumer.position(topicPartition);
+                if (lastConsumedOffset >= endOffset) {
+                    // We've reached the end offset for the topic partition; can stop polling it now
+                    it.remove();
+                } else {
+                    remainingTimeMs = endTimeMs - System.currentTimeMillis();
+                    if (remainingTimeMs <= 0) {
+                        throw new AssertionError("failed to read to end of topic(s) " + Arrays.asList(topics) + " within " + maxDurationMs + "ms");
+                    }
+                    // We haven't reached the end offset yet; need to keep polling
+                    ConsumerRecords<byte[], byte[]> recordBatch = consumer.poll(Duration.ofMillis(remainingTimeMs));
+                    recordBatch.partitions().forEach(tp -> records.get(tp)
+                            .addAll(recordBatch.records(tp))
+                    );
+                }
+            }
+        }
+
+        return new ConsumerRecords<>(records);
+    }
+
+    /**
+     * List all the known partitions for the given {@link Collection} of topics
+     * @param maxDurationMs the max duration to wait for while fetching metadata from Kafka (in milliseconds).
+     * @param admin the admin client to use for fetching metadata from the Kafka cluster
+     * @param topics the topics whose partitions should be listed
+     * @return a {@link Set} of {@link TopicPartition topic partitions} for the given topics; never null, and never empty
+     */
+    private Set<TopicPartition> listPartitions(
+            long maxDurationMs,
+            Admin admin,
+            Collection<String> topics
+    ) throws TimeoutException, InterruptedException, ExecutionException {
+        assertFalse("collection of topics may not be empty", topics.isEmpty());
+        return admin.describeTopics(topics)
+                .allTopicNames().get(maxDurationMs, TimeUnit.MILLISECONDS)
+                .entrySet().stream()
+                .flatMap(e -> e.getValue().partitions().stream().map(p -> new TopicPartition(e.getKey(), p.partition())))
+                .collect(Collectors.toSet());
+    }
+
+    /**
+     * List the latest current offsets for the given {@link Collection} of {@link TopicPartition topic partitions}
+     * @param maxDurationMs the max duration to wait for while fetching metadata from Kafka (in milliseconds)
+     * @param admin the admin client to use for fetching metadata from the Kafka cluster
+     * @param topicPartitions the topic partitions to list end offsets for
+     * @return a {@link Map} containing the latest offset for each requested {@link TopicPartition topic partition}; never null, and never empty
+     */
+    private Map<TopicPartition, Long> readEndOffsets(
+            long maxDurationMs,
+            Admin admin,
+            Collection<TopicPartition> topicPartitions
+    ) throws TimeoutException, InterruptedException, ExecutionException {
+        assertFalse("collection of topic partitions may not be empty", topicPartitions.isEmpty());
+        Map<TopicPartition, OffsetSpec> offsetSpecMap = topicPartitions.stream().collect(Collectors.toMap(Function.identity(), tp -> OffsetSpec.latest()));
+        return admin.listOffsets(offsetSpecMap, new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED))
+                .all().get(maxDurationMs, TimeUnit.MILLISECONDS)
+                .entrySet().stream()
+                .collect(Collectors.toMap(
+                        Map.Entry::getKey,
+                        e -> e.getValue().offset()
+                ));
+    }
+
     public KafkaConsumer<byte[], byte[]> createConsumer(Map<String, Object> consumerProps) {
         Map<String, Object> props = new HashMap<>(consumerProps);
 
@@ -495,6 +623,26 @@ public KafkaConsumer<byte[], byte[]> createConsumerAndSubscribeTo(Map<String, Ob
         return consumer;
     }
 
+    public KafkaProducer<byte[], byte[]> createProducer(Map<String, Object> producerProps) {
+        Map<String, Object> props = new HashMap<>(producerProps);
+
+        putIfAbsent(props, BOOTSTRAP_SERVERS_CONFIG, bootstrapServers());
+        putIfAbsent(props, KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
+        putIfAbsent(props, VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
+        if (sslEnabled()) {
+            putIfAbsent(props, SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, brokerConfig.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG));
+            putIfAbsent(props, SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG, brokerConfig.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG));
+            putIfAbsent(props, CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SSL");
+        }
+        KafkaProducer<byte[], byte[]> producer;
+        try {
+            producer = new KafkaProducer<>(props);
+        } catch (Throwable t) {
+            throw new ConnectException("Failed to create producer", t);
+        }
+        return producer;
+    }
+
     private static void putIfAbsent(final Map<String, Object> props, final String propertyKey, final Object propertyValue) {
         if (!props.containsKey(propertyKey)) {
             props.put(propertyKey, propertyValue);
diff --git a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/WorkerHandle.java b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/WorkerHandle.java
index 4d947940c585e..936363b4963ff 100644
--- a/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/WorkerHandle.java
+++ b/connect/runtime/src/test/java/org/apache/kafka/connect/util/clusters/WorkerHandle.java
@@ -81,7 +81,7 @@ public String name() {
      * @return the worker's url
      */
     public URI url() {
-        return worker.restUrl();
+        return worker.rest().serverUrl();
     }
 
     /**
@@ -90,14 +90,24 @@ public URI url() {
      * @return the worker's admin url
      */
     public URI adminUrl() {
-        return worker.adminUrl();
+        return worker.rest().adminUrl();
+    }
+
+    /**
+     * Set a new timeout for REST requests to the worker. Useful if a request is expected
+     * to block, since the time spent awaiting that request can be reduced and test runtime
+     * bloat can be avoided.
+     * @param requestTimeoutMs the new timeout in milliseconds; must be positive
+     */
+    public void requestTimeout(long requestTimeoutMs) {
+        worker.rest().requestTimeout(requestTimeoutMs);
     }
 
     @Override
     public String toString() {
         return "WorkerHandle{" +
                 "workerName='" + workerName + '\'' +
-                "workerURL='" + worker.restUrl() + '\'' +
+                "workerURL='" + worker.rest().serverUrl() + '\'' +
                 '}';
     }
 
diff --git a/connect/transforms/src/main/java/org/apache/kafka/connect/transforms/RegexRouter.java b/connect/transforms/src/main/java/org/apache/kafka/connect/transforms/RegexRouter.java
index 74a19cdb82dfc..a79f5c17dad67 100644
--- a/connect/transforms/src/main/java/org/apache/kafka/connect/transforms/RegexRouter.java
+++ b/connect/transforms/src/main/java/org/apache/kafka/connect/transforms/RegexRouter.java
@@ -20,6 +20,8 @@
 import org.apache.kafka.connect.connector.ConnectRecord;
 import org.apache.kafka.connect.transforms.util.RegexValidator;
 import org.apache.kafka.connect.transforms.util.SimpleConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.Map;
 import java.util.regex.Matcher;
@@ -27,6 +29,8 @@
 
 public class RegexRouter<R extends ConnectRecord<R>> implements Transformation<R> {
 
+    private static final Logger log = LoggerFactory.getLogger(RegexRouter.class);
+
     public static final String OVERVIEW_DOC = "Update the record topic using the configured regular expression and replacement string."
             + "<p/>Under the hood, the regex is compiled to a <code>java.util.regex.Pattern</code>. "
             + "If the pattern matches the input topic, <code>java.util.regex.Matcher#replaceFirst()</code> is used with the replacement string to obtain the new topic.";
@@ -57,7 +61,10 @@ public R apply(R record) {
         final Matcher matcher = regex.matcher(record.topic());
         if (matcher.matches()) {
             final String topic = matcher.replaceFirst(replacement);
+            log.trace("Rerouting from topic '{}' to new topic '{}'", record.topic(), topic);
             return record.newRecord(topic, record.kafkaPartition(), record.keySchema(), record.key(), record.valueSchema(), record.value(), record.timestamp());
+        } else {
+            log.trace("Not rerouting topic '{}' as it does not match the configured regex", record.topic());
         }
         return record;
     }
diff --git a/core/src/main/java/kafka/metrics/KafkaYammerMetrics.java b/core/src/main/java/kafka/metrics/KafkaYammerMetrics.java
deleted file mode 100644
index dd650fdd0f79e..0000000000000
--- a/core/src/main/java/kafka/metrics/KafkaYammerMetrics.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.metrics;
-
-import com.yammer.metrics.core.MetricsRegistry;
-
-import org.apache.kafka.common.Reconfigurable;
-import org.apache.kafka.common.config.ConfigException;
-import org.apache.kafka.common.metrics.JmxReporter;
-
-import java.util.Map;
-import java.util.Set;
-import java.util.function.Predicate;
-
-/**
- * This class encapsulates the default yammer metrics registry for Kafka server,
- * and configures the set of exported JMX metrics for Yammer metrics.
- *
- * KafkaYammerMetrics.defaultRegistry() should always be used instead of Metrics.defaultRegistry()
- */
-public class KafkaYammerMetrics implements Reconfigurable {
-
-    public static final KafkaYammerMetrics INSTANCE = new KafkaYammerMetrics();
-
-    /**
-     * convenience method to replace {@link com.yammer.metrics.Metrics#defaultRegistry()}
-     */
-    public static MetricsRegistry defaultRegistry() {
-        return INSTANCE.metricsRegistry;
-    }
-
-    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
-    private final FilteringJmxReporter jmxReporter = new FilteringJmxReporter(metricsRegistry,
-        metricName -> true);
-
-    private KafkaYammerMetrics() {
-        jmxReporter.start();
-        Runtime.getRuntime().addShutdownHook(new Thread(jmxReporter::shutdown));
-    }
-
-    @Override
-    public void configure(Map<String, ?> configs) {
-        reconfigure(configs);
-    }
-
-    @Override
-    public Set<String> reconfigurableConfigs() {
-        return JmxReporter.RECONFIGURABLE_CONFIGS;
-    }
-
-    @Override
-    public void validateReconfiguration(Map<String, ?> configs) throws ConfigException {
-        JmxReporter.compilePredicate(configs);
-    }
-
-    @Override
-    public void reconfigure(Map<String, ?> configs) {
-        Predicate<String> mBeanPredicate = JmxReporter.compilePredicate(configs);
-        jmxReporter.updatePredicate(metricName -> mBeanPredicate.test(metricName.getMBeanName()));
-    }
-}
diff --git a/core/src/main/java/kafka/server/builders/LogManagerBuilder.java b/core/src/main/java/kafka/server/builders/LogManagerBuilder.java
index 3ebe7fa8fecce..6b6bd919fee1d 100644
--- a/core/src/main/java/kafka/server/builders/LogManagerBuilder.java
+++ b/core/src/main/java/kafka/server/builders/LogManagerBuilder.java
@@ -17,7 +17,6 @@
 
 package kafka.server.builders;
 
-import kafka.api.ApiVersion;
 import kafka.log.CleanerConfig;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
@@ -26,6 +25,7 @@
 import kafka.server.metadata.ConfigRepository;
 import kafka.utils.Scheduler;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.common.MetadataVersion;
 import scala.collection.JavaConverters;
 
 import java.io.File;
@@ -46,7 +46,7 @@ public class LogManagerBuilder {
     private long retentionCheckMs = 1000L;
     private int maxTransactionTimeoutMs = 15 * 60 * 1000;
     private int maxPidExpirationMs = 60000;
-    private ApiVersion interBrokerProtocolVersion = ApiVersion.latestVersion();
+    private MetadataVersion interBrokerProtocolVersion = MetadataVersion.latest();
     private Scheduler scheduler = null;
     private BrokerTopicStats brokerTopicStats = null;
     private LogDirFailureChannel logDirFailureChannel = null;
@@ -113,7 +113,7 @@ public LogManagerBuilder setMaxPidExpirationMs(int maxPidExpirationMs) {
         return this;
     }
 
-    public LogManagerBuilder setInterBrokerProtocolVersion(ApiVersion interBrokerProtocolVersion) {
+    public LogManagerBuilder setInterBrokerProtocolVersion(MetadataVersion interBrokerProtocolVersion) {
         this.interBrokerProtocolVersion = interBrokerProtocolVersion;
         return this;
     }
diff --git a/core/src/main/java/kafka/server/builders/ReplicaManagerBuilder.java b/core/src/main/java/kafka/server/builders/ReplicaManagerBuilder.java
index a0051784b4d73..a1339264bf0bc 100644
--- a/core/src/main/java/kafka/server/builders/ReplicaManagerBuilder.java
+++ b/core/src/main/java/kafka/server/builders/ReplicaManagerBuilder.java
@@ -18,7 +18,7 @@
 package kafka.server.builders;
 
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
 import kafka.server.BrokerTopicStats;
 import kafka.server.DelayedDeleteRecords;
 import kafka.server.DelayedElectLeader;
@@ -50,7 +50,7 @@ public class ReplicaManagerBuilder {
     private QuotaManagers quotaManagers = null;
     private MetadataCache metadataCache = null;
     private LogDirFailureChannel logDirFailureChannel = null;
-    private AlterIsrManager alterIsrManager = null;
+    private AlterPartitionManager alterPartitionManager = null;
     private BrokerTopicStats brokerTopicStats = new BrokerTopicStats();
     private AtomicBoolean isShuttingDown = new AtomicBoolean(false);
     private Optional<KafkaZkClient> zkClient = Optional.empty();
@@ -100,8 +100,8 @@ public ReplicaManagerBuilder setLogDirFailureChannel(LogDirFailureChannel logDir
         return this;
     }
 
-    public ReplicaManagerBuilder setAlterIsrManager(AlterIsrManager alterIsrManager) {
-        this.alterIsrManager = alterIsrManager;
+    public ReplicaManagerBuilder setAlterPartitionManager(AlterPartitionManager alterPartitionManager) {
+        this.alterPartitionManager = alterPartitionManager;
         return this;
     }
 
@@ -151,7 +151,7 @@ public ReplicaManager build() {
         if (logManager == null) throw new RuntimeException("You must set logManager");
         if (metadataCache == null) throw new RuntimeException("You must set metadataCache");
         if (logDirFailureChannel == null) throw new RuntimeException("You must set logDirFailureChannel");
-        if (alterIsrManager == null) throw new RuntimeException("You must set alterIsrManager");
+        if (alterPartitionManager == null) throw new RuntimeException("You must set alterIsrManager");
         return new ReplicaManager(config,
                              metrics,
                              time,
@@ -160,7 +160,7 @@ public ReplicaManager build() {
                              quotaManagers,
                              metadataCache,
                              logDirFailureChannel,
-                             alterIsrManager,
+                             alterPartitionManager,
                              brokerTopicStats,
                              isShuttingDown,
                              OptionConverters.toScala(zkClient),
diff --git a/core/src/main/scala/kafka/Kafka.scala b/core/src/main/scala/kafka/Kafka.scala
index 4e278c95e34f8..5dc829fd3b13e 100755
--- a/core/src/main/scala/kafka/Kafka.scala
+++ b/core/src/main/scala/kafka/Kafka.scala
@@ -108,9 +108,9 @@ object Kafka extends Logging {
 
       try server.startup()
       catch {
-        case _: Throwable =>
+        case e: Throwable =>
           // KafkaServer.startup() calls shutdown() in case of exceptions, so we invoke `exit` to set the status code
-          fatal("Exiting Kafka.")
+          fatal("Exiting Kafka due to fatal exception during startup.", e)
           Exit.exit(1)
       }
 
diff --git a/core/src/main/scala/kafka/admin/AclCommand.scala b/core/src/main/scala/kafka/admin/AclCommand.scala
index 116ca24f7bfa3..769e99df737a9 100644
--- a/core/src/main/scala/kafka/admin/AclCommand.scala
+++ b/core/src/main/scala/kafka/admin/AclCommand.scala
@@ -467,6 +467,9 @@ object AclCommand extends Logging {
     if (opts.options.has(opts.delegationTokenOpt))
       opts.options.valuesOf(opts.delegationTokenOpt).forEach(token => resourceFilters += new ResourcePatternFilter(JResourceType.DELEGATION_TOKEN, token.trim, patternType))
 
+    if (opts.options.has(opts.userPrincipalOpt))
+      opts.options.valuesOf(opts.userPrincipalOpt).forEach(user => resourceFilters += new ResourcePatternFilter(JResourceType.USER, user.trim, patternType))
+
     if (resourceFilters.isEmpty && dieIfNoResourceFound)
       CommandLineUtils.printUsageAndDie(opts.parser, "You must provide at least one resource: --topic <topic> or --cluster or --group <group> or --delegation-token <Delegation Token ID>")
 
@@ -518,20 +521,20 @@ object AclCommand extends Logging {
       .ofType(classOf[String])
 
     val topicOpt = parser.accepts("topic", "topic to which ACLs should be added or removed. " +
-      "A value of * indicates ACL should apply to all topics.")
+      "A value of '*' indicates ACL should apply to all topics.")
       .withRequiredArg
       .describedAs("topic")
       .ofType(classOf[String])
 
     val clusterOpt = parser.accepts("cluster", "Add/Remove cluster ACLs.")
     val groupOpt = parser.accepts("group", "Consumer Group to which the ACLs should be added or removed. " +
-      "A value of * indicates the ACLs should apply to all groups.")
+      "A value of '*' indicates the ACLs should apply to all groups.")
       .withRequiredArg
       .describedAs("group")
       .ofType(classOf[String])
 
     val transactionalIdOpt = parser.accepts("transactional-id", "The transactionalId to which ACLs should " +
-      "be added or removed. A value of * indicates the ACLs should apply to all transactionalIds.")
+      "be added or removed. A value of '*' indicates the ACLs should apply to all transactionalIds.")
       .withRequiredArg
       .describedAs("transactional-id")
       .ofType(classOf[String])
@@ -541,7 +544,7 @@ object AclCommand extends Logging {
       "the producer is authorized to a particular transactional-id.")
 
     val delegationTokenOpt = parser.accepts("delegation-token", "Delegation token to which ACLs should be added or removed. " +
-      "A value of * indicates ACL should apply to all tokens.")
+      "A value of '*' indicates ACL should apply to all tokens.")
       .withRequiredArg
       .describedAs("delegation-token")
       .ofType(classOf[String])
@@ -569,7 +572,7 @@ object AclCommand extends Logging {
 
     val allowPrincipalsOpt = parser.accepts("allow-principal", "principal is in principalType:name format." +
       " Note that principalType must be supported by the Authorizer being used." +
-      " For example, User:* is the wild card indicating all users.")
+      " For example, User:'*' is the wild card indicating all users.")
       .withRequiredArg
       .describedAs("allow-principal")
       .ofType(classOf[String])
@@ -579,7 +582,7 @@ object AclCommand extends Logging {
       "You only need to use this option as negation to already allowed set. " +
       "Note that principalType must be supported by the Authorizer being used. " +
       "For example if you wanted to allow access to all users in the system but not test-user you can define an ACL that " +
-      "allows access to User:* and specify --deny-principal=User:test@EXAMPLE.COM. " +
+      "allows access to User:'*' and specify --deny-principal=User:test@EXAMPLE.COM. " +
       "AND PLEASE REMEMBER DENY RULES TAKES PRECEDENCE OVER ALLOW RULES.")
       .withRequiredArg
       .describedAs("deny-principal")
@@ -592,13 +595,13 @@ object AclCommand extends Logging {
       .ofType(classOf[String])
 
     val allowHostsOpt = parser.accepts("allow-host", "Host from which principals listed in --allow-principal will have access. " +
-      "If you have specified --allow-principal then the default for this option will be set to * which allows access from all hosts.")
+      "If you have specified --allow-principal then the default for this option will be set to '*' which allows access from all hosts.")
       .withRequiredArg
       .describedAs("allow-host")
       .ofType(classOf[String])
 
     val denyHostsOpt = parser.accepts("deny-host", "Host from which principals listed in --deny-principal will be denied access. " +
-      "If you have specified --deny-principal then the default for this option will be set to * which denies access from all hosts.")
+      "If you have specified --deny-principal then the default for this option will be set to '*' which denies access from all hosts.")
       .withRequiredArg
       .describedAs("deny-host")
       .ofType(classOf[String])
@@ -621,6 +624,12 @@ object AclCommand extends Logging {
         AclCommand.AuthorizerDeprecationMessage)
       .withRequiredArg().describedAs("Authorizer ZooKeeper TLS configuration").ofType(classOf[String])
 
+    val userPrincipalOpt = parser.accepts("user-principal", "Specifies a user principal as a resource in relation with the operation. For instance " +
+      "one could grant CreateTokens or DescribeTokens permission on a given user principal.")
+      .withRequiredArg()
+      .describedAs("user-principal")
+      .ofType(classOf[String])
+
     options = parser.parse(args: _*)
 
     def checkArgs(): Unit = {
diff --git a/core/src/main/scala/kafka/admin/BrokerApiVersionsCommand.scala b/core/src/main/scala/kafka/admin/BrokerApiVersionsCommand.scala
index f6f87065ca6f9..957cb2ce8bb0d 100644
--- a/core/src/main/scala/kafka/admin/BrokerApiVersionsCommand.scala
+++ b/core/src/main/scala/kafka/admin/BrokerApiVersionsCommand.scala
@@ -40,8 +40,8 @@ import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.utils.LogContext
 import org.apache.kafka.common.utils.{KafkaThread, Time}
 import org.apache.kafka.common.Node
-import org.apache.kafka.common.message.ApiVersionsResponseData.ApiVersionCollection
 import org.apache.kafka.common.requests.{AbstractRequest, AbstractResponse, ApiVersionsRequest, ApiVersionsResponse, MetadataRequest, MetadataResponse}
+import org.apache.kafka.common.security.auth.SecurityProtocol
 
 import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success, Try}
@@ -156,10 +156,10 @@ object BrokerApiVersionsCommand {
       throw new RuntimeException(s"Request ${request.apiKey()} failed on brokers $bootstrapBrokers")
     }
 
-    private def getApiVersions(node: Node): ApiVersionCollection = {
+    private def getNodeApiVersions(node: Node): NodeApiVersions = {
       val response = send(node, new ApiVersionsRequest.Builder()).asInstanceOf[ApiVersionsResponse]
       Errors.forCode(response.data.errorCode).maybeThrow()
-      response.data.apiKeys
+      new NodeApiVersions(response.data.apiKeys, response.data.supportedFeatures)
     }
 
     /**
@@ -185,7 +185,7 @@ object BrokerApiVersionsCommand {
 
     def listAllBrokerVersionInfo(): Map[Node, Try[NodeApiVersions]] =
       findAllBrokers().map { broker =>
-        broker -> Try[NodeApiVersions](new NodeApiVersions(getApiVersions(broker)))
+        broker -> Try[NodeApiVersions](getNodeApiVersions(broker))
       }.toMap
 
     def close(): Unit = {
@@ -231,6 +231,7 @@ object BrokerApiVersionsCommand {
           CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
           ConfigDef.Type.STRING,
           CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+          in(Utils.enumOptions(classOf[SecurityProtocol]):_*),
           ConfigDef.Importance.MEDIUM,
           CommonClientConfigs.SECURITY_PROTOCOL_DOC)
         .define(
diff --git a/core/src/main/scala/kafka/admin/ConfigCommand.scala b/core/src/main/scala/kafka/admin/ConfigCommand.scala
index 5e5ccefa45408..9a42f9b874dc3 100644
--- a/core/src/main/scala/kafka/admin/ConfigCommand.scala
+++ b/core/src/main/scala/kafka/admin/ConfigCommand.scala
@@ -22,11 +22,10 @@ import java.util.concurrent.TimeUnit
 import java.util.{Collections, Properties}
 
 import joptsimple._
-import kafka.common.Config
 import kafka.log.LogConfig
 import kafka.server.DynamicConfig.QuotaConfigs
 import kafka.server.{ConfigEntityName, ConfigType, Defaults, DynamicBrokerConfig, DynamicConfig, KafkaConfig}
-import kafka.utils.{CommandDefaultOptions, CommandLineUtils, Exit, PasswordEncoder}
+import kafka.utils.{CommandDefaultOptions, CommandLineUtils, Exit, Logging, PasswordEncoder}
 import kafka.utils.Implicits._
 import kafka.zk.{AdminZkClient, KafkaZkClient}
 import org.apache.kafka.clients.admin.{Admin, AlterClientQuotasOptions, AlterConfigOp, AlterConfigsOptions, ConfigEntry, DescribeClusterOptions, DescribeConfigsOptions, ListTopicsOptions, ScramCredentialInfo, UserScramCredentialDeletion, UserScramCredentialUpsertion, Config => JConfig, ScramMechanism => PublicScramMechanism}
@@ -74,7 +73,7 @@ import scala.collection._
  * For example, this allows password configs to be stored encrypted in ZK before brokers are started,
  * avoiding cleartext passwords in `server.properties`.
  */
-object ConfigCommand extends Config {
+object ConfigCommand extends Logging {
 
   val BrokerDefaultEntityName = ""
   val BrokerLoggerConfigType = "broker-loggers"
@@ -212,7 +211,7 @@ object ConfigCommand extends Config {
     encoderConfigs.get(KafkaConfig.PasswordEncoderSecretProp)
     val encoderSecret = encoderConfigs.getOrElse(KafkaConfig.PasswordEncoderSecretProp,
       throw new IllegalArgumentException("Password encoder secret not specified"))
-    new PasswordEncoder(new Password(encoderSecret),
+    PasswordEncoder.encrypting(new Password(encoderSecret),
       None,
       encoderConfigs.get(KafkaConfig.PasswordEncoderCipherAlgorithmProp).getOrElse(Defaults.PasswordEncoderCipherAlgorithm),
       encoderConfigs.get(KafkaConfig.PasswordEncoderKeyLengthProp).map(_.toInt).getOrElse(Defaults.PasswordEncoderKeyLength),
diff --git a/core/src/main/scala/kafka/admin/ConsumerGroupCommand.scala b/core/src/main/scala/kafka/admin/ConsumerGroupCommand.scala
index 47c1d173b306b..d5aee881c9a0c 100755
--- a/core/src/main/scala/kafka/admin/ConsumerGroupCommand.scala
+++ b/core/src/main/scala/kafka/admin/ConsumerGroupCommand.scala
@@ -18,7 +18,7 @@
 package kafka.admin
 
 import java.time.{Duration, Instant}
-import java.util.Properties
+import java.util.{Collections, Properties}
 import com.fasterxml.jackson.dataformat.csv.CsvMapper
 import com.fasterxml.jackson.module.scala.DefaultScalaModule
 import kafka.utils._
@@ -753,9 +753,9 @@ object ConsumerGroupCommand extends Logging {
 
     private def getCommittedOffsets(groupId: String): Map[TopicPartition, OffsetAndMetadata] = {
       adminClient.listConsumerGroupOffsets(
-        groupId,
-        withTimeoutMs(new ListConsumerGroupOffsetsOptions)
-      ).partitionsToOffsetAndMetadata.get.asScala
+        Collections.singletonMap(groupId, new ListConsumerGroupOffsetsSpec),
+        withTimeoutMs(new ListConsumerGroupOffsetsOptions())
+      ).partitionsToOffsetAndMetadata(groupId).get().asScala
     }
 
     type GroupMetadata = immutable.Map[String, immutable.Map[TopicPartition, OffsetAndMetadata]]
diff --git a/core/src/main/scala/kafka/admin/DelegationTokenCommand.scala b/core/src/main/scala/kafka/admin/DelegationTokenCommand.scala
index 6465b143e3f33..78984792ce287 100644
--- a/core/src/main/scala/kafka/admin/DelegationTokenCommand.scala
+++ b/core/src/main/scala/kafka/admin/DelegationTokenCommand.scala
@@ -78,6 +78,9 @@ object DelegationTokenCommand extends Logging {
 
     println("Calling create token operation with renewers :" + renewerPrincipals +" , max-life-time-period :"+ maxLifeTimeMs)
     val createDelegationTokenOptions = new CreateDelegationTokenOptions().maxlifeTimeMs(maxLifeTimeMs).renewers(renewerPrincipals)
+    val ownerPrincipal = getPrincipals(opts, opts.ownerPrincipalsOpt)
+    if (ownerPrincipal.isDefined)
+      createDelegationTokenOptions.owner(ownerPrincipal.get.asScala.head)
     val createResult = adminClient.createDelegationToken(createDelegationTokenOptions)
     val token = createResult.delegationToken().get()
     println("Created delegation token with tokenId : %s".format(token.tokenInfo.tokenId)); printToken(List(token))
@@ -86,13 +89,14 @@ object DelegationTokenCommand extends Logging {
 
   def printToken(tokens: List[DelegationToken]): Unit = {
     val dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm")
-    print("\n%-15s %-30s %-15s %-25s %-15s %-15s %-15s".format("TOKENID", "HMAC", "OWNER", "RENEWERS", "ISSUEDATE", "EXPIRYDATE", "MAXDATE"))
+    print("\n%-15s %-30s %-15s %-15s %-25s %-15s %-15s %-15s".format("TOKENID", "HMAC", "OWNER", "REQUESTER", "RENEWERS", "ISSUEDATE", "EXPIRYDATE", "MAXDATE"))
     for (token <- tokens) {
       val tokenInfo = token.tokenInfo
-      print("\n%-15s %-30s %-15s %-25s %-15s %-15s %-15s".format(
+      print("\n%-15s %-30s %-15s %-15s %-25s %-15s %-15s %-15s".format(
         tokenInfo.tokenId,
         token.hmacAsBase64String,
         tokenInfo.owner,
+        tokenInfo.tokenRequester(),
         tokenInfo.renewersAsString,
         dateFormat.format(tokenInfo.issueTimestamp),
         dateFormat.format(tokenInfo.expiryTimestamp),
@@ -210,7 +214,7 @@ object DelegationTokenCommand extends Logging {
         CommandLineUtils.checkRequiredArgs(parser, options, hmacOpt, expiryTimePeriodOpt)
 
       // check invalid args
-      CommandLineUtils.checkInvalidArgs(parser, options, createOpt, Set(hmacOpt, renewTimePeriodOpt, expiryTimePeriodOpt, ownerPrincipalsOpt))
+      CommandLineUtils.checkInvalidArgs(parser, options, createOpt, Set(hmacOpt, renewTimePeriodOpt, expiryTimePeriodOpt))
       CommandLineUtils.checkInvalidArgs(parser, options, renewOpt, Set(renewPrincipalsOpt, maxLifeTimeOpt, expiryTimePeriodOpt, ownerPrincipalsOpt))
       CommandLineUtils.checkInvalidArgs(parser, options, expiryOpt, Set(renewOpt, maxLifeTimeOpt, renewTimePeriodOpt, ownerPrincipalsOpt))
       CommandLineUtils.checkInvalidArgs(parser, options, describeOpt, Set(renewTimePeriodOpt, maxLifeTimeOpt, hmacOpt, renewTimePeriodOpt, expiryTimePeriodOpt))
diff --git a/core/src/main/scala/kafka/admin/FeatureCommand.scala b/core/src/main/scala/kafka/admin/FeatureCommand.scala
index 4b299652a6f2b..c5c62648f4ed9 100644
--- a/core/src/main/scala/kafka/admin/FeatureCommand.scala
+++ b/core/src/main/scala/kafka/admin/FeatureCommand.scala
@@ -17,374 +17,317 @@
 
 package kafka.admin
 
-import kafka.server.BrokerFeatures
-import kafka.utils.{CommandDefaultOptions, CommandLineUtils, Exit}
+import kafka.tools.TerseFailure
+import kafka.utils.Exit
+import net.sourceforge.argparse4j.ArgumentParsers
+import net.sourceforge.argparse4j.impl.Arguments.{append, fileType, storeTrue}
+import net.sourceforge.argparse4j.inf.{Namespace, Subparsers}
 import org.apache.kafka.clients.CommonClientConfigs
-import org.apache.kafka.clients.admin.{Admin, FeatureUpdate, UpdateFeaturesOptions}
-import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
+import org.apache.kafka.clients.admin.FeatureUpdate.UpgradeType
+import org.apache.kafka.clients.admin.{Admin, FeatureUpdate, UpdateFeaturesOptions, UpdateFeaturesResult}
 import org.apache.kafka.common.utils.Utils
-import java.util.Properties
 
+import java.io.File
+import java.util.Properties
 import scala.collection.Seq
-import scala.collection.immutable.ListMap
-import scala.jdk.CollectionConverters._
-import joptsimple.OptionSpec
-
 import scala.concurrent.ExecutionException
+import scala.jdk.CollectionConverters._
 
 object FeatureCommand {
 
   def main(args: Array[String]): Unit = {
-    val opts = new FeatureCommandOptions(args)
-    val featureApis = new FeatureApis(opts)
-    var exitCode = 0
+    val res = mainNoExit(args)
+    Exit.exit(res)
+  }
+
+  // This is used for integration tests in order to avoid killing the test with Exit.exit
+  def mainNoExit(args: Array[String]): Int = {
+    val parser = ArgumentParsers.newArgumentParser("kafka-features")
+      .defaultHelp(true)
+      .description("This tool manages feature flags in Kafka.")
+    parser.addArgument("--bootstrap-server")
+      .help("A comma-separated list of host:port pairs to use for establishing the connection to the Kafka cluster.")
+      .required(true)
+
+    parser.addArgument("--command-config")
+      .`type`(fileType())
+      .help("Property file containing configs to be passed to Admin Client.")
+    val subparsers = parser.addSubparsers().dest("command")
+    addDescribeParser(subparsers)
+    addUpgradeParser(subparsers)
+    addDowngradeParser(subparsers)
+    addDisableParser(subparsers)
+
     try {
-      featureApis.execute()
+      val namespace = parser.parseArgsOrFail(args)
+      val command = namespace.getString("command")
+
+      val commandConfig = namespace.get[File]("command_config")
+      val props = if (commandConfig != null) {
+        if (!commandConfig.exists()) {
+          throw new TerseFailure(s"Properties file ${commandConfig.getPath} does not exists!")
+        }
+        Utils.loadProps(commandConfig.getPath)
+      } else {
+        new Properties()
+      }
+      props.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, namespace.getString("bootstrap_server"))
+      val admin = Admin.create(props)
+
+      command match {
+        case "describe" => handleDescribe(namespace, admin)
+        case "upgrade" => handleUpgrade(namespace, admin)
+        case "downgrade" => handleDowngrade(namespace, admin)
+        case "disable" => handleDisable(namespace, admin)
+      }
+      admin.close()
+      0
     } catch {
-      case e: IllegalArgumentException =>
-        printException(e)
-        opts.parser.printHelpOn(System.err)
-        exitCode = 1
-      case _: UpdateFeaturesException =>
-        exitCode = 1
-      case e: ExecutionException =>
-        val cause = if (e.getCause == null) e else e.getCause
-        printException(cause)
-        exitCode = 1
-      case e: Throwable =>
-        printException(e)
-        exitCode = 1
-    } finally {
-      featureApis.close()
-      Exit.exit(exitCode)
+      case e: TerseFailure =>
+        System.err.println(e.getMessage)
+        1
     }
   }
 
-  private def printException(exception: Throwable): Unit = {
-    System.err.println("\nError encountered when executing command: " + Utils.stackTrace(exception))
-  }
-}
+  def addDescribeParser(subparsers: Subparsers): Unit = {
+    val describeParser = subparsers.addParser("describe")
+      .help("Describe one or more feature flags.")
 
-class UpdateFeaturesException(message: String) extends RuntimeException(message)
-
-/**
- * A class that provides necessary APIs to bridge feature APIs provided by the Admin client with
- * the requirements of the CLI tool.
- *
- * @param opts the CLI options
- */
-class FeatureApis(private var opts: FeatureCommandOptions) {
-  private var supportedFeatures = BrokerFeatures.createDefault().supportedFeatures
-  private var adminClient = FeatureApis.createAdminClient(opts)
+    val featureArgs = describeParser.addArgumentGroup("Specific Features")
+    featureArgs.addArgument("--feature")
+      .action(append())
+      .help("A specific feature to describe. This option may be repeated for describing multiple feature flags.")
 
-  private def pad(op: String): String = {
-    f"$op%11s"
+    val releaseArgs = describeParser.addArgumentGroup("All Features for release")
+    releaseArgs.addArgument("--release")
   }
 
-  private val addOp = pad("[Add]")
-  private val upgradeOp = pad("[Upgrade]")
-  private val deleteOp = pad("[Delete]")
-  private val downgradeOp = pad("[Downgrade]")
-
-  // For testing only.
-  private[admin] def setSupportedFeatures(newFeatures: Features[SupportedVersionRange]): Unit = {
-    supportedFeatures = newFeatures
+  def addUpgradeParser(subparsers: Subparsers): Unit = {
+    val upgradeParser = subparsers.addParser("upgrade")
+      .help("Upgrade one or more feature flags.")
+
+    val featureArgs = upgradeParser.addArgumentGroup("Upgrade specific features")
+    featureArgs.addArgument("--feature")
+      .action(append())
+      .help("A feature flag to upgrade. This option may be repeated for upgrading multiple feature flags.")
+    featureArgs.addArgument("--version")
+      .`type`(classOf[Short])
+      .help("The version to upgrade to.")
+      .action(append())
+
+    val releaseArgs = upgradeParser.addArgumentGroup("Upgrade to feature level defined for a given release")
+    releaseArgs.addArgument("--release")
+
+    upgradeParser.addArgument("--dry-run")
+      .help("Perform a dry-run of this upgrade operation.")
+      .action(storeTrue())
   }
 
-  // For testing only.
-  private[admin] def setOptions(newOpts: FeatureCommandOptions): Unit = {
-    adminClient.close()
-    adminClient = FeatureApis.createAdminClient(newOpts)
-    opts = newOpts
+  def addDowngradeParser(subparsers: Subparsers): Unit = {
+    val downgradeParser = subparsers.addParser("downgrade")
+      .help("Upgrade one or more feature flags.")
+
+    downgradeParser.addArgument("--feature")
+      .help("A feature flag to downgrade. This option may be repeated for downgrade multiple feature flags.")
+      .required(true)
+      .action(append())
+    downgradeParser.addArgument("--version")
+      .`type`(classOf[Short])
+      .help("The version to downgrade to.")
+      .required(true)
+      .action(append())
+    downgradeParser.addArgument("--unsafe")
+      .help("Perform this downgrade even if it considered unsafe. Refer to specific feature flag documentation for details.")
+      .action(storeTrue())
+    downgradeParser.addArgument("--dry-run")
+      .help("Perform a dry-run of this downgrade operation.")
+      .action(storeTrue())
   }
 
-  /**
-   * Describes the supported and finalized features. The request is issued to any of the provided
-   * bootstrap servers.
-   */
-  def describeFeatures(): Unit = {
-    val result = adminClient.describeFeatures.featureMetadata.get
-    val features = result.supportedFeatures.asScala.keys.toSet ++ result.finalizedFeatures.asScala.keys.toSet
-
-    features.toList.sorted.foreach {
-      feature =>
-        val output = new StringBuilder()
-        output.append(s"Feature: $feature")
-
-        val (supportedMinVersion, supportedMaxVersion) = {
-          val supportedVersionRange = result.supportedFeatures.get(feature)
-          if (supportedVersionRange == null) {
-            ("-", "-")
-          } else {
-            (supportedVersionRange.minVersion, supportedVersionRange.maxVersion)
-          }
-        }
-        output.append(s"\tSupportedMinVersion: $supportedMinVersion")
-        output.append(s"\tSupportedMaxVersion: $supportedMaxVersion")
+  def addDisableParser(subparsers: Subparsers): Unit = {
+    val disableParser = subparsers.addParser("disable")
+      .help("Disable one or more feature flags. This is the same as downgrading the version to zero.")
+
+    disableParser.addArgument("--feature")
+      .help("A feature flag to disable. This option may be repeated for disable multiple feature flags.")
+      .required(true)
+      .action(append())
+    disableParser.addArgument("--unsafe")
+      .help("Disable the feature flag(s) even if it considered unsafe. Refer to specific feature flag documentation for details.")
+      .action(storeTrue())
+    disableParser.addArgument("--dry-run")
+      .help("Perform a dry-run of this disable operation.")
+      .action(storeTrue())
+  }
 
-        val (finalizedMinVersionLevel, finalizedMaxVersionLevel) = {
-          val finalizedVersionRange = result.finalizedFeatures.get(feature)
-          if (finalizedVersionRange == null) {
-            ("-", "-")
-          } else {
-            (finalizedVersionRange.minVersionLevel, finalizedVersionRange.maxVersionLevel)
-          }
-        }
-        output.append(s"\tFinalizedMinVersionLevel: $finalizedMinVersionLevel")
-        output.append(s"\tFinalizedMaxVersionLevel: $finalizedMaxVersionLevel")
+  def handleDescribe(namespace: Namespace, admin: Admin): Unit = {
+    val featureFilter = parseFeaturesOrRelease(namespace) match {
+      case Neither() => (_: String) => true
+      case Features(featureNames) => (feature: String) => featureNames.contains(feature)
+      case Release(release) =>
+        // Special case, print the versions associated with the given release
+        printReleaseFeatures(release)
+        return
+      case Both() => throw new TerseFailure("Only one of --release or --feature may be specified with describe sub-command.")
+    }
 
-        val epoch = {
-          if (result.finalizedFeaturesEpoch.isPresent) {
-            result.finalizedFeaturesEpoch.get.toString
+    val featureMetadata = admin.describeFeatures().featureMetadata().get()
+    val featureEpoch = featureMetadata.finalizedFeaturesEpoch()
+    val epochString = if (featureEpoch.isPresent) {
+      s"Epoch: ${featureEpoch.get}"
+    } else {
+      "Epoch: -"
+    }
+    val finalized = featureMetadata.finalizedFeatures().asScala
+    featureMetadata.supportedFeatures().asScala.foreach {
+      case (feature, range) =>
+        if (featureFilter.apply(feature)) {
+          if (finalized.contains(feature)) {
+            println(s"Feature: $feature\tSupportedMinVersion: ${range.minVersion()}\t" +
+              s"SupportedMaxVersion: ${range.maxVersion()}\tFinalizedVersionLevel: ${finalized(feature).maxVersionLevel()}\t$epochString")
           } else {
-            "-"
+            println(s"Feature: $feature\tSupportedMinVersion: ${range.minVersion()}\t" +
+              s"SupportedMaxVersion: ${range.maxVersion()}\tFinalizedVersionLevel: -\t$epochString")
           }
         }
-        output.append(s"\tEpoch: $epoch")
-
-        println(output)
     }
   }
 
-  /**
-   * Upgrades all features known to this tool to their highest max version levels. The method may
-   * add new finalized features if they were not finalized previously, but it does not delete
-   * any existing finalized feature. The results of the feature updates are written to STDOUT.
-   *
-   * NOTE: if the --dry-run CLI option is provided, this method only prints the expected feature
-   * updates to STDOUT, without applying them.
-   *
-   * @throws UpdateFeaturesException if at least one of the feature updates failed
-   */
-  def upgradeAllFeatures(): Unit = {
-    val metadata = adminClient.describeFeatures.featureMetadata.get
-    val existingFinalizedFeatures = metadata.finalizedFeatures
-    val updates = supportedFeatures.features.asScala.map {
-      case (feature, targetVersionRange) =>
-        val existingVersionRange = existingFinalizedFeatures.get(feature)
-        if (existingVersionRange == null) {
-          val updateStr =
-            addOp +
-            s"\tFeature: $feature" +
-            s"\tExistingFinalizedMaxVersion: -" +
-            s"\tNewFinalizedMaxVersion: ${targetVersionRange.max}"
-          (feature, Some((updateStr, new FeatureUpdate(targetVersionRange.max, false))))
-        } else {
-          if (targetVersionRange.max > existingVersionRange.maxVersionLevel) {
-            val updateStr =
-              upgradeOp +
-              s"\tFeature: $feature" +
-              s"\tExistingFinalizedMaxVersion: ${existingVersionRange.maxVersionLevel}" +
-              s"\tNewFinalizedMaxVersion: ${targetVersionRange.max}"
-            (feature, Some((updateStr, new FeatureUpdate(targetVersionRange.max, false))))
-          } else {
-            (feature, Option.empty)
-          }
-        }
-    }.filter {
-      case(_, updateInfo) => updateInfo.isDefined
-    }.map {
-      case(feature, updateInfo) => (feature, updateInfo.get)
-    }.toMap
+  def printReleaseFeatures(release: String): Unit = {
+    println(s"Default feature versions for release $release:")
+  }
 
-    if (updates.nonEmpty) {
-      maybeApplyFeatureUpdates(updates)
+  def handleUpgrade(namespace: Namespace, admin: Admin): Unit = {
+    val featuresToUpgrade = parseFeaturesOrRelease(namespace) match {
+      case Features(featureNames) => parseVersions(featureNames, namespace)
+      case Release(release) => featuresForRelease(release)
+      case Neither() => throw new TerseFailure("Must specify either --release or at least one --feature and --version with upgrade sub-command.")
+      case Both() => throw new TerseFailure("Cannot specify both --release and --feature with upgrade sub-command.")
     }
-  }
 
-  /**
-   * Downgrades existing finalized features to the highest max version levels known to this tool.
-   * The method may delete existing finalized features if they are no longer seen to be supported,
-   * but it does not add a feature that was not finalized previously. The results of the feature
-   * updates are written to STDOUT.
-   *
-   * NOTE: if the --dry-run CLI option is provided, this method only prints the expected feature
-   * updates to STDOUT, without applying them.
-   *
-   * @throws UpdateFeaturesException if at least one of the feature updates failed
-   */
-  def downgradeAllFeatures(): Unit = {
-    val metadata = adminClient.describeFeatures.featureMetadata.get
-    val existingFinalizedFeatures = metadata.finalizedFeatures
-    val supportedFeaturesMap = supportedFeatures.features
-    val updates = existingFinalizedFeatures.asScala.map {
-      case (feature, existingVersionRange) =>
-        val targetVersionRange = supportedFeaturesMap.get(feature)
-        if (targetVersionRange == null) {
-          val updateStr =
-            deleteOp +
-            s"\tFeature: $feature" +
-            s"\tExistingFinalizedMaxVersion: ${existingVersionRange.maxVersionLevel}" +
-            s"\tNewFinalizedMaxVersion: -"
-          (feature, Some(updateStr, new FeatureUpdate(0, true)))
-        } else {
-          if (targetVersionRange.max < existingVersionRange.maxVersionLevel) {
-            val updateStr =
-              downgradeOp +
-              s"\tFeature: $feature" +
-              s"\tExistingFinalizedMaxVersion: ${existingVersionRange.maxVersionLevel}" +
-              s"\tNewFinalizedMaxVersion: ${targetVersionRange.max}"
-            (feature, Some(updateStr, new FeatureUpdate(targetVersionRange.max, true)))
-          } else {
-            (feature, Option.empty)
-          }
-        }
-    }.filter {
-      case(_, updateInfo) => updateInfo.isDefined
-    }.map {
-      case(feature, updateInfo) => (feature, updateInfo.get)
-    }.toMap
+    val dryRun = namespace.getBoolean("dry_run")
+    val updateResult = admin.updateFeatures(featuresToUpgrade.map { case (feature, version) =>
+      feature -> new FeatureUpdate(version, UpgradeType.UPGRADE)
+    }.asJava, new UpdateFeaturesOptions().validateOnly(dryRun))
+    handleUpdateFeaturesResponse(updateResult, featuresToUpgrade, dryRun, "upgrade")
+  }
 
-    if (updates.nonEmpty) {
-      maybeApplyFeatureUpdates(updates)
+  def handleDowngrade(namespace: Namespace, admin: Admin): Unit = {
+    val featuresToDowngrade = parseFeaturesOrRelease(namespace) match {
+      case Features(featureNames) => parseVersions(featureNames, namespace)
+      case Neither() => throw new TerseFailure("Must specify at least one --feature and --version with downgrade sub-command.")
+      case _ => throw new IllegalStateException()
     }
-  }
 
-  /**
-   * Applies the provided feature updates. If the --dry-run CLI option is provided, the method
-   * only prints the expected feature updates to STDOUT without applying them.
-   *
-   * @param updates the feature updates to be applied via the admin client
-   *
-   * @throws UpdateFeaturesException if at least one of the feature updates failed
-   */
-  private def maybeApplyFeatureUpdates(updates: Map[String, (String, FeatureUpdate)]): Unit = {
-    if (opts.hasDryRunOption) {
-      println("Expected feature updates:" + ListMap(
-        updates
-          .toSeq
-          .sortBy { case(feature, _) => feature} :_*)
-          .map { case(_, (updateStr, _)) => updateStr}
-          .mkString("\n"))
-    } else {
-      val result = adminClient.updateFeatures(
-        updates
-          .map { case(feature, (_, update)) => (feature, update)}
-          .asJava,
-        new UpdateFeaturesOptions())
-      val resultSortedByFeature = ListMap(
-        result
-          .values
-          .asScala
-          .toSeq
-          .sortBy { case(feature, _) => feature} :_*)
-      val failures = resultSortedByFeature.map {
-        case (feature, updateFuture) =>
-          val (updateStr, _) = updates(feature)
-          try {
-            updateFuture.get
-            println(updateStr + "\tResult: OK")
-            0
-          } catch {
-            case e: ExecutionException =>
-              val cause = if (e.getCause == null) e else e.getCause
-              println(updateStr + "\tResult: FAILED due to " + cause)
-              1
-            case e: Throwable =>
-              println(updateStr + "\tResult: FAILED due to " + e)
-              1
-          }
-      }.sum
-      if (failures > 0) {
-        throw new UpdateFeaturesException(s"$failures feature updates failed!")
+    val dryRun = namespace.getBoolean("dry_run")
+    val unsafe = namespace.getBoolean("unsafe")
+    val updateResult = admin.updateFeatures(featuresToDowngrade.map { case (feature, version) =>
+      if (unsafe) {
+        feature -> new FeatureUpdate(version, UpgradeType.UNSAFE_DOWNGRADE)
+      } else {
+        feature -> new FeatureUpdate(version, UpgradeType.SAFE_DOWNGRADE)
       }
-    }
+    }.asJava, new UpdateFeaturesOptions().validateOnly(dryRun))
+
+    handleUpdateFeaturesResponse(updateResult, featuresToDowngrade, dryRun, "downgrade")
   }
 
-  def execute(): Unit = {
-    if (opts.hasDescribeOption) {
-      describeFeatures()
-    } else if (opts.hasUpgradeAllOption) {
-      upgradeAllFeatures()
-    } else if (opts.hasDowngradeAllOption) {
-      downgradeAllFeatures()
-    } else {
-      throw new IllegalStateException("Unexpected state: no CLI command could be executed.")
+  def handleDisable(namespace: Namespace, admin: Admin): Unit = {
+    val featuresToDisable = parseFeaturesOrRelease(namespace) match {
+      case Features(featureNames) => featureNames
+      case Neither() => throw new TerseFailure("Must specify at least one --feature and --version with downgrade sub-command.")
+      case _ => throw new IllegalStateException()
     }
-  }
 
-  def close(): Unit = {
-    adminClient.close()
+    val dryRun = namespace.getBoolean("dry_run")
+    val unsafe = namespace.getBoolean("unsafe")
+    val updateResult = admin.updateFeatures(featuresToDisable.map { feature =>
+      if (unsafe) {
+        feature -> new FeatureUpdate(0.toShort, UpgradeType.UNSAFE_DOWNGRADE)
+      } else {
+        feature -> new FeatureUpdate(0.toShort, UpgradeType.SAFE_DOWNGRADE)
+      }
+    }.toMap.asJava, new UpdateFeaturesOptions().validateOnly(dryRun))
+
+    handleUpdateFeaturesResponse(updateResult, featuresToDisable.map {
+      feature => feature -> 0.toShort
+    }.toMap, dryRun, "disable")
   }
-}
 
-class FeatureCommandOptions(args: Array[String]) extends CommandDefaultOptions(args) {
-  private val bootstrapServerOpt = parser.accepts(
-      "bootstrap-server",
-      "REQUIRED: A comma-separated list of host:port pairs to use for establishing the connection" +
-      " to the Kafka cluster.")
-      .withRequiredArg
-      .describedAs("server to connect to")
-      .ofType(classOf[String])
-  private val commandConfigOpt = parser.accepts(
-    "command-config",
-    "Property file containing configs to be passed to Admin Client." +
-    " This is used with --bootstrap-server option when required.")
-    .withOptionalArg
-    .describedAs("command config property file")
-    .ofType(classOf[String])
-  private val describeOpt = parser.accepts(
-    "describe",
-    "Describe supported and finalized features from a random broker.")
-  private val upgradeAllOpt = parser.accepts(
-    "upgrade-all",
-    "Upgrades all finalized features to the maximum version levels known to the tool." +
-    " This command finalizes new features known to the tool that were never finalized" +
-    " previously in the cluster, but it is guaranteed to not delete any existing feature.")
-  private val downgradeAllOpt = parser.accepts(
-    "downgrade-all",
-    "Downgrades all finalized features to the maximum version levels known to the tool." +
-    " This command deletes unknown features from the list of finalized features in the" +
-    " cluster, but it is guaranteed to not add a new feature.")
-  private val dryRunOpt = parser.accepts(
-    "dry-run",
-    "Performs a dry-run of upgrade/downgrade mutations to finalized feature without applying them.")
-
-  options = parser.parse(args : _*)
-
-  checkArgs()
-
-  def has(builder: OptionSpec[_]): Boolean = options.has(builder)
-
-  def hasDescribeOption: Boolean = has(describeOpt)
-
-  def hasDryRunOption: Boolean = has(dryRunOpt)
-
-  def hasUpgradeAllOption: Boolean = has(upgradeAllOpt)
-
-  def hasDowngradeAllOption: Boolean = has(downgradeAllOpt)
-
-  def commandConfig: Properties = {
-    if (has(commandConfigOpt))
-      Utils.loadProps(options.valueOf(commandConfigOpt))
-    else
-      new Properties()
+  def handleUpdateFeaturesResponse(updateResult: UpdateFeaturesResult,
+                                   updatedFeatures: Map[String, Short],
+                                   dryRun: Boolean,
+                                   op: String): Unit = {
+    val errors = updateResult.values().asScala.map { case (feature, future) =>
+      try {
+        future.get()
+        feature -> None
+      } catch {
+        case e: ExecutionException => feature -> Some(e.getCause)
+        case t: Throwable => feature -> Some(t)
+      }
+    }
+
+    errors.foreach { case (feature, maybeThrowable) =>
+      if (maybeThrowable.isDefined) {
+        if (dryRun) {
+          System.out.println(s"Can not $op feature '$feature' to ${updatedFeatures(feature)}. ${maybeThrowable.get.getMessage}")
+        } else {
+          System.out.println(s"Could not $op feature '$feature' to ${updatedFeatures(feature)}. ${maybeThrowable.get.getMessage}")
+        }
+      } else {
+        if (dryRun) {
+          System.out.println(s"Feature '$feature' can be ${op}d to ${updatedFeatures(feature)}.")
+        } else {
+          System.out.println(s"Feature '$feature' was ${op}d to ${updatedFeatures(feature)}.")
+        }
+      }
+    }
   }
 
-  def bootstrapServers: String = options.valueOf(bootstrapServerOpt)
+  sealed trait ReleaseOrFeatures { }
+  case class Neither() extends ReleaseOrFeatures
+  case class Release(release: String) extends ReleaseOrFeatures
+  case class Features(featureNames: Seq[String]) extends ReleaseOrFeatures
+  case class Both() extends ReleaseOrFeatures
+
+  def parseFeaturesOrRelease(namespace: Namespace): ReleaseOrFeatures = {
+    val release = namespace.getString("release")
+    val features = namespace.getList[String]("feature").asScala
+
+    if (release != null && features != null) {
+      Both()
+    } else if (release == null && features == null) {
+      Neither()
+    } else if (release != null) {
+      Release(release)
+    } else {
+      Features(features)
+    }
+  }
 
-  def checkArgs(): Unit = {
-    CommandLineUtils.printHelpAndExitIfNeeded(this, "This tool describes and updates finalized features.")
-    val numActions = Seq(describeOpt, upgradeAllOpt, downgradeAllOpt).count(has)
-    if (numActions != 1) {
-      CommandLineUtils.printUsageAndDie(
-        parser,
-        "Command must include exactly one action: --describe, --upgrade-all, --downgrade-all.")
+  def parseVersions(features: Seq[String], namespace: Namespace): Map[String, Short] = {
+    val versions = namespace.getList[Short]("version").asScala
+    if (versions == null) {
+      throw new TerseFailure("Must specify --version when using --feature argument(s).")
     }
-    CommandLineUtils.checkRequiredArgs(parser, options, bootstrapServerOpt)
-    if (hasDryRunOption && !hasUpgradeAllOption && !hasDowngradeAllOption) {
-      CommandLineUtils.printUsageAndDie(
-        parser,
-        "Command can contain --dry-run option only when either --upgrade-all or --downgrade-all actions are provided.")
+    if (versions.size != features.size) {
+      if (versions.size > features.size) {
+        throw new TerseFailure("Too many --version arguments given. For each --feature argument there should be one --version argument.")
+      } else {
+        throw new TerseFailure("Too many --feature arguments given. For each --feature argument there should be one --version argument.")
+      }
     }
+    features.zip(versions).map { case (feature, version) =>
+      feature -> version
+    }.toMap
+  }
+
+  def defaultFeatures(): Map[String, Short] = {
+    Map.empty
   }
-}
 
-object FeatureApis {
-  private def createAdminClient(opts: FeatureCommandOptions): Admin = {
-    val props = new Properties()
-    props.putAll(opts.commandConfig)
-    props.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, opts.bootstrapServers)
-    Admin.create(props)
+  def featuresForRelease(release: String): Map[String, Short] = {
+    Map.empty
   }
 }
diff --git a/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala b/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala
index ac6304b449191..ac454584e9135 100755
--- a/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala
+++ b/core/src/main/scala/kafka/admin/ReassignPartitionsCommand.scala
@@ -331,7 +331,7 @@ object ReassignPartitionsCommand extends Logging {
       val state = states(topicPartition)
       if (state.done) {
         if (state.currentReplicas.equals(state.targetReplicas)) {
-          bld.append("Reassignment of partition %s is complete.".
+          bld.append("Reassignment of partition %s is completed.".
             format(topicPartition.toString))
         } else {
           bld.append(s"There is no active reassignment of partition ${topicPartition}, " +
diff --git a/core/src/main/scala/kafka/api/ApiVersion.scala b/core/src/main/scala/kafka/api/ApiVersion.scala
deleted file mode 100644
index 8165e6c6f1e5a..0000000000000
--- a/core/src/main/scala/kafka/api/ApiVersion.scala
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.api
-
-import org.apache.kafka.clients.NodeApiVersions
-import org.apache.kafka.common.config.ConfigDef.Validator
-import org.apache.kafka.common.config.ConfigException
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
-import org.apache.kafka.common.message.ApiMessageType.ListenerType
-import org.apache.kafka.common.record.RecordVersion
-import org.apache.kafka.common.requests.ApiVersionsResponse
-
-/**
- * This class contains the different Kafka versions.
- * Right now, we use them for upgrades - users can configure the version of the API brokers will use to communicate between themselves.
- * This is only for inter-broker communications - when communicating with clients, the client decides on the API version.
- *
- * Note that the ID we initialize for each version is important.
- * We consider a version newer than another, if it has a higher ID (to avoid depending on lexicographic order)
- *
- * Since the api protocol may change more than once within the same release and to facilitate people deploying code from
- * trunk, we have the concept of internal versions (first introduced during the 0.10.0 development cycle). For example,
- * the first time we introduce a version change in a release, say 0.10.0, we will add a config value "0.10.0-IV0" and a
- * corresponding case object KAFKA_0_10_0-IV0. We will also add a config value "0.10.0" that will be mapped to the
- * latest internal version object, which is KAFKA_0_10_0-IV0. When we change the protocol a second time while developing
- * 0.10.0, we will add a new config value "0.10.0-IV1" and a corresponding case object KAFKA_0_10_0-IV1. We will change
- * the config value "0.10.0" to map to the latest internal version object KAFKA_0_10_0-IV1. The config value of
- * "0.10.0-IV0" is still mapped to KAFKA_0_10_0-IV0. This way, if people are deploying from trunk, they can use
- * "0.10.0-IV0" and "0.10.0-IV1" to upgrade one internal version at a time. For most people who just want to use
- * released version, they can use "0.10.0" when upgrading to the 0.10.0 release.
- */
-object ApiVersion {
-  // This implicit is necessary due to: https://issues.scala-lang.org/browse/SI-8541
-  implicit def orderingByVersion[A <: ApiVersion]: Ordering[A] = Ordering.by(_.id)
-
-  val allVersions: Seq[ApiVersion] = Seq(
-    KAFKA_0_8_0,
-    KAFKA_0_8_1,
-    KAFKA_0_8_2,
-    KAFKA_0_9_0,
-    // 0.10.0-IV0 is introduced for KIP-31/32 which changes the message format.
-    KAFKA_0_10_0_IV0,
-    // 0.10.0-IV1 is introduced for KIP-36(rack awareness) and KIP-43(SASL handshake).
-    KAFKA_0_10_0_IV1,
-    // introduced for JoinGroup protocol change in KIP-62
-    KAFKA_0_10_1_IV0,
-    // 0.10.1-IV1 is introduced for KIP-74(fetch response size limit).
-    KAFKA_0_10_1_IV1,
-    // introduced ListOffsetRequest v1 in KIP-79
-    KAFKA_0_10_1_IV2,
-    // introduced UpdateMetadataRequest v3 in KIP-103
-    KAFKA_0_10_2_IV0,
-    // KIP-98 (idempotent and transactional producer support)
-    KAFKA_0_11_0_IV0,
-    // introduced DeleteRecordsRequest v0 and FetchRequest v4 in KIP-107
-    KAFKA_0_11_0_IV1,
-    // Introduced leader epoch fetches to the replica fetcher via KIP-101
-    KAFKA_0_11_0_IV2,
-    // Introduced LeaderAndIsrRequest V1, UpdateMetadataRequest V4 and FetchRequest V6 via KIP-112
-    KAFKA_1_0_IV0,
-    // Introduced DeleteGroupsRequest V0 via KIP-229, plus KIP-227 incremental fetch requests,
-    // and KafkaStorageException for fetch requests.
-    KAFKA_1_1_IV0,
-    // Introduced OffsetsForLeaderEpochRequest V1 via KIP-279 (Fix log divergence between leader and follower after fast leader fail over)
-    KAFKA_2_0_IV0,
-    // Several request versions were bumped due to KIP-219 (Improve quota communication)
-    KAFKA_2_0_IV1,
-    // Introduced new schemas for group offset (v2) and group metadata (v2) (KIP-211)
-    KAFKA_2_1_IV0,
-    // New Fetch, OffsetsForLeaderEpoch, and ListOffsets schemas (KIP-320)
-    KAFKA_2_1_IV1,
-    // Support ZStandard Compression Codec (KIP-110)
-    KAFKA_2_1_IV2,
-    // Introduced broker generation (KIP-380), and
-    // LeaderAdnIsrRequest V2, UpdateMetadataRequest V5, StopReplicaRequest V1
-    KAFKA_2_2_IV0,
-    // New error code for ListOffsets when a new leader is lagging behind former HW (KIP-207)
-    KAFKA_2_2_IV1,
-    // Introduced static membership.
-    KAFKA_2_3_IV0,
-    // Add rack_id to FetchRequest, preferred_read_replica to FetchResponse, and replica_id to OffsetsForLeaderRequest
-    KAFKA_2_3_IV1,
-    // Add adding_replicas and removing_replicas fields to LeaderAndIsrRequest
-    KAFKA_2_4_IV0,
-    // Flexible version support in inter-broker APIs
-    KAFKA_2_4_IV1,
-    // No new APIs, equivalent to 2.4-IV1
-    KAFKA_2_5_IV0,
-    // Introduced StopReplicaRequest V3 containing the leader epoch for each partition (KIP-570)
-    KAFKA_2_6_IV0,
-    // Introduced feature versioning support (KIP-584)
-    KAFKA_2_7_IV0,
-    // Bup Fetch protocol for Raft protocol (KIP-595)
-    KAFKA_2_7_IV1,
-    // Introduced AlterIsr (KIP-497)
-    KAFKA_2_7_IV2,
-    // Flexible versioning on ListOffsets, WriteTxnMarkers and OffsetsForLeaderEpoch. Also adds topic IDs (KIP-516)
-    KAFKA_2_8_IV0,
-    // Introduced topic IDs to LeaderAndIsr and UpdateMetadata requests/responses (KIP-516)
-    KAFKA_2_8_IV1,
-    // Introduce AllocateProducerIds (KIP-730)
-    KAFKA_3_0_IV0,
-    // Introduce ListOffsets V7 which supports listing offsets by max timestamp (KIP-734)
-    // Assume message format version is 3.0 (KIP-724)
-    KAFKA_3_0_IV1,
-    // Adds topic IDs to Fetch requests/responses (KIP-516)
-    KAFKA_3_1_IV0
-  )
-
-  // Map keys are the union of the short and full versions
-  private val versionMap: Map[String, ApiVersion] =
-    allVersions.map(v => v.version -> v).toMap ++ allVersions.groupBy(_.shortVersion).map { case (k, v) => k -> v.last }
-
-  /**
-   * Return an `ApiVersion` instance for `versionString`, which can be in a variety of formats (e.g. "0.8.0", "0.8.0.x",
-   * "0.10.0", "0.10.0-IV1"). `IllegalArgumentException` is thrown if `versionString` cannot be mapped to an `ApiVersion`.
-   */
-  def apply(versionString: String): ApiVersion = {
-    val versionSegments = versionString.split('.').toSeq
-    val numSegments = if (versionString.startsWith("0.")) 3 else 2
-    val key = versionSegments.take(numSegments).mkString(".")
-    versionMap.getOrElse(key, throw new IllegalArgumentException(s"Version `$versionString` is not a valid version"))
-  }
-
-  val latestVersion: ApiVersion = allVersions.last
-
-  def isTruncationOnFetchSupported(version: ApiVersion): Boolean = version >= KAFKA_2_7_IV1
-
-  /**
-   * Return the minimum `ApiVersion` that supports `RecordVersion`.
-   */
-  def minSupportedFor(recordVersion: RecordVersion): ApiVersion = {
-    recordVersion match {
-      case RecordVersion.V0 => KAFKA_0_8_0
-      case RecordVersion.V1 => KAFKA_0_10_0_IV0
-      case RecordVersion.V2 => KAFKA_0_11_0_IV0
-      case _ => throw new IllegalArgumentException(s"Invalid message format version $recordVersion")
-    }
-  }
-
-  def apiVersionsResponse(
-    throttleTimeMs: Int,
-    minRecordVersion: RecordVersion,
-    latestSupportedFeatures: Features[SupportedVersionRange],
-    controllerApiVersions: Option[NodeApiVersions],
-    listenerType: ListenerType
-  ): ApiVersionsResponse = {
-    apiVersionsResponse(
-      throttleTimeMs,
-      minRecordVersion,
-      latestSupportedFeatures,
-      Features.emptyFinalizedFeatures,
-      ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH,
-      controllerApiVersions,
-      listenerType
-    )
-  }
-
-  def apiVersionsResponse(
-    throttleTimeMs: Int,
-    minRecordVersion: RecordVersion,
-    latestSupportedFeatures: Features[SupportedVersionRange],
-    finalizedFeatures: Features[FinalizedVersionRange],
-    finalizedFeaturesEpoch: Long,
-    controllerApiVersions: Option[NodeApiVersions],
-    listenerType: ListenerType
-  ): ApiVersionsResponse = {
-    val apiKeys = controllerApiVersions match {
-      case None => ApiVersionsResponse.filterApis(minRecordVersion, listenerType)
-      case Some(controllerApiVersion) => ApiVersionsResponse.intersectForwardableApis(
-        listenerType, minRecordVersion, controllerApiVersion.allSupportedApiVersions())
-    }
-
-    ApiVersionsResponse.createApiVersionsResponse(
-      throttleTimeMs,
-      apiKeys,
-      latestSupportedFeatures,
-      finalizedFeatures,
-      finalizedFeaturesEpoch
-    )
-  }
-}
-
-sealed trait ApiVersion extends Ordered[ApiVersion] {
-  def version: String
-  def shortVersion: String
-  def recordVersion: RecordVersion
-  def id: Int
-
-  def isAlterIsrSupported: Boolean = this >= KAFKA_2_7_IV2
-
-  def isAllocateProducerIdsSupported: Boolean = this >= KAFKA_3_0_IV0
-
-  override def compare(that: ApiVersion): Int =
-    ApiVersion.orderingByVersion.compare(this, that)
-
-  override def toString: String = version
-}
-
-/**
- * For versions before 0.10.0, `version` and `shortVersion` were the same.
- */
-sealed trait LegacyApiVersion extends ApiVersion {
-  def version = shortVersion
-}
-
-/**
- * From 0.10.0 onwards, each version has a sub-version. For example, IV0 is the sub-version of 0.10.0-IV0.
- */
-sealed trait DefaultApiVersion extends ApiVersion {
-  lazy val version = shortVersion + "-" + subVersion
-  protected def subVersion: String
-}
-
-// Keep the IDs in order of versions
-case object KAFKA_0_8_0 extends LegacyApiVersion {
-  val shortVersion = "0.8.0"
-  val recordVersion = RecordVersion.V0
-  val id: Int = 0
-}
-
-case object KAFKA_0_8_1 extends LegacyApiVersion {
-  val shortVersion = "0.8.1"
-  val recordVersion = RecordVersion.V0
-  val id: Int = 1
-}
-
-case object KAFKA_0_8_2 extends LegacyApiVersion {
-  val shortVersion = "0.8.2"
-  val recordVersion = RecordVersion.V0
-  val id: Int = 2
-}
-
-case object KAFKA_0_9_0 extends LegacyApiVersion {
-  val shortVersion = "0.9.0"
-  val subVersion = ""
-  val recordVersion = RecordVersion.V0
-  val id: Int = 3
-}
-
-case object KAFKA_0_10_0_IV0 extends DefaultApiVersion {
-  val shortVersion = "0.10.0"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 4
-}
-
-case object KAFKA_0_10_0_IV1 extends DefaultApiVersion {
-  val shortVersion = "0.10.0"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 5
-}
-
-case object KAFKA_0_10_1_IV0 extends DefaultApiVersion {
-  val shortVersion = "0.10.1"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 6
-}
-
-case object KAFKA_0_10_1_IV1 extends DefaultApiVersion {
-  val shortVersion = "0.10.1"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 7
-}
-
-case object KAFKA_0_10_1_IV2 extends DefaultApiVersion {
-  val shortVersion = "0.10.1"
-  val subVersion = "IV2"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 8
-}
-
-case object KAFKA_0_10_2_IV0 extends DefaultApiVersion {
-  val shortVersion = "0.10.2"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V1
-  val id: Int = 9
-}
-
-case object KAFKA_0_11_0_IV0 extends DefaultApiVersion {
-  val shortVersion = "0.11.0"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 10
-}
-
-case object KAFKA_0_11_0_IV1 extends DefaultApiVersion {
-  val shortVersion = "0.11.0"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 11
-}
-
-case object KAFKA_0_11_0_IV2 extends DefaultApiVersion {
-  val shortVersion = "0.11.0"
-  val subVersion = "IV2"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 12
-}
-
-case object KAFKA_1_0_IV0 extends DefaultApiVersion {
-  val shortVersion = "1.0"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 13
-}
-
-case object KAFKA_1_1_IV0 extends DefaultApiVersion {
-  val shortVersion = "1.1"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 14
-}
-
-case object KAFKA_2_0_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.0"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 15
-}
-
-case object KAFKA_2_0_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.0"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 16
-}
-
-case object KAFKA_2_1_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.1"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 17
-}
-
-case object KAFKA_2_1_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.1"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 18
-}
-
-case object KAFKA_2_1_IV2 extends DefaultApiVersion {
-  val shortVersion: String = "2.1"
-  val subVersion = "IV2"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 19
-}
-
-case object KAFKA_2_2_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.2"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 20
-}
-
-case object KAFKA_2_2_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.2"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 21
-}
-
-case object KAFKA_2_3_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.3"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 22
-}
-
-case object KAFKA_2_3_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.3"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 23
-}
-
-case object KAFKA_2_4_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.4"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 24
-}
-
-case object KAFKA_2_4_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.4"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 25
-}
-
-case object KAFKA_2_5_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.5"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 26
-}
-
-case object KAFKA_2_6_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.6"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 27
-}
-
-case object KAFKA_2_7_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.7"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 28
-}
-
-case object KAFKA_2_7_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.7"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 29
-}
-
-case object KAFKA_2_7_IV2 extends DefaultApiVersion {
-  val shortVersion: String = "2.7"
-  val subVersion = "IV2"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 30
-}
-
-case object KAFKA_2_8_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "2.8"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 31
-}
-
-case object KAFKA_2_8_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "2.8"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 32
-}
-
-case object KAFKA_3_0_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "3.0"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 33
-}
-
-case object KAFKA_3_0_IV1 extends DefaultApiVersion {
-  val shortVersion: String = "3.0"
-  val subVersion = "IV1"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 34
-}
-
-case object KAFKA_3_1_IV0 extends DefaultApiVersion {
-  val shortVersion: String = "3.1"
-  val subVersion = "IV0"
-  val recordVersion = RecordVersion.V2
-  val id: Int = 35
-}
-
-object ApiVersionValidator extends Validator {
-
-  override def ensureValid(name: String, value: Any): Unit = {
-    try {
-      ApiVersion(value.toString)
-    } catch {
-      case e: IllegalArgumentException => throw new ConfigException(name, value.toString, e.getMessage)
-    }
-  }
-
-  override def toString: String = "[" + ApiVersion.allVersions.map(_.version).distinct.mkString(", ") + "]"
-}
diff --git a/core/src/main/scala/kafka/api/LeaderAndIsr.scala b/core/src/main/scala/kafka/api/LeaderAndIsr.scala
index 05952aa58d3a9..da68cdb479c1c 100644
--- a/core/src/main/scala/kafka/api/LeaderAndIsr.scala
+++ b/core/src/main/scala/kafka/api/LeaderAndIsr.scala
@@ -17,46 +17,66 @@
 
 package kafka.api
 
+import org.apache.kafka.metadata.LeaderRecoveryState
+
 object LeaderAndIsr {
-  val initialLeaderEpoch: Int = 0
-  val initialZKVersion: Int = 0
+  val InitialLeaderEpoch: Int = 0
+  val InitialPartitionEpoch: Int = 0
   val NoLeader: Int = -1
   val NoEpoch: Int = -1
   val LeaderDuringDelete: Int = -2
   val EpochDuringDelete: Int = -2
 
-  def apply(leader: Int, isr: List[Int]): LeaderAndIsr = LeaderAndIsr(leader, initialLeaderEpoch, isr, initialZKVersion)
+  def apply(leader: Int, isr: List[Int]): LeaderAndIsr = {
+    LeaderAndIsr(leader, InitialLeaderEpoch, isr, LeaderRecoveryState.RECOVERED, InitialPartitionEpoch)
+  }
 
   def duringDelete(isr: List[Int]): LeaderAndIsr = LeaderAndIsr(LeaderDuringDelete, isr)
 }
 
-case class LeaderAndIsr(leader: Int,
-                        leaderEpoch: Int,
-                        isr: List[Int],
-                        zkVersion: Int) {
-  def withZkVersion(zkVersion: Int) = copy(zkVersion = zkVersion)
+case class LeaderAndIsr(
+  leader: Int,
+  leaderEpoch: Int,
+  isr: List[Int],
+  leaderRecoveryState: LeaderRecoveryState,
+  // The current epoch for the partition for KRaft controllers. The current ZK version for the
+  // legacy controllers. The epoch is a monotonically increasing value which is incremented
+  // after every partition change.
+  partitionEpoch: Int
+) {
+  def withPartitionEpoch(partitionEpoch: Int): LeaderAndIsr = copy(partitionEpoch = partitionEpoch)
 
-  def newLeader(leader: Int) = newLeaderAndIsr(leader, isr)
+  def newLeader(leader: Int): LeaderAndIsr = newLeaderAndIsr(leader, isr)
 
-  def newLeaderAndIsr(leader: Int, isr: List[Int]) = LeaderAndIsr(leader, leaderEpoch + 1, isr, zkVersion)
+  def newLeaderAndIsr(leader: Int, isr: List[Int]): LeaderAndIsr = {
+    LeaderAndIsr(leader, leaderEpoch + 1, isr, leaderRecoveryState, partitionEpoch)
+  }
+
+  def newRecoveringLeaderAndIsr(leader: Int, isr: List[Int]): LeaderAndIsr = {
+    LeaderAndIsr(leader, leaderEpoch + 1, isr, LeaderRecoveryState.RECOVERING, partitionEpoch)
+  }
 
-  def newEpochAndZkVersion = newLeaderAndIsr(leader, isr)
+  def newEpoch: LeaderAndIsr = newLeaderAndIsr(leader, isr)
 
   def leaderOpt: Option[Int] = {
     if (leader == LeaderAndIsr.NoLeader) None else Some(leader)
   }
 
-  def equalsIgnoreZk(other: LeaderAndIsr): Boolean = {
+  def equalsAllowStalePartitionEpoch(other: LeaderAndIsr): Boolean = {
     if (this == other) {
       true
     } else if (other == null) {
       false
     } else {
-      leader == other.leader && leaderEpoch == other.leaderEpoch && isr.equals(other.isr)
+      leader == other.leader &&
+        leaderEpoch == other.leaderEpoch &&
+        isr.equals(other.isr) &&
+        leaderRecoveryState == other.leaderRecoveryState &&
+        partitionEpoch <= other.partitionEpoch
     }
   }
 
   override def toString: String = {
-    s"LeaderAndIsr(leader=$leader, leaderEpoch=$leaderEpoch, isr=$isr, zkVersion=$zkVersion)"
+    s"LeaderAndIsr(leader=$leader, leaderEpoch=$leaderEpoch, isr=$isr, leaderRecoveryState=$leaderRecoveryState, partitionEpoch=$partitionEpoch)"
   }
 }
diff --git a/core/src/main/scala/kafka/api/Request.scala b/core/src/main/scala/kafka/api/Request.scala
index 653b5f653ac52..6c405a45b03ed 100644
--- a/core/src/main/scala/kafka/api/Request.scala
+++ b/core/src/main/scala/kafka/api/Request.scala
@@ -25,6 +25,10 @@ object Request {
   // Broker ids are non-negative int.
   def isValidBrokerId(brokerId: Int): Boolean = brokerId >= 0
 
+  def isConsumer(replicaId: Int): Boolean = {
+    replicaId < 0 && replicaId != FutureLocalReplicaId
+  }
+
   def describeReplicaId(replicaId: Int): String = {
     replicaId match {
       case OrdinaryConsumerId => "consumer"
diff --git a/core/src/main/scala/kafka/cluster/Broker.scala b/core/src/main/scala/kafka/cluster/Broker.scala
index 657d89b8fe719..9b1d741835c35 100755
--- a/core/src/main/scala/kafka/cluster/Broker.scala
+++ b/core/src/main/scala/kafka/cluster/Broker.scala
@@ -35,7 +35,8 @@ object Broker {
   private[kafka] case class ServerInfo(clusterResource: ClusterResource,
                                          brokerId: Int,
                                          endpoints: util.List[Endpoint],
-                                         interBrokerEndpoint: Endpoint) extends AuthorizerServerInfo
+                                         interBrokerEndpoint: Endpoint,
+                                         earlyStartListeners: util.Set[String]) extends AuthorizerServerInfo
 
   def apply(id: Int, endPoints: Seq[EndPoint], rack: Option[String]): Broker = {
     new Broker(id, endPoints, rack, emptySupportedFeatures)
@@ -93,6 +94,7 @@ case class Broker(id: Int, endPoints: Seq[EndPoint], rack: Option[String], featu
     val clusterResource: ClusterResource = new ClusterResource(clusterId)
     val interBrokerEndpoint: Endpoint = endPoint(config.interBrokerListenerName).toJava
     val brokerEndpoints: util.List[Endpoint] = endPoints.toList.map(_.toJava).asJava
-    Broker.ServerInfo(clusterResource, id, brokerEndpoints, interBrokerEndpoint)
+    Broker.ServerInfo(clusterResource, id, brokerEndpoints, interBrokerEndpoint,
+      config.earlyStartListeners.map(_.value()).asJava)
   }
 }
diff --git a/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala b/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala
index b2b36af09da31..0137b6926ff93 100644
--- a/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala
+++ b/core/src/main/scala/kafka/cluster/BrokerEndPoint.scala
@@ -16,12 +16,6 @@
  */
 package kafka.cluster
 
-import java.nio.ByteBuffer
-
-import kafka.api.ApiUtils._
-import org.apache.kafka.common.KafkaException
-import org.apache.kafka.common.utils.Utils._
-
 object BrokerEndPoint {
 
   private val uriParseExp = """\[?([0-9a-zA-Z\-%._:]*)\]?:([0-9]+)""".r
@@ -36,23 +30,6 @@ object BrokerEndPoint {
       case _ => None
     }
   }
-  
-  /**
-   * BrokerEndPoint URI is host:port or [ipv6_host]:port
-   * Note that unlike EndPoint (or listener) this URI has no security information.
-   */
-  def createBrokerEndPoint(brokerId: Int, connectionString: String): BrokerEndPoint = {
-    parseHostPort(connectionString).map { case (host, port) => new BrokerEndPoint(brokerId, host, port) }.getOrElse {
-      throw new KafkaException("Unable to parse " + connectionString + " to a broker endpoint")
-    }
-  }
-
-  def readFrom(buffer: ByteBuffer): BrokerEndPoint = {
-    val brokerId = buffer.getInt()
-    val host = readShortString(buffer)
-    val port = buffer.getInt()
-    BrokerEndPoint(brokerId, host, port)
-  }
 }
 
 /**
@@ -63,20 +40,6 @@ object BrokerEndPoint {
  * This allows us to keep the wire protocol with the clients unchanged where the protocol is not needed.
  */
 case class BrokerEndPoint(id: Int, host: String, port: Int) {
-
-  def connectionString(): String = formatAddress(host, port)
-
-  def writeTo(buffer: ByteBuffer): Unit = {
-    buffer.putInt(id)
-    writeShortString(buffer, host)
-    buffer.putInt(port)
-  }
-
-  def sizeInBytes: Int =
-    4 + /* broker Id */
-    4 + /* port */
-    shortStringLength(host)
-
   override def toString: String = {
     s"BrokerEndPoint(id=$id, host=$host:$port)"
   }
diff --git a/core/src/main/scala/kafka/cluster/EndPoint.scala b/core/src/main/scala/kafka/cluster/EndPoint.scala
index 3e84f9ed834eb..89c9f5ec3d4c8 100644
--- a/core/src/main/scala/kafka/cluster/EndPoint.scala
+++ b/core/src/main/scala/kafka/cluster/EndPoint.scala
@@ -65,6 +65,12 @@ object EndPoint {
       case _ => throw new KafkaException(s"Unable to parse a listener name from $connectionString")
     }
   }
+
+  def fromJava(endpoint: JEndpoint): EndPoint =
+    new EndPoint(endpoint.host(),
+      endpoint.port(),
+      new ListenerName(endpoint.listenerName().get()),
+      endpoint.securityProtocol())
 }
 
 /**
diff --git a/core/src/main/scala/kafka/cluster/Partition.scala b/core/src/main/scala/kafka/cluster/Partition.scala
index 150432d99c004..1eab4c4669a01 100755
--- a/core/src/main/scala/kafka/cluster/Partition.scala
+++ b/core/src/main/scala/kafka/cluster/Partition.scala
@@ -19,17 +19,18 @@ package kafka.cluster
 import java.util.concurrent.locks.ReentrantReadWriteLock
 import java.util.Optional
 import java.util.concurrent.CompletableFuture
-
-import kafka.api.{ApiVersion, LeaderAndIsr}
+import kafka.api.LeaderAndIsr
 import kafka.common.UnexpectedAppendOffsetException
 import kafka.controller.{KafkaController, StateChangeLogger}
 import kafka.log._
 import kafka.metrics.KafkaMetricsGroup
 import kafka.server._
 import kafka.server.checkpoints.OffsetCheckpoints
+import kafka.server.metadata.{KRaftMetadataCache, ZkMetadataCache}
 import kafka.utils.CoreUtils.{inReadLock, inWriteLock}
 import kafka.utils._
 import kafka.zookeeper.ZooKeeperClientException
+import org.apache.kafka.common.TopicIdPartition
 import org.apache.kafka.common.errors._
 import org.apache.kafka.common.message.{DescribeProducersResponseData, FetchResponseData}
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState
@@ -41,13 +42,15 @@ import org.apache.kafka.common.requests._
 import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.{UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET}
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{IsolationLevel, TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
 
 import scala.collection.{Map, Seq}
 import scala.jdk.CollectionConverters._
 
-trait IsrChangeListener {
-  def markExpand(): Unit
-  def markShrink(): Unit
+trait AlterPartitionListener {
+  def markIsrExpand(): Unit
+  def markIsrShrink(): Unit
   def markFailed(): Unit
 }
 
@@ -71,12 +74,12 @@ object Partition extends KafkaMetricsGroup {
             time: Time,
             replicaManager: ReplicaManager): Partition = {
 
-    val isrChangeListener = new IsrChangeListener {
-      override def markExpand(): Unit = {
+    val isrChangeListener = new AlterPartitionListener {
+      override def markIsrExpand(): Unit = {
         replicaManager.isrExpandRate.mark()
       }
 
-      override def markShrink(): Unit = {
+      override def markIsrShrink(): Unit = {
         replicaManager.isrShrinkRate.mark()
       }
 
@@ -94,11 +97,11 @@ object Partition extends KafkaMetricsGroup {
       interBrokerProtocolVersion = replicaManager.config.interBrokerProtocolVersion,
       localBrokerId = replicaManager.config.brokerId,
       time = time,
-      isrChangeListener = isrChangeListener,
+      alterPartitionListener = isrChangeListener,
       delayedOperations = delayedOperations,
       metadataCache = replicaManager.metadataCache,
       logManager = replicaManager.logManager,
-      alterIsrManager = replicaManager.alterIsrManager)
+      alterIsrManager = replicaManager.alterPartitionManager)
   }
 
   def removeMetrics(topicPartition: TopicPartition): Unit = {
@@ -130,8 +133,7 @@ case class OngoingReassignmentState(addingReplicas: Seq[Int],
 case class SimpleAssignmentState(replicas: Seq[Int]) extends AssignmentState
 
 
-
-sealed trait IsrState {
+sealed trait PartitionState {
   /**
    * Includes only the in-sync replicas which have been committed to ZK.
    */
@@ -142,61 +144,84 @@ sealed trait IsrState {
    * the high watermark as well as determining which replicas are required for acks=all produce requests.
    *
    * Only applicable as of IBP 2.7-IV2, for older versions this will return the committed ISR
-   *
    */
   def maximalIsr: Set[Int]
 
   /**
-   * Indicates if we have an AlterIsr request inflight.
+   * The leader recovery state. See the description for LeaderRecoveryState for details on the different values.
+   */
+  def leaderRecoveryState: LeaderRecoveryState
+
+  /**
+   * Indicates if we have an AlterPartition request inflight.
    */
   def isInflight: Boolean
 }
 
-sealed trait PendingIsrChange extends IsrState {
+sealed trait PendingPartitionChange extends PartitionState {
+  def lastCommittedState: CommittedPartitionState
   def sentLeaderAndIsr: LeaderAndIsr
+
+  override val leaderRecoveryState: LeaderRecoveryState = LeaderRecoveryState.RECOVERED
+
+  def notifyListener(alterPartitionListener: AlterPartitionListener): Unit
 }
 
 case class PendingExpandIsr(
-  isr: Set[Int],
   newInSyncReplicaId: Int,
-  sentLeaderAndIsr: LeaderAndIsr
-) extends PendingIsrChange {
+  sentLeaderAndIsr: LeaderAndIsr,
+  lastCommittedState: CommittedPartitionState
+) extends PendingPartitionChange {
+  val isr = lastCommittedState.isr
   val maximalIsr = isr + newInSyncReplicaId
   val isInflight = true
 
+  def notifyListener(alterPartitionListener: AlterPartitionListener): Unit = {
+    alterPartitionListener.markIsrExpand()
+  }
+
   override def toString: String = {
-    s"PendingExpandIsr(isr=$isr" +
-      s", newInSyncReplicaId=$newInSyncReplicaId" +
-      s", sentLeaderAndIsr=$sentLeaderAndIsr" +
-      ")"
+    s"PendingExpandIsr(newInSyncReplicaId=$newInSyncReplicaId" +
+    s", sentLeaderAndIsr=$sentLeaderAndIsr" +
+    s", leaderRecoveryState=$leaderRecoveryState" +
+    s", lastCommittedState=$lastCommittedState" +
+    ")"
   }
 }
 
 case class PendingShrinkIsr(
-  isr: Set[Int],
   outOfSyncReplicaIds: Set[Int],
-  sentLeaderAndIsr: LeaderAndIsr
-) extends PendingIsrChange  {
+  sentLeaderAndIsr: LeaderAndIsr,
+  lastCommittedState: CommittedPartitionState
+) extends PendingPartitionChange  {
+  val isr = lastCommittedState.isr
   val maximalIsr = isr
   val isInflight = true
 
+  def notifyListener(alterPartitionListener: AlterPartitionListener): Unit = {
+    alterPartitionListener.markIsrShrink()
+  }
+
   override def toString: String = {
-    s"PendingShrinkIsr(isr=$isr" +
-      s", outOfSyncReplicaIds=$outOfSyncReplicaIds" +
-      s", sentLeaderAndIsr=$sentLeaderAndIsr" +
-      ")"
+    s"PendingShrinkIsr(outOfSyncReplicaIds=$outOfSyncReplicaIds" +
+    s", sentLeaderAndIsr=$sentLeaderAndIsr" +
+    s", leaderRecoveryState=$leaderRecoveryState" +
+    s", lastCommittedState=$lastCommittedState" +
+    ")"
   }
 }
 
-case class CommittedIsr(
-  isr: Set[Int]
-) extends IsrState {
+case class CommittedPartitionState(
+  isr: Set[Int],
+  leaderRecoveryState: LeaderRecoveryState
+) extends PartitionState {
   val maximalIsr = isr
   val isInflight = false
 
   override def toString: String = {
-    s"CommittedIsr(isr=$isr" +
-      ")"
+    s"CommittedPartitionState(isr=$isr" +
+    s", leaderRecoveryState=$leaderRecoveryState" +
+    ")"
   }
 }
 
@@ -221,14 +246,14 @@ case class CommittedIsr(
  */
 class Partition(val topicPartition: TopicPartition,
                 val replicaLagTimeMaxMs: Long,
-                interBrokerProtocolVersion: ApiVersion,
+                interBrokerProtocolVersion: MetadataVersion,
                 localBrokerId: Int,
                 time: Time,
-                isrChangeListener: IsrChangeListener,
+                alterPartitionListener: AlterPartitionListener,
                 delayedOperations: DelayedOperations,
                 metadataCache: MetadataCache,
                 logManager: LogManager,
-                alterIsrManager: AlterIsrManager) extends Logging with KafkaMetricsGroup {
+                alterIsrManager: AlterPartitionManager) extends Logging with KafkaMetricsGroup {
 
   def topic: String = topicPartition.topic
   def partitionId: Int = topicPartition.partition
@@ -240,13 +265,15 @@ class Partition(val topicPartition: TopicPartition,
 
   // lock to prevent the follower replica log update while checking if the log dir could be replaced with future log.
   private val futureLogLock = new Object()
-  private var zkVersion: Int = LeaderAndIsr.initialZKVersion
-  @volatile private var leaderEpoch: Int = LeaderAndIsr.initialLeaderEpoch - 1
+  // The current epoch for the partition for KRaft controllers. The current ZK version for the legacy controllers.
+  @volatile private var partitionEpoch: Int = LeaderAndIsr.InitialPartitionEpoch
+  @volatile private var leaderEpoch: Int = LeaderAndIsr.InitialLeaderEpoch - 1
   // start offset for 'leaderEpoch' above (leader epoch of the current leader for this partition),
   // defined when this broker is leader for partition
-  @volatile private var leaderEpochStartOffsetOpt: Option[Long] = None
+  @volatile private[cluster] var leaderEpochStartOffsetOpt: Option[Long] = None
+  // Replica ID of the leader, defined when this broker is leader or follower for the partition.
   @volatile var leaderReplicaIdOpt: Option[Int] = None
-  @volatile private[cluster] var isrState: IsrState = CommittedIsr(Set.empty)
+  @volatile private[cluster] var partitionState: PartitionState = CommittedPartitionState(Set.empty, LeaderRecoveryState.RECOVERED)
   @volatile var assignmentState: AssignmentState = SimpleAssignmentState(Seq.empty)
 
   // Logs belonging to this partition. Majority of time it will be only one log, but if log directory
@@ -268,7 +295,7 @@ class Partition(val topicPartition: TopicPartition,
   private val tags = Map("topic" -> topic, "partition" -> partitionId.toString)
 
   newGauge("UnderReplicated", () => if (isUnderReplicated) 1 else 0, tags)
-  newGauge("InSyncReplicasCount", () => if (isLeader) isrState.isr.size else 0, tags)
+  newGauge("InSyncReplicasCount", () => if (isLeader) partitionState.isr.size else 0, tags)
   newGauge("UnderMinIsr", () => if (isUnderMinIsr) 1 else 0, tags)
   newGauge("AtMinIsr", () => if (isAtMinIsr) 1 else 0, tags)
   newGauge("ReplicasCount", () => if (isLeader) assignmentState.replicationFactor else 0, tags)
@@ -276,11 +303,11 @@ class Partition(val topicPartition: TopicPartition,
 
   def hasLateTransaction(currentTimeMs: Long): Boolean = leaderLogIfLocal.exists(_.hasLateTransaction(currentTimeMs))
 
-  def isUnderReplicated: Boolean = isLeader && (assignmentState.replicationFactor - isrState.isr.size) > 0
+  def isUnderReplicated: Boolean = isLeader && (assignmentState.replicationFactor - partitionState.isr.size) > 0
 
-  def isUnderMinIsr: Boolean = leaderLogIfLocal.exists { isrState.isr.size < _.config.minInSyncReplicas }
+  def isUnderMinIsr: Boolean = leaderLogIfLocal.exists { partitionState.isr.size < _.config.minInSyncReplicas }
 
-  def isAtMinIsr: Boolean = leaderLogIfLocal.exists { isrState.isr.size == _.config.minInSyncReplicas }
+  def isAtMinIsr: Boolean = leaderLogIfLocal.exists { partitionState.isr.size == _.config.minInSyncReplicas }
 
   def isReassigning: Boolean = assignmentState.isInstanceOf[OngoingReassignmentState]
 
@@ -288,7 +315,7 @@ class Partition(val topicPartition: TopicPartition,
 
   def isAddingReplica(replicaId: Int): Boolean = assignmentState.isAddingReplica(replicaId)
 
-  def inSyncReplicaIds: Set[Int] = isrState.isr
+  def inSyncReplicaIds: Set[Int] = partitionState.isr
 
   /**
     * Create the future replica if 1) the current replica is not in the given log directory and 2) the future replica
@@ -422,8 +449,14 @@ class Partition(val topicPartition: TopicPartition,
    */
   def isLeader: Boolean = leaderReplicaIdOpt.contains(localBrokerId)
 
-  private def localLogWithEpochOrException(currentLeaderEpoch: Optional[Integer],
-                                           requireLeader: Boolean): UnifiedLog = {
+  def leaderIdIfLocal: Option[Int] = {
+    leaderReplicaIdOpt.filter(_ == localBrokerId)
+  }
+
+  private def localLogWithEpochOrThrow(
+    currentLeaderEpoch: Optional[Integer],
+    requireLeader: Boolean
+  ): UnifiedLog = {
     getLocalLog(currentLeaderEpoch, requireLeader) match {
       case Left(localLog) => localLog
       case Right(error) =>
@@ -510,7 +543,7 @@ class Partition(val topicPartition: TopicPartition,
       assignmentState = SimpleAssignmentState(Seq.empty)
       log = None
       futureLog = None
-      isrState = CommittedIsr(Set.empty)
+      partitionState = CommittedPartitionState(Set.empty, LeaderRecoveryState.RECOVERED)
       leaderReplicaIdOpt = None
       leaderEpochStartOffsetOpt = None
       Partition.removeMetrics(topicPartition)
@@ -519,7 +552,7 @@ class Partition(val topicPartition: TopicPartition,
 
   def getLeaderEpoch: Int = this.leaderEpoch
 
-  def getZkVersion: Int = this.zkVersion
+  def getPartitionEpoch: Int = this.partitionEpoch
 
   /**
    * Make the local replica the leader by resetting LogEndOffset for remote replicas (there could be old LogEndOffset
@@ -530,75 +563,107 @@ class Partition(val topicPartition: TopicPartition,
                  highWatermarkCheckpoints: OffsetCheckpoints,
                  topicId: Option[Uuid]): Boolean = {
     val (leaderHWIncremented, isNewLeader) = inWriteLock(leaderIsrUpdateLock) {
-      // record the epoch of the controller that made the leadership decision. This is useful while updating the isr
-      // to maintain the decision maker controller's epoch in the zookeeper path
+      // Partition state changes are expected to have an partition epoch larger or equal
+      // to the current partition epoch. The latter is allowed because the partition epoch
+      // is also updated by the AlterPartition response so the new epoch might be known
+      // before a LeaderAndIsr request is received or before an update is received via
+      // the metadata log.
+      if (partitionState.partitionEpoch < partitionEpoch) {
+        stateChangeLogger.info(s"Skipped the become-leader state change for $topicPartition with topic id $topicId " +
+          s"and partition state $partitionState since the leader is already at a newer partition epoch $partitionEpoch.")
+        return false
+      }
+
+      // Record the epoch of the controller that made the leadership decision. This is useful while updating the isr
+      // to maintain the decision maker controller's epoch in the zookeeper path.
       controllerEpoch = partitionState.controllerEpoch
 
+      val currentTimeMs = time.milliseconds
+      val isNewLeader = !isLeader
+      val isNewLeaderEpoch = partitionState.leaderEpoch > leaderEpoch
+      val replicas = partitionState.replicas.asScala.map(_.toInt)
       val isr = partitionState.isr.asScala.map(_.toInt).toSet
       val addingReplicas = partitionState.addingReplicas.asScala.map(_.toInt)
       val removingReplicas = partitionState.removingReplicas.asScala.map(_.toInt)
 
+      if (partitionState.leaderRecoveryState == LeaderRecoveryState.RECOVERING.value) {
+        stateChangeLogger.info(s"The topic partition $topicPartition was marked as RECOVERING. " +
+          "Marking the topic partition as RECOVERED.")
+      }
+
+      // Updating the assignment and ISR state is safe if the partition epoch is
+      // larger or equal to the current partition epoch.
       updateAssignmentAndIsr(
-        assignment = partitionState.replicas.asScala.map(_.toInt),
+        replicas = replicas,
+        isLeader = true,
         isr = isr,
         addingReplicas = addingReplicas,
-        removingReplicas = removingReplicas
+        removingReplicas = removingReplicas,
+        LeaderRecoveryState.RECOVERED
       )
+
       try {
         createLogIfNotExists(partitionState.isNew, isFutureReplica = false, highWatermarkCheckpoints, topicId)
       } catch {
         case e: ZooKeeperClientException =>
           stateChangeLogger.error(s"A ZooKeeper client exception has occurred and makeLeader will be skipping the " +
-            s"state change for the partition $topicPartition with leader epoch: $leaderEpoch ", e)
-
+            s"state change for the partition $topicPartition with leader epoch: $leaderEpoch.", e)
           return false
       }
 
       val leaderLog = localLogOrException
-      val leaderEpochStartOffset = leaderLog.logEndOffset
-      stateChangeLogger.info(s"Leader $topicPartition starts at leader epoch ${partitionState.leaderEpoch} from " +
-        s"offset $leaderEpochStartOffset with high watermark ${leaderLog.highWatermark} " +
-        s"ISR ${isr.mkString("[", ",", "]")} addingReplicas ${addingReplicas.mkString("[", ",", "]")} " +
-        s"removingReplicas ${removingReplicas.mkString("[", ",", "]")}. Previous leader epoch was $leaderEpoch.")
-
-      //We cache the leader epoch here, persisting it only if it's local (hence having a log dir)
-      leaderEpoch = partitionState.leaderEpoch
-      leaderEpochStartOffsetOpt = Some(leaderEpochStartOffset)
-      zkVersion = partitionState.zkVersion
 
-      // In the case of successive leader elections in a short time period, a follower may have
-      // entries in its log from a later epoch than any entry in the new leader's log. In order
-      // to ensure that these followers can truncate to the right offset, we must cache the new
-      // leader epoch and the start offset since it should be larger than any epoch that a follower
-      // would try to query.
-      leaderLog.maybeAssignEpochStartOffset(leaderEpoch, leaderEpochStartOffset)
-
-      val isNewLeader = !isLeader
-      val curTimeMs = time.milliseconds
-      // initialize lastCaughtUpTime of replicas as well as their lastFetchTimeMs and lastFetchLeaderLogEndOffset.
-      remoteReplicas.foreach { replica =>
-        val lastCaughtUpTimeMs = if (isrState.isr.contains(replica.brokerId)) curTimeMs else 0L
-        replica.resetLastCaughtUpTime(leaderEpochStartOffset, curTimeMs, lastCaughtUpTimeMs)
-      }
-
-      if (isNewLeader) {
-        // mark local replica as the leader after converting hw
-        leaderReplicaIdOpt = Some(localBrokerId)
-        // reset log end offset for remote replicas
+      // We update the epoch start offset and the replicas' state only if the leader epoch
+      // has changed.
+      if (isNewLeaderEpoch) {
+        val leaderEpochStartOffset = leaderLog.logEndOffset
+        stateChangeLogger.info(s"Leader $topicPartition with topic id $topicId starts at " +
+          s"leader epoch ${partitionState.leaderEpoch} from offset $leaderEpochStartOffset " +
+          s"with partition epoch ${partitionState.partitionEpoch}, high watermark ${leaderLog.highWatermark}, " +
+          s"ISR ${isr.mkString("[", ",", "]")}, adding replicas ${addingReplicas.mkString("[", ",", "]")} and " +
+          s"removing replicas ${removingReplicas.mkString("[", ",", "]")}. Previous leader epoch was $leaderEpoch.")
+
+        // In the case of successive leader elections in a short time period, a follower may have
+        // entries in its log from a later epoch than any entry in the new leader's log. In order
+        // to ensure that these followers can truncate to the right offset, we must cache the new
+        // leader epoch and the start offset since it should be larger than any epoch that a follower
+        // would try to query.
+        leaderLog.maybeAssignEpochStartOffset(partitionState.leaderEpoch, leaderEpochStartOffset)
+
+        // Initialize lastCaughtUpTime of replicas as well as their lastFetchTimeMs and
+        // lastFetchLeaderLogEndOffset.
         remoteReplicas.foreach { replica =>
-          replica.updateFetchState(
-            followerFetchOffsetMetadata = LogOffsetMetadata.UnknownOffsetMetadata,
-            followerStartOffset = UnifiedLog.UnknownOffset,
-            followerFetchTimeMs = 0L,
-            leaderEndOffset = UnifiedLog.UnknownOffset)
+          replica.resetReplicaState(
+            currentTimeMs = currentTimeMs,
+            leaderEndOffset = leaderEpochStartOffset,
+            isNewLeader = isNewLeader,
+            isFollowerInSync = partitionState.isr.contains(replica.brokerId)
+          )
         }
+
+        // We update the leader epoch and the leader epoch start offset iff the
+        // leader epoch changed.
+        leaderEpoch = partitionState.leaderEpoch
+        leaderEpochStartOffsetOpt = Some(leaderEpochStartOffset)
+      } else {
+        stateChangeLogger.info(s"Skipped the become-leader state change for $topicPartition with topic id $topicId " +
+          s"and partition state $partitionState since it is already the leader with leader epoch $leaderEpoch. " +
+          s"Current high watermark ${leaderLog.highWatermark}, ISR ${isr.mkString("[", ",", "]")}, " +
+          s"adding replicas ${addingReplicas.mkString("[", ",", "]")} and " +
+          s"removing replicas ${removingReplicas.mkString("[", ",", "]")}.")
       }
-      // we may need to increment high watermark since ISR could be down to 1
-      (maybeIncrementLeaderHW(leaderLog), isNewLeader)
+
+      partitionEpoch = partitionState.partitionEpoch
+      leaderReplicaIdOpt = Some(localBrokerId)
+
+      // We may need to increment high watermark since ISR could be down to 1.
+      (maybeIncrementLeaderHW(leaderLog, currentTimeMs = currentTimeMs), isNewLeader)
     }
-    // some delayed operations may be unblocked after HW changed
+
+    // Some delayed operations may be unblocked after HW changed.
     if (leaderHWIncremented)
       tryCompleteDelayedRequests()
+
     isNewLeader
   }
 
@@ -606,50 +671,63 @@ class Partition(val topicPartition: TopicPartition,
    * Make the local replica the follower by setting the new leader and ISR to empty
    * If the leader replica id does not change and the new epoch is equal or one
    * greater (that is, no updates have been missed), return false to indicate to the
-   * replica manager that state is already correct and the become-follower steps can be skipped
+   * replica manager that state is already correct and the become-follower steps can
+   * be skipped.
    */
   def makeFollower(partitionState: LeaderAndIsrPartitionState,
                    highWatermarkCheckpoints: OffsetCheckpoints,
                    topicId: Option[Uuid]): Boolean = {
     inWriteLock(leaderIsrUpdateLock) {
-      val newLeaderBrokerId = partitionState.leader
-      val oldLeaderEpoch = leaderEpoch
-      // record the epoch of the controller that made the leadership decision. This is useful while updating the isr
+      if (partitionState.partitionEpoch < partitionEpoch) {
+        stateChangeLogger.info(s"Skipped the become-follower state change for $topicPartition with topic id $topicId " +
+          s"and partition state $partitionState since the follower is already at a newer partition epoch $partitionEpoch.")
+        return false
+      }
+
+      // Record the epoch of the controller that made the leadership decision. This is useful while updating the isr
       // to maintain the decision maker controller's epoch in the zookeeper path
       controllerEpoch = partitionState.controllerEpoch
 
       updateAssignmentAndIsr(
-        assignment = partitionState.replicas.asScala.iterator.map(_.toInt).toSeq,
-        isr = Set.empty[Int],
+        replicas = partitionState.replicas.asScala.iterator.map(_.toInt).toSeq,
+        isLeader = false,
+        isr = Set.empty,
         addingReplicas = partitionState.addingReplicas.asScala.map(_.toInt),
-        removingReplicas = partitionState.removingReplicas.asScala.map(_.toInt)
+        removingReplicas = partitionState.removingReplicas.asScala.map(_.toInt),
+        LeaderRecoveryState.of(partitionState.leaderRecoveryState)
       )
+
       try {
         createLogIfNotExists(partitionState.isNew, isFutureReplica = false, highWatermarkCheckpoints, topicId)
       } catch {
         case e: ZooKeeperClientException =>
           stateChangeLogger.error(s"A ZooKeeper client exception has occurred. makeFollower will be skipping the " +
             s"state change for the partition $topicPartition with leader epoch: $leaderEpoch.", e)
-
           return false
       }
 
       val followerLog = localLogOrException
-      val leaderEpochEndOffset = followerLog.logEndOffset
-      stateChangeLogger.info(s"Follower $topicPartition starts at leader epoch ${partitionState.leaderEpoch} from " +
-        s"offset $leaderEpochEndOffset with high watermark ${followerLog.highWatermark}. " +
-        s"Previous leader epoch was $leaderEpoch.")
+      val isNewLeaderEpoch = partitionState.leaderEpoch > leaderEpoch
+
+      if (isNewLeaderEpoch) {
+        val leaderEpochEndOffset = followerLog.logEndOffset
+        stateChangeLogger.info(s"Follower $topicPartition starts at leader epoch ${partitionState.leaderEpoch} from " +
+          s"offset $leaderEpochEndOffset with partition epoch ${partitionState.partitionEpoch} and " +
+          s"high watermark ${followerLog.highWatermark}. Current leader is ${partitionState.leader}. " +
+          s"Previous leader epoch was $leaderEpoch.")
+      } else {
+        stateChangeLogger.info(s"Skipped the become-follower state change for $topicPartition with topic id $topicId " +
+          s"and partition state $partitionState since it is already a follower with leader epoch $leaderEpoch.")
+      }
 
+      leaderReplicaIdOpt = Option(partitionState.leader)
       leaderEpoch = partitionState.leaderEpoch
       leaderEpochStartOffsetOpt = None
-      zkVersion = partitionState.zkVersion
+      partitionEpoch = partitionState.partitionEpoch
 
-      if (leaderReplicaIdOpt.contains(newLeaderBrokerId) && leaderEpoch == oldLeaderEpoch) {
-        false
-      } else {
-        leaderReplicaIdOpt = Some(newLeaderBrokerId)
-        true
-      }
+      // We must restart the fetchers when the leader epoch changed regardless of
+      // whether the leader changed as well.
+      isNewLeaderEpoch
     }
   }
 
@@ -657,55 +735,51 @@ class Partition(val topicPartition: TopicPartition,
    * Update the follower's state in the leader based on the last fetch request. See
    * [[Replica.updateFetchState()]] for details.
    *
-   * @return true if the follower's fetch state was updated, false if the followerId is not recognized
+   * This method is visible for performance testing (see `UpdateFollowerFetchStateBenchmark`)
    */
-  def updateFollowerFetchState(followerId: Int,
-                               followerFetchOffsetMetadata: LogOffsetMetadata,
-                               followerStartOffset: Long,
-                               followerFetchTimeMs: Long,
-                               leaderEndOffset: Long): Boolean = {
-    getReplica(followerId) match {
-      case Some(followerReplica) =>
-        // No need to calculate low watermark if there is no delayed DeleteRecordsRequest
-        val oldLeaderLW = if (delayedOperations.numDelayedDelete > 0) lowWatermarkIfLeader else -1L
-        val prevFollowerEndOffset = followerReplica.logEndOffset
-        followerReplica.updateFetchState(
-          followerFetchOffsetMetadata,
-          followerStartOffset,
-          followerFetchTimeMs,
-          leaderEndOffset)
-
-        val newLeaderLW = if (delayedOperations.numDelayedDelete > 0) lowWatermarkIfLeader else -1L
-        // check if the LW of the partition has incremented
-        // since the replica's logStartOffset may have incremented
-        val leaderLWIncremented = newLeaderLW > oldLeaderLW
-
-        // Check if this in-sync replica needs to be added to the ISR.
-        maybeExpandIsr(followerReplica)
-
-        // check if the HW of the partition can now be incremented
-        // since the replica may already be in the ISR and its LEO has just incremented
-        val leaderHWIncremented = if (prevFollowerEndOffset != followerReplica.logEndOffset) {
-          // the leader log may be updated by ReplicaAlterLogDirsThread so the following method must be in lock of
-          // leaderIsrUpdateLock to prevent adding new hw to invalid log.
-          inReadLock(leaderIsrUpdateLock) {
-            leaderLogIfLocal.exists(leaderLog => maybeIncrementLeaderHW(leaderLog, followerFetchTimeMs))
-          }
-        } else {
-          false
-        }
-
-        // some delayed operations may be unblocked after HW or LW changed
-        if (leaderLWIncremented || leaderHWIncremented)
-          tryCompleteDelayedRequests()
+  def updateFollowerFetchState(
+    replica: Replica,
+    followerFetchOffsetMetadata: LogOffsetMetadata,
+    followerStartOffset: Long,
+    followerFetchTimeMs: Long,
+    leaderEndOffset: Long
+  ): Unit = {
+    // No need to calculate low watermark if there is no delayed DeleteRecordsRequest
+    val oldLeaderLW = if (delayedOperations.numDelayedDelete > 0) lowWatermarkIfLeader else -1L
+    val prevFollowerEndOffset = replica.stateSnapshot.logEndOffset
+    replica.updateFetchState(
+      followerFetchOffsetMetadata,
+      followerStartOffset,
+      followerFetchTimeMs,
+      leaderEndOffset
+    )
+
+    val newLeaderLW = if (delayedOperations.numDelayedDelete > 0) lowWatermarkIfLeader else -1L
+    // check if the LW of the partition has incremented
+    // since the replica's logStartOffset may have incremented
+    val leaderLWIncremented = newLeaderLW > oldLeaderLW
+
+    // Check if this in-sync replica needs to be added to the ISR.
+    maybeExpandIsr(replica)
+
+    // check if the HW of the partition can now be incremented
+    // since the replica may already be in the ISR and its LEO has just incremented
+    val leaderHWIncremented = if (prevFollowerEndOffset != replica.stateSnapshot.logEndOffset) {
+      // the leader log may be updated by ReplicaAlterLogDirsThread so the following method must be in lock of
+      // leaderIsrUpdateLock to prevent adding new hw to invalid log.
+      inReadLock(leaderIsrUpdateLock) {
+        leaderLogIfLocal.exists(leaderLog => maybeIncrementLeaderHW(leaderLog, followerFetchTimeMs))
+      }
+    } else {
+      false
+    }
 
-        debug(s"Recorded replica $followerId log end offset (LEO) position " +
-          s"${followerFetchOffsetMetadata.messageOffset} and log start offset $followerStartOffset.")
-        true
+    // some delayed operations may be unblocked after HW or LW changed
+    if (leaderLWIncremented || leaderHWIncremented)
+      tryCompleteDelayedRequests()
 
-      case None =>
-        false
-    }
+    debug(s"Recorded replica ${replica.brokerId} log end offset (LEO) position " +
+      s"${followerFetchOffsetMetadata.messageOffset} and log start offset $followerStartOffset.")
   }
 
   /**
@@ -715,31 +789,41 @@ class Partition(val topicPartition: TopicPartition,
    *
    * Note: public visibility for tests.
    *
-   * @param assignment An ordered sequence of all the broker ids that were assigned to this
+   * @param replicas An ordered sequence of all the broker ids that were assigned to this
    *                   topic partition
+   * @param isLeader True if this replica is the leader.
    * @param isr The set of broker ids that are known to be insync with the leader
    * @param addingReplicas An ordered sequence of all broker ids that will be added to the
     *                       assignment
    * @param removingReplicas An ordered sequence of all broker ids that will be removed from
     *                         the assignment
    */
-  def updateAssignmentAndIsr(assignment: Seq[Int],
-                             isr: Set[Int],
-                             addingReplicas: Seq[Int],
-                             removingReplicas: Seq[Int]): Unit = {
-    val newRemoteReplicas = assignment.filter(_ != localBrokerId)
-    val removedReplicas = remoteReplicasMap.keys.filter(!newRemoteReplicas.contains(_))
-
-    // due to code paths accessing remoteReplicasMap without a lock,
-    // first add the new replicas and then remove the old ones
-    newRemoteReplicas.foreach(id => remoteReplicasMap.getAndMaybePut(id, new Replica(id, topicPartition)))
-    remoteReplicasMap.removeAll(removedReplicas)
-
-    if (addingReplicas.nonEmpty || removingReplicas.nonEmpty)
-      assignmentState = OngoingReassignmentState(addingReplicas, removingReplicas, assignment)
+  def updateAssignmentAndIsr(
+    replicas: Seq[Int],
+    isLeader: Boolean,
+    isr: Set[Int],
+    addingReplicas: Seq[Int],
+    removingReplicas: Seq[Int],
+    leaderRecoveryState: LeaderRecoveryState
+  ): Unit = {
+    if (isLeader) {
+      val followers = replicas.filter(_ != localBrokerId)
+      val removedReplicas = remoteReplicasMap.keys.filter(!followers.contains(_))
+
+      // Due to code paths accessing remoteReplicasMap without a lock,
+      // first add the new replicas and then remove the old ones
+      followers.foreach(id => remoteReplicasMap.getAndMaybePut(id, new Replica(id, topicPartition)))
+      remoteReplicasMap.removeAll(removedReplicas)
+    } else {
+      remoteReplicasMap.clear()
+    }
+
+    assignmentState = if (addingReplicas.nonEmpty || removingReplicas.nonEmpty)
+      OngoingReassignmentState(addingReplicas, removingReplicas, replicas)
     else
-      assignmentState = SimpleAssignmentState(assignment)
-    isrState = CommittedIsr(isr)
+      SimpleAssignmentState(replicas)
+
+    partitionState = CommittedPartitionState(isr, leaderRecoveryState)
   }
 
   /**
@@ -757,40 +841,60 @@ class Partition(val topicPartition: TopicPartition,
    * This function can be triggered when a replica's LEO has incremented.
    */
   private def maybeExpandIsr(followerReplica: Replica): Unit = {
-    val needsIsrUpdate = !isrState.isInflight && canAddReplicaToIsr(followerReplica.brokerId) && inReadLock(leaderIsrUpdateLock) {
+    val needsIsrUpdate = !partitionState.isInflight && canAddReplicaToIsr(followerReplica.brokerId) && inReadLock(leaderIsrUpdateLock) {
       needsExpandIsr(followerReplica)
     }
     if (needsIsrUpdate) {
       val alterIsrUpdateOpt = inWriteLock(leaderIsrUpdateLock) {
         // check if this replica needs to be added to the ISR
-        if (!isrState.isInflight && needsExpandIsr(followerReplica)) {
-          Some(prepareIsrExpand(followerReplica.brokerId))
-        } else {
-          None
+        partitionState match {
+          case currentState: CommittedPartitionState if needsExpandIsr(followerReplica) =>
+            Some(prepareIsrExpand(currentState, followerReplica.brokerId))
+          case _ =>
+            None
         }
       }
-      // Send the AlterIsr request outside of the LeaderAndIsr lock since the completion logic
+      // Send the AlterPartition request outside of the LeaderAndIsr lock since the completion logic
       // may increment the high watermark (and consequently complete delayed operations).
-      alterIsrUpdateOpt.foreach(submitAlterIsr)
+      alterIsrUpdateOpt.foreach(submitAlterPartition)
     }
   }
 
   private def needsExpandIsr(followerReplica: Replica): Boolean = {
-    canAddReplicaToIsr(followerReplica.brokerId) && isFollowerAtHighwatermark(followerReplica)
+    canAddReplicaToIsr(followerReplica.brokerId) && isFollowerInSync(followerReplica)
   }
 
   private def canAddReplicaToIsr(followerReplicaId: Int): Boolean = {
-    val current = isrState
-    !current.isInflight && !current.isr.contains(followerReplicaId)
+    val current = partitionState
+    !current.isInflight &&
+      !current.isr.contains(followerReplicaId) &&
+      isReplicaIsrEligible(followerReplicaId)
   }
 
-  private def isFollowerAtHighwatermark(followerReplica: Replica): Boolean = {
+  private def isFollowerInSync(followerReplica: Replica): Boolean = {
     leaderLogIfLocal.exists { leaderLog =>
-      val followerEndOffset = followerReplica.logEndOffset
+      val followerEndOffset = followerReplica.stateSnapshot.logEndOffset
       followerEndOffset >= leaderLog.highWatermark && leaderEpochStartOffsetOpt.exists(followerEndOffset >= _)
     }
   }
 
+  private def isReplicaIsrEligible(followerReplicaId: Int): Boolean = {
+    metadataCache match {
+      // In KRaft mode, only replicas which are not fenced nor in controlled shutdown are
+      // allowed to join the ISR.
+      case kRaftMetadataCache: KRaftMetadataCache =>
+        !kRaftMetadataCache.isBrokerFenced(followerReplicaId) &&
+          !kRaftMetadataCache.isBrokerShuttingDown(followerReplicaId)
+
+      // In ZK mode, we just ensure the broker is alive. Although we do not check for shutting down brokers here,
+      // the controller will block them from being added to ISR.
+      case zkMetadataCache: ZkMetadataCache =>
+        zkMetadataCache.hasAliveBroker(followerReplicaId)
+
+      case _ => true
+    }
+  }
+
   /*
    * Returns a tuple where the first element is a boolean indicating whether enough replicas reached `requiredOffset`
    * and the second element is an error (which would be `Errors.NONE` for no error).
@@ -803,7 +907,7 @@ class Partition(val topicPartition: TopicPartition,
     leaderLogIfLocal match {
       case Some(leaderLog) =>
         // keep the current immutable replica list reference
-        val curMaximalIsr = isrState.maximalIsr
+        val curMaximalIsr = partitionState.maximalIsr
 
         if (isTraceEnabled) {
           def logEndOffsetString: ((Int, Long)) => String = {
@@ -811,7 +915,7 @@ class Partition(val topicPartition: TopicPartition,
           }
 
           val curInSyncReplicaObjects = (curMaximalIsr - localBrokerId).flatMap(getReplica)
-          val replicaInfo = curInSyncReplicaObjects.map(replica => (replica.brokerId, replica.logEndOffset))
+          val replicaInfo = curInSyncReplicaObjects.map(replica => (replica.brokerId, replica.stateSnapshot.logEndOffset))
           val localLogInfo = (localBrokerId, localLogOrException.logEndOffset)
           val (ackedReplicas, awaitingReplicas) = (replicaInfo + localLogInfo).partition { _._2 >= requiredOffset}
 
@@ -851,24 +955,27 @@ class Partition(val topicPartition: TopicPartition,
    * follower's log end offset may keep falling behind the HW (determined by the leader's log end offset) and therefore
    * will never be added to ISR.
    *
-   * With the addition of AlterIsr, we also consider newly added replicas as part of the ISR when advancing
+   * With the addition of AlterPartition, we also consider newly added replicas as part of the ISR when advancing
    * the HW. These replicas have not yet been committed to the ISR by the controller, so we could revert to the previously
-   * committed ISR. However, adding additional replicas to the ISR makes it more restrictive and therefor safe. We call
+   * committed ISR. However, adding additional replicas to the ISR makes it more restrictive and therefore safe. We call
    * this set the "maximal" ISR. See KIP-497 for more details
    *
    * Note There is no need to acquire the leaderIsrUpdate lock here since all callers of this private API acquire that lock
    *
    * @return true if the HW was incremented, and false otherwise.
    */
-  private def maybeIncrementLeaderHW(leaderLog: UnifiedLog, curTime: Long = time.milliseconds): Boolean = {
+  private def maybeIncrementLeaderHW(leaderLog: UnifiedLog, currentTimeMs: Long = time.milliseconds): Boolean = {
     // maybeIncrementLeaderHW is in the hot path, the following code is written to
     // avoid unnecessary collection generation
-    var newHighWatermark = leaderLog.logEndOffsetMetadata
+    val leaderLogEndOffset = leaderLog.logEndOffsetMetadata
+    var newHighWatermark = leaderLogEndOffset
     remoteReplicasMap.values.foreach { replica =>
       // Note here we are using the "maximal", see explanation above
-      if (replica.logEndOffsetMetadata.messageOffset < newHighWatermark.messageOffset &&
-        (curTime - replica.lastCaughtUpTimeMs <= replicaLagTimeMaxMs || isrState.maximalIsr.contains(replica.brokerId))) {
-        newHighWatermark = replica.logEndOffsetMetadata
+      val replicaState = replica.stateSnapshot
+      if (replicaState.logEndOffsetMetadata.messageOffset < newHighWatermark.messageOffset &&
+        (replicaState.isCaughtUp(leaderLogEndOffset.messageOffset, currentTimeMs, replicaLagTimeMaxMs)
+          || partitionState.maximalIsr.contains(replica.brokerId))) {
+        newHighWatermark = replicaState.logEndOffsetMetadata
       }
     }
 
@@ -883,7 +990,7 @@ class Partition(val topicPartition: TopicPartition,
         }
 
         if (isTraceEnabled) {
-          val replicaInfo = remoteReplicas.map(replica => (replica.brokerId, replica.logEndOffsetMetadata)).toSet
+          val replicaInfo = remoteReplicas.map(replica => (replica.brokerId, replica.stateSnapshot.logEndOffsetMetadata)).toSet
           val localLogInfo = (localBrokerId, localLogOrException.logEndOffsetMetadata)
           trace(s"Skipping update high watermark since new hw $newHighWatermark is not larger than old value. " +
             s"All current LEOs are ${(replicaInfo + localLogInfo).map(logEndOffsetString)}")
@@ -905,8 +1012,9 @@ class Partition(val topicPartition: TopicPartition,
     // care has been taken to avoid generating unnecessary collections in this code
     var lowWaterMark = localLogOrException.logStartOffset
     remoteReplicas.foreach { replica =>
-      if (metadataCache.hasAliveBroker(replica.brokerId) && replica.logStartOffset < lowWaterMark) {
-        lowWaterMark = replica.logStartOffset
+      val logStartOffset = replica.stateSnapshot.logStartOffset
+      if (metadataCache.hasAliveBroker(replica.brokerId) && logStartOffset < lowWaterMark) {
+        lowWaterMark = logStartOffset
       }
     }
 
@@ -925,7 +1033,7 @@ class Partition(val topicPartition: TopicPartition,
 
   def maybeShrinkIsr(): Unit = {
     def needsIsrUpdate: Boolean = {
-      !isrState.isInflight && inReadLock(leaderIsrUpdateLock) {
+      !partitionState.isInflight && inReadLock(leaderIsrUpdateLock) {
         needsShrinkIsr()
       }
     }
@@ -934,27 +1042,28 @@ class Partition(val topicPartition: TopicPartition,
       val alterIsrUpdateOpt = inWriteLock(leaderIsrUpdateLock) {
         leaderLogIfLocal.flatMap { leaderLog =>
           val outOfSyncReplicaIds = getOutOfSyncReplicas(replicaLagTimeMaxMs)
-          if (!isrState.isInflight && outOfSyncReplicaIds.nonEmpty) {
-            val outOfSyncReplicaLog = outOfSyncReplicaIds.map { replicaId =>
-              val logEndOffsetMessage = getReplica(replicaId)
-                .map(_.logEndOffset.toString)
-                .getOrElse("unknown")
-              s"(brokerId: $replicaId, endOffset: $logEndOffsetMessage)"
-            }.mkString(" ")
-            val newIsrLog = (isrState.isr -- outOfSyncReplicaIds).mkString(",")
-            info(s"Shrinking ISR from ${isrState.isr.mkString(",")} to $newIsrLog. " +
-              s"Leader: (highWatermark: ${leaderLog.highWatermark}, " +
-              s"endOffset: ${leaderLog.logEndOffset}). " +
-              s"Out of sync replicas: $outOfSyncReplicaLog.")
-            Some(prepareIsrShrink(outOfSyncReplicaIds))
-          } else {
-            None
+          partitionState match {
+            case currentState: CommittedPartitionState if outOfSyncReplicaIds.nonEmpty =>
+              val outOfSyncReplicaLog = outOfSyncReplicaIds.map { replicaId =>
+                val logEndOffsetMessage = getReplica(replicaId)
+                  .map(_.stateSnapshot.logEndOffset.toString)
+                  .getOrElse("unknown")
+                s"(brokerId: $replicaId, endOffset: $logEndOffsetMessage)"
+              }.mkString(" ")
+              val newIsrLog = (partitionState.isr -- outOfSyncReplicaIds).mkString(",")
+              info(s"Shrinking ISR from ${partitionState.isr.mkString(",")} to $newIsrLog. " +
+                s"Leader: (highWatermark: ${leaderLog.highWatermark}, " +
+                s"endOffset: ${leaderLog.logEndOffset}). " +
+                s"Out of sync replicas: $outOfSyncReplicaLog.")
+              Some(prepareIsrShrink(currentState, outOfSyncReplicaIds))
+            case _ =>
+              None
           }
         }
       }
-      // Send the AlterIsr request outside of the LeaderAndIsr lock since the completion logic
+      // Send the AlterPartition request outside of the LeaderAndIsr lock since the completion logic
       // may increment the high watermark (and consequently complete delayed operations).
-      alterIsrUpdateOpt.foreach(submitAlterIsr)
+      alterIsrUpdateOpt.foreach(submitAlterPartition)
     }
   }
 
@@ -967,8 +1076,7 @@ class Partition(val topicPartition: TopicPartition,
                                   currentTimeMs: Long,
                                   maxLagMs: Long): Boolean = {
     getReplica(replicaId).fold(true) { followerReplica =>
-      followerReplica.logEndOffset != leaderEndOffset &&
-        (currentTimeMs - followerReplica.lastCaughtUpTimeMs) > maxLagMs
+      !followerReplica.stateSnapshot.isCaughtUp(leaderEndOffset, currentTimeMs, maxLagMs)
     }
   }
 
@@ -986,7 +1094,7 @@ class Partition(val topicPartition: TopicPartition,
    * If an ISR update is in-flight, we will return an empty set here
    **/
   def getOutOfSyncReplicas(maxLagMs: Long): Set[Int] = {
-    val current = isrState
+    val current = partitionState
     if (!current.isInflight) {
       val candidateReplicaIds = current.isr - localBrokerId
       val currentTimeMs = time.milliseconds()
@@ -1047,11 +1155,11 @@ class Partition(val topicPartition: TopicPartition,
       leaderLogIfLocal match {
         case Some(leaderLog) =>
           val minIsr = leaderLog.config.minInSyncReplicas
-          val inSyncSize = isrState.isr.size
+          val inSyncSize = partitionState.isr.size
 
           // Avoid writing to leader if there are not enough insync replicas to make it safe
           if (inSyncSize < minIsr && requiredAcks == -1) {
-            throw new NotEnoughReplicasException(s"The size of the current ISR ${isrState.isr} " +
+            throw new NotEnoughReplicasException(s"The size of the current ISR ${partitionState.isr} " +
               s"is insufficient to satisfy the min.isr requirement of $minIsr for partition $topicPartition")
           }
 
@@ -1070,16 +1178,127 @@ class Partition(val topicPartition: TopicPartition,
     info.copy(leaderHwChange = if (leaderHWIncremented) LeaderHwChange.Increased else LeaderHwChange.Same)
   }
 
-  def readRecords(lastFetchedEpoch: Optional[Integer],
-                  fetchOffset: Long,
-                  currentLeaderEpoch: Optional[Integer],
-                  maxBytes: Int,
-                  fetchIsolation: FetchIsolation,
-                  fetchOnlyFromLeader: Boolean,
-                  minOneMessage: Boolean): LogReadInfo = inReadLock(leaderIsrUpdateLock) {
-    // decide whether to only fetch from leader
-    val localLog = localLogWithEpochOrException(currentLeaderEpoch, fetchOnlyFromLeader)
+  /**
+   * Fetch records from the partition.
+   *
+   * @param fetchParams parameters of the corresponding `Fetch` request
+   * @param fetchPartitionData partition-level parameters of the `Fetch` (e.g. the fetch offset)
+   * @param fetchTimeMs current time in milliseconds on the broker of this fetch request
+   * @param maxBytes the maximum bytes to return
+   * @param minOneMessage whether to ensure that at least one complete message is returned
+   * @param updateFetchState true if the Fetch should update replica state (only applies to follower fetches)
+   * @return [[LogReadInfo]] containing the fetched records or the diverging epoch if present
+   * @throws NotLeaderOrFollowerException if this node is not the current leader and [[FetchParams.fetchOnlyLeader]]
+   *                                      is enabled, or if this is a follower fetch with an older request version
+   *                                      and the replicaId is not recognized among the current valid replicas
+   * @throws FencedLeaderEpochException if the leader epoch in the `Fetch` request is lower than the current
+   *                                    leader epoch
+   * @throws UnknownLeaderEpochException if the leader epoch in the `Fetch` request is higher than the current
+   *                                     leader epoch, or if this is a follower fetch and the replicaId is not
+   *                                     recognized among the current valid replicas
+   * @throws OffsetOutOfRangeException if the fetch offset is smaller than the log start offset or larger than
+   *                                   the log end offset (or high watermark depending on [[FetchParams.isolation]]),
+   *                                   or if the end offset for the last fetched epoch in [[FetchRequest.PartitionData]]
+   *                                   cannot be determined from the local epoch cache (e.g. if it is larger than
+   *                                   any cached epoch value)
+   */
+  def fetchRecords(
+    fetchParams: FetchParams,
+    fetchPartitionData: FetchRequest.PartitionData,
+    fetchTimeMs: Long,
+    maxBytes: Int,
+    minOneMessage: Boolean,
+    updateFetchState: Boolean
+  ): LogReadInfo = {
+    def readFromLocalLog(log: UnifiedLog): LogReadInfo = {
+      readRecords(
+        log,
+        fetchPartitionData.lastFetchedEpoch,
+        fetchPartitionData.fetchOffset,
+        fetchPartitionData.currentLeaderEpoch,
+        maxBytes,
+        fetchParams.isolation,
+        minOneMessage
+      )
+    }
 
+    if (fetchParams.isFromFollower) {
+      // Check that the request is from a valid replica before doing the read
+      val (replica, logReadInfo) = inReadLock(leaderIsrUpdateLock) {
+        val localLog = localLogWithEpochOrThrow(
+          fetchPartitionData.currentLeaderEpoch,
+          fetchParams.fetchOnlyLeader
+        )
+        val replica = followerReplicaOrThrow(
+          fetchParams.replicaId,
+          fetchPartitionData
+        )
+        val logReadInfo = readFromLocalLog(localLog)
+        (replica, logReadInfo)
+      }
+
+      if (updateFetchState && logReadInfo.divergingEpoch.isEmpty) {
+        updateFollowerFetchState(
+          replica,
+          followerFetchOffsetMetadata = logReadInfo.fetchedData.fetchOffsetMetadata,
+          followerStartOffset = fetchPartitionData.logStartOffset,
+          followerFetchTimeMs = fetchTimeMs,
+          leaderEndOffset = logReadInfo.logEndOffset
+        )
+      }
+
+      logReadInfo
+    } else {
+      inReadLock(leaderIsrUpdateLock) {
+        val localLog = localLogWithEpochOrThrow(
+          fetchPartitionData.currentLeaderEpoch,
+          fetchParams.fetchOnlyLeader
+        )
+        readFromLocalLog(localLog)
+      }
+    }
+  }
+
+  private def followerReplicaOrThrow(
+    replicaId: Int,
+    fetchPartitionData: FetchRequest.PartitionData
+  ): Replica = {
+    getReplica(replicaId).getOrElse {
+      debug(s"Leader $localBrokerId failed to record follower $replicaId's position " +
+        s"${fetchPartitionData.fetchOffset}, and last sent high watermark since the replica is " +
+        s"not recognized to be one of the assigned replicas ${assignmentState.replicas.mkString(",")} " +
+        s"for leader epoch $leaderEpoch with partition epoch $partitionEpoch")
+
+      val error = if (fetchPartitionData.currentLeaderEpoch.isPresent) {
+        // The leader epoch is present in the request and matches the local epoch, but
+        // the replica is not in the replica set. This case is possible in KRaft,
+        // for example, when new replicas are added as part of a reassignment.
+        // We return UNKNOWN_LEADER_EPOCH to signify that the tuple (replicaId, leaderEpoch)
+        // is not yet recognized as valid, which causes the follower to retry.
+        Errors.UNKNOWN_LEADER_EPOCH
+      } else {
+        // The request has no leader epoch, which means it is an older version. We cannot
+        // say if the follower's state is stale or the local state is. In this case, we
+        // return `NOT_LEADER_OR_FOLLOWER` for lack of a better error so that the follower
+        // will retry.
+        Errors.NOT_LEADER_OR_FOLLOWER
+      }
+
+      throw error.exception(s"Replica $replicaId is not recognized as a " +
+        s"valid replica of $topicPartition in leader epoch $leaderEpoch with " +
+        s"partition epoch $partitionEpoch")
+    }
+  }
+
+  private def readRecords(
+    localLog: UnifiedLog,
+    lastFetchedEpoch: Optional[Integer],
+    fetchOffset: Long,
+    currentLeaderEpoch: Optional[Integer],
+    maxBytes: Int,
+    fetchIsolation: FetchIsolation,
+    minOneMessage: Boolean
+  ): LogReadInfo = {
     // Note we use the log end offset prior to the read. This ensures that any appends following
     // the fetch do not prevent a follower from coming into sync.
     val initialHighWatermark = localLog.highWatermark
@@ -1106,18 +1325,12 @@ class Partition(val topicPartition: TopicPartition,
       }
 
       if (epochEndOffset.leaderEpoch < fetchEpoch || epochEndOffset.endOffset < fetchOffset) {
-        val emptyFetchData = FetchDataInfo(
-          fetchOffsetMetadata = LogOffsetMetadata(fetchOffset),
-          records = MemoryRecords.EMPTY,
-          abortedTransactions = None
-        )
-
         val divergingEpoch = new FetchResponseData.EpochEndOffset()
           .setEpoch(epochEndOffset.leaderEpoch)
           .setEndOffset(epochEndOffset.endOffset)
 
         return LogReadInfo(
-          fetchedData = emptyFetchData,
+          fetchedData = FetchDataInfo.empty(fetchOffset),
           divergingEpoch = Some(divergingEpoch),
           highWatermark = initialHighWatermark,
           logStartOffset = initialLogStartOffset,
@@ -1126,14 +1339,21 @@ class Partition(val topicPartition: TopicPartition,
       }
     }
 
-    val fetchedData = localLog.read(fetchOffset, maxBytes, fetchIsolation, minOneMessage)
+    val fetchedData = localLog.read(
+      fetchOffset,
+      maxBytes,
+      fetchIsolation,
+      minOneMessage
+    )
+
     LogReadInfo(
       fetchedData = fetchedData,
       divergingEpoch = None,
       highWatermark = initialHighWatermark,
       logStartOffset = initialLogStartOffset,
       logEndOffset = initialLogEndOffset,
-      lastStableOffset = initialLastStableOffset)
+      lastStableOffset = initialLastStableOffset
+    )
   }
 
   def fetchOffsetForTimestamp(timestamp: Long,
@@ -1141,7 +1361,7 @@ class Partition(val topicPartition: TopicPartition,
                               currentLeaderEpoch: Optional[Integer],
                               fetchOnlyFromLeader: Boolean): Option[TimestampAndOffset] = inReadLock(leaderIsrUpdateLock) {
     // decide whether to only fetch from leader
-    val localLog = localLogWithEpochOrException(currentLeaderEpoch, fetchOnlyFromLeader)
+    val localLog = localLogWithEpochOrThrow(currentLeaderEpoch, fetchOnlyFromLeader)
 
     val lastFetchableOffset = isolationLevel match {
       case Some(IsolationLevel.READ_COMMITTED) => localLog.lastStableOffset
@@ -1202,7 +1422,7 @@ class Partition(val topicPartition: TopicPartition,
   def fetchOffsetSnapshot(currentLeaderEpoch: Optional[Integer],
                           fetchOnlyFromLeader: Boolean): LogOffsetSnapshot = inReadLock(leaderIsrUpdateLock) {
     // decide whether to only fetch from leader
-    val localLog = localLogWithEpochOrException(currentLeaderEpoch, fetchOnlyFromLeader)
+    val localLog = localLogWithEpochOrThrow(currentLeaderEpoch, fetchOnlyFromLeader)
     localLog.fetchOffsetSnapshot
   }
 
@@ -1210,7 +1430,7 @@ class Partition(val topicPartition: TopicPartition,
                                      maxNumOffsets: Int,
                                      isFromConsumer: Boolean,
                                      fetchOnlyFromLeader: Boolean): Seq[Long] = inReadLock(leaderIsrUpdateLock) {
-    val localLog = localLogWithEpochOrException(Optional.empty(), fetchOnlyFromLeader)
+    val localLog = localLogWithEpochOrThrow(Optional.empty(), fetchOnlyFromLeader)
     val allOffsets = localLog.legacyFetchOffsetsBefore(timestamp, maxNumOffsets)
 
     if (!isFromConsumer) {
@@ -1324,48 +1544,78 @@ class Partition(val topicPartition: TopicPartition,
     }
   }
 
-  private def prepareIsrExpand(newInSyncReplicaId: Int): PendingExpandIsr = {
+  private def prepareIsrExpand(
+    currentState: CommittedPartitionState,
+    newInSyncReplicaId: Int
+  ): PendingExpandIsr = {
     // When expanding the ISR, we assume that the new replica will make it into the ISR
     // before we receive confirmation that it has. This ensures that the HW will already
     // reflect the updated ISR even if there is a delay before we receive the confirmation.
     // Alternatively, if the update fails, no harm is done since the expanded ISR puts
     // a stricter requirement for advancement of the HW.
-    val isrToSend = isrState.isr + newInSyncReplicaId
-    val newLeaderAndIsr = new LeaderAndIsr(localBrokerId, leaderEpoch, isrToSend.toList, zkVersion)
-    val updatedState = PendingExpandIsr(isrState.isr, newInSyncReplicaId, newLeaderAndIsr)
-    isrState = updatedState
+    val isrToSend = partitionState.isr + newInSyncReplicaId
+    val newLeaderAndIsr = LeaderAndIsr(
+      localBrokerId,
+      leaderEpoch,
+      isrToSend.toList,
+      partitionState.leaderRecoveryState,
+      partitionEpoch
+    )
+    val updatedState = PendingExpandIsr(
+      newInSyncReplicaId,
+      newLeaderAndIsr,
+      currentState
+    )
+    partitionState = updatedState
     updatedState
   }
 
-  private[cluster] def prepareIsrShrink(outOfSyncReplicaIds: Set[Int]): PendingShrinkIsr = {
+  private[cluster] def prepareIsrShrink(
+    currentState: CommittedPartitionState,
+    outOfSyncReplicaIds: Set[Int]
+  ): PendingShrinkIsr = {
     // When shrinking the ISR, we cannot assume that the update will succeed as this could
-    // erroneously advance the HW if the `AlterIsr` were to fail. Hence the "maximal ISR"
+    // erroneously advance the HW if the `AlterPartition` were to fail. Hence the "maximal ISR"
     // for `PendingShrinkIsr` is the the current ISR.
-    val isrToSend = isrState.isr -- outOfSyncReplicaIds
-    val newLeaderAndIsr = new LeaderAndIsr(localBrokerId, leaderEpoch, isrToSend.toList, zkVersion)
-    val updatedState = PendingShrinkIsr(isrState.isr, outOfSyncReplicaIds, newLeaderAndIsr)
-    isrState = updatedState
+    val isrToSend = partitionState.isr -- outOfSyncReplicaIds
+    val newLeaderAndIsr = LeaderAndIsr(
+      localBrokerId,
+      leaderEpoch,
+      isrToSend.toList,
+      partitionState.leaderRecoveryState,
+      partitionEpoch
+    )
+    val updatedState = PendingShrinkIsr(
+      outOfSyncReplicaIds,
+      newLeaderAndIsr,
+      currentState
+    )
+    partitionState = updatedState
     updatedState
   }
 
-  private def submitAlterIsr(proposedIsrState: PendingIsrChange): CompletableFuture[LeaderAndIsr] = {
+  private def submitAlterPartition(proposedIsrState: PendingPartitionChange): CompletableFuture[LeaderAndIsr] = {
     debug(s"Submitting ISR state change $proposedIsrState")
-    val future = alterIsrManager.submit(topicPartition, proposedIsrState.sentLeaderAndIsr, controllerEpoch)
+    val future = alterIsrManager.submit(
+      new TopicIdPartition(topicId.getOrElse(Uuid.ZERO_UUID), topicPartition),
+      proposedIsrState.sentLeaderAndIsr,
+      controllerEpoch
+    )
     future.whenComplete { (leaderAndIsr, e) =>
       var hwIncremented = false
       var shouldRetry = false
 
       inWriteLock(leaderIsrUpdateLock) {
-        if (isrState != proposedIsrState) {
-          // This means isrState was updated through leader election or some other mechanism
-          // before we got the AlterIsr response. We don't know what happened on the controller
+        if (partitionState != proposedIsrState) {
+          // This means partitionState was updated through leader election or some other mechanism
+          // before we got the AlterPartition response. We don't know what happened on the controller
           // exactly, but we do know this response is out of date so we ignore it.
           debug(s"Ignoring failed ISR update to $proposedIsrState since we have already " +
-            s"updated state to $isrState")
+            s"updated state to $partitionState")
         } else if (leaderAndIsr != null) {
-          hwIncremented = handleAlterIsrUpdate(proposedIsrState, leaderAndIsr)
+          hwIncremented = handleAlterPartitionUpdate(proposedIsrState, leaderAndIsr)
         } else {
-          shouldRetry = handleAlterIsrError(proposedIsrState, Errors.forException(e))
+          shouldRetry = handleAlterPartitionError(proposedIsrState, Errors.forException(e))
         }
       }
 
@@ -1373,45 +1623,71 @@ class Partition(val topicPartition: TopicPartition,
         tryCompleteDelayedRequests()
       }
 
-      // Send the AlterIsr request outside of the LeaderAndIsr lock since the completion logic
+      // Send the AlterPartition request outside of the LeaderAndIsr lock since the completion logic
       // may increment the high watermark (and consequently complete delayed operations).
       if (shouldRetry) {
-        submitAlterIsr(proposedIsrState)
+        submitAlterPartition(proposedIsrState)
       }
     }
   }
 
   /**
-   * Handle a failed `AlterIsr` request. For errors which are non-retriable, we simply give up.
-   * This leaves [[Partition.isrState]] in a pending state. Since the error was non-retriable,
+   * Handle a failed `AlterPartition` request. For errors which are non-retriable, we simply give up.
+   * This leaves [[Partition.partitionState]] in a pending state. Since the error was non-retriable,
    * we are okay staying in this state until we see new metadata from LeaderAndIsr (or an update
    * to the KRaft metadata log).
    *
    * @param proposedIsrState The ISR state change that was requested
-   * @param error The error returned from [[AlterIsrManager]]
-   * @return true if the `AlterIsr` request should be retried, false otherwise
+   * @param error The error returned from [[AlterPartitionManager]]
+   * @return true if the `AlterPartition` request should be retried, false otherwise
    */
-  private def handleAlterIsrError(
-    proposedIsrState: PendingIsrChange,
+  private def handleAlterPartitionError(
+    proposedIsrState: PendingPartitionChange,
     error: Errors
   ): Boolean = {
-    isrChangeListener.markFailed()
+    alterPartitionListener.markFailed()
     error match {
-      case Errors.OPERATION_NOT_ATTEMPTED =>
-        // Since the operation was not attempted, it is safe to reset back to the committed state.
-        isrState = CommittedIsr(proposedIsrState.isr)
-        debug(s"Failed to update ISR to $proposedIsrState since there is a pending ISR update still inflight. " +
-          s"ISR state has been reset to the latest committed state $isrState")
+      case Errors.OPERATION_NOT_ATTEMPTED | Errors.INELIGIBLE_REPLICA =>
+        // Care must be taken when resetting to the last committed state since we may not
+        // know in general whether the request was applied or not taking into account retries
+        // and controller changes which might have occurred before we received the response.
+        // However, when the controller returns INELIGIBLE_REPLICA (or OPERATION_NOT_ATTEMPTED),
+        // the controller is explicitly telling us 1) that the current partition epoch is correct,
+        // and 2) that the request was not applied. Even if the controller that sent the response
+        // is stale, we are guaranteed from the monotonicity of the controller epoch that the
+        // request could not have been applied by any past or future controller.
+        partitionState = proposedIsrState.lastCommittedState
+        info(s"Failed to alter partition to $proposedIsrState since the controller rejected the request with $error. " +
+          s"Partition state has been reset to the latest committed state $partitionState.")
         false
       case Errors.UNKNOWN_TOPIC_OR_PARTITION =>
-        debug(s"Failed to update ISR to $proposedIsrState since the controller doesn't know about " +
-          "this topic or partition. Giving up.")
+        debug(s"Failed to alter partition to $proposedIsrState since the controller doesn't know about " +
+          "this topic or partition. Partition state may be out of sync, awaiting new the latest metadata.")
+        false
+      case Errors.UNKNOWN_TOPIC_ID =>
+        debug(s"Failed to alter partition to $proposedIsrState since the controller doesn't know about " +
+          "this topic. Partition state may be out of sync, awaiting new the latest metadata.")
         false
       case Errors.FENCED_LEADER_EPOCH =>
-        debug(s"Failed to update ISR to $proposedIsrState since the leader epoch is old. Giving up.")
+        debug(s"Failed to alter partition to $proposedIsrState since the leader epoch is old. " +
+          "Partition state may be out of sync, awaiting new the latest metadata.")
         false
       case Errors.INVALID_UPDATE_VERSION =>
-        debug(s"Failed to update ISR to $proposedIsrState because the version is invalid. Giving up.")
+        debug(s"Failed to alter partition to $proposedIsrState because the partition epoch is invalid. " +
+          "Partition state may be out of sync, awaiting new the latest metadata.")
+        false
+      case Errors.INVALID_REQUEST =>
+        debug(s"Failed to alter partition to $proposedIsrState because the request is invalid. " +
+          "Partition state may be out of sync, awaiting new the latest metadata.")
+        false
+      case Errors.NEW_LEADER_ELECTED =>
+        // The operation completed successfully but this replica got removed from the replica set by the controller
+        // while completing a ongoing reassignment. This replica is no longer the leader but it does not know it
+        // yet. It should remain in the current pending state until the metadata overrides it.
+        // This is only raised in KRaft mode.
+        debug(s"The alter partition request successfully updated the partition state to $proposedIsrState but " +
+          "this replica got removed from the replica set while completing a reassignment. " +
+          "Waiting on new metadata to clean up this replica.")
         false
       case _ =>
         warn(s"Failed to update ISR to $proposedIsrState due to unexpected $error. Retrying.")
@@ -1420,39 +1696,36 @@ class Partition(val topicPartition: TopicPartition,
   }
 
   /**
-   * Handle a successful `AlterIsr` response.
+   * Handle a successful `AlterPartition` response.
    *
    * @param proposedIsrState The ISR state change that was requested
    * @param leaderAndIsr The updated LeaderAndIsr state
    * @return true if the high watermark was successfully incremented following, false otherwise
    */
-  private def handleAlterIsrUpdate(
-    proposedIsrState: PendingIsrChange,
+  private def handleAlterPartitionUpdate(
+    proposedIsrState: PendingPartitionChange,
     leaderAndIsr: LeaderAndIsr
   ): Boolean = {
     // Success from controller, still need to check a few things
     if (leaderAndIsr.leaderEpoch != leaderEpoch) {
       debug(s"Ignoring new ISR $leaderAndIsr since we have a stale leader epoch $leaderEpoch.")
-      isrChangeListener.markFailed()
+      alterPartitionListener.markFailed()
       false
-    } else if (leaderAndIsr.zkVersion < zkVersion) {
-      debug(s"Ignoring new ISR $leaderAndIsr since we have a newer version $zkVersion.")
-      isrChangeListener.markFailed()
+    } else if (leaderAndIsr.partitionEpoch < partitionEpoch) {
+      debug(s"Ignoring new ISR $leaderAndIsr since we have a newer version $partitionEpoch.")
+      alterPartitionListener.markFailed()
       false
     } else {
       // This is one of two states:
-      //   1) leaderAndIsr.zkVersion > zkVersion: Controller updated to new version with proposedIsrState.
-      //   2) leaderAndIsr.zkVersion == zkVersion: No update was performed since proposed and actual state are the same.
+      //   1) leaderAndIsr.partitionEpoch > partitionEpoch: Controller updated to new version with proposedIsrState.
+      //   2) leaderAndIsr.partitionEpoch == partitionEpoch: No update was performed since proposed and actual state are the same.
       // In both cases, we want to move from Pending to Committed state to ensure new updates are processed.
 
-      isrState = CommittedIsr(leaderAndIsr.isr.toSet)
-      zkVersion = leaderAndIsr.zkVersion
-      info(s"ISR updated to ${isrState.isr.mkString(",")} and version updated to $zkVersion")
+      partitionState = CommittedPartitionState(leaderAndIsr.isr.toSet, leaderAndIsr.leaderRecoveryState)
+      partitionEpoch = leaderAndIsr.partitionEpoch
+      info(s"ISR updated to ${partitionState.isr.mkString(",")} and version updated to $partitionEpoch")
 
-      proposedIsrState match {
-        case PendingExpandIsr(_, _, _) => isrChangeListener.markExpand()
-        case PendingShrinkIsr(_, _, _) => isrChangeListener.markShrink()
-      }
+      proposedIsrState.notifyListener(alterPartitionListener)
 
       // we may need to increment high watermark since ISR could be down to 1
       leaderLogIfLocal.exists(log => maybeIncrementLeaderHW(log))
@@ -1473,13 +1746,14 @@ class Partition(val topicPartition: TopicPartition,
     partitionString.append("; Partition: " + partitionId)
     partitionString.append("; Leader: " + leaderReplicaIdOpt)
     partitionString.append("; Replicas: " + assignmentState.replicas.mkString(","))
-    partitionString.append("; ISR: " + isrState.isr.mkString(","))
+    partitionString.append("; ISR: " + partitionState.isr.mkString(","))
     assignmentState match {
       case OngoingReassignmentState(adding, removing, _) =>
         partitionString.append("; AddingReplicas: " + adding.mkString(","))
         partitionString.append("; RemovingReplicas: " + removing.mkString(","))
       case _ =>
     }
+    partitionString.append("; LeaderRecoveryState: " + partitionState.leaderRecoveryState)
     partitionString.toString
   }
 }
diff --git a/core/src/main/scala/kafka/cluster/Replica.scala b/core/src/main/scala/kafka/cluster/Replica.scala
index 921faef061baa..0321488af4d86 100644
--- a/core/src/main/scala/kafka/cluster/Replica.scala
+++ b/core/src/main/scala/kafka/cluster/Replica.scala
@@ -13,7 +13,7 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
-*/
+ */
 
 package kafka.cluster
 
@@ -22,35 +22,67 @@ import kafka.server.LogOffsetMetadata
 import kafka.utils.Logging
 import org.apache.kafka.common.TopicPartition
 
-class Replica(val brokerId: Int, val topicPartition: TopicPartition) extends Logging {
-  // the log end offset value, kept in all replicas;
-  // for local replica it is the log's end offset, for remote replicas its value is only updated by follower fetch
-  @volatile private[this] var _logEndOffsetMetadata = LogOffsetMetadata.UnknownOffsetMetadata
-  // the log start offset value, kept in all replicas;
-  // for local replica it is the log's start offset, for remote replicas its value is only updated by follower fetch
-  @volatile private[this] var _logStartOffset = UnifiedLog.UnknownOffset
+import java.util.concurrent.atomic.AtomicReference
+
+case class ReplicaState(
+  // The log start offset value, kept in all replicas; for local replica it is the
+  // log's start offset, for remote replicas its value is only updated by follower fetch.
+  logStartOffset: Long,
+
+  // The log end offset value, kept in all replicas; for local replica it is the
+  // log's end offset, for remote replicas its value is only updated by follower fetch.
+  logEndOffsetMetadata: LogOffsetMetadata,
 
-  // The log end offset value at the time the leader received the last FetchRequest from this follower
-  // This is used to determine the lastCaughtUpTimeMs of the follower
-  @volatile private[this] var lastFetchLeaderLogEndOffset = 0L
+  // The log end offset value at the time the leader received the last FetchRequest from this follower.
+  // This is used to determine the lastCaughtUpTimeMs of the follower. It is reset by the leader
+  // when a LeaderAndIsr request is received and might be reset when the leader appends a record
+  // to its log.
+  lastFetchLeaderLogEndOffset: Long,
 
-  // The time when the leader received the last FetchRequest from this follower
-  // This is used to determine the lastCaughtUpTimeMs of the follower
-  @volatile private[this] var lastFetchTimeMs = 0L
+  // The time when the leader received the last FetchRequest from this follower.
+  // This is used to determine the lastCaughtUpTimeMs of the follower.
+  lastFetchTimeMs: Long,
 
   // lastCaughtUpTimeMs is the largest time t such that the offset of most recent FetchRequest from this follower >=
   // the LEO of leader at time t. This is used to determine the lag of this follower and ISR of this partition.
-  @volatile private[this] var _lastCaughtUpTimeMs = 0L
+  lastCaughtUpTimeMs: Long
+) {
+  /**
+   * Returns the current log end offset of the replica.
+   */
+  def logEndOffset: Long = logEndOffsetMetadata.messageOffset
 
-  def logStartOffset: Long = _logStartOffset
+  /**
+   * Returns true when the replica is considered as "caught-up". A replica is
+   * considered "caught-up" when its log end offset is equals to the log end
+   * offset of the leader OR when its last caught up time minus the current
+   * time is smaller than the max replica lag.
+   */
+  def isCaughtUp(
+    leaderEndOffset: Long,
+    currentTimeMs: Long,
+    replicaMaxLagMs: Long
+  ): Boolean = {
+    leaderEndOffset == logEndOffset || currentTimeMs - lastCaughtUpTimeMs <= replicaMaxLagMs
+  }
+}
 
-  def logEndOffsetMetadata: LogOffsetMetadata = _logEndOffsetMetadata
+object ReplicaState {
+  val Empty: ReplicaState = ReplicaState(
+    logEndOffsetMetadata = LogOffsetMetadata.UnknownOffsetMetadata,
+    logStartOffset = UnifiedLog.UnknownOffset,
+    lastFetchLeaderLogEndOffset = 0L,
+    lastFetchTimeMs = 0L,
+    lastCaughtUpTimeMs = 0L
+  )
+}
 
-  def logEndOffset: Long = logEndOffsetMetadata.messageOffset
+class Replica(val brokerId: Int, val topicPartition: TopicPartition) extends Logging {
+  private val replicaState = new AtomicReference[ReplicaState](ReplicaState.Empty)
 
-  def lastCaughtUpTimeMs: Long = _lastCaughtUpTimeMs
+  def stateSnapshot: ReplicaState = replicaState.get
 
-  /*
+  /**
    * If the FetchRequest reads up to the log end offset of the leader when the current fetch request is received,
    * set `lastCaughtUpTimeMs` to the time when the current fetch request was received.
    *
@@ -62,39 +94,85 @@ class Replica(val brokerId: Int, val topicPartition: TopicPartition) extends Log
    * fetch request is always smaller than the leader's LEO, which can happen if small produce requests are received at
    * high frequency.
    */
-  def updateFetchState(followerFetchOffsetMetadata: LogOffsetMetadata,
-                       followerStartOffset: Long,
-                       followerFetchTimeMs: Long,
-                       leaderEndOffset: Long): Unit = {
-    if (followerFetchOffsetMetadata.messageOffset >= leaderEndOffset)
-      _lastCaughtUpTimeMs = math.max(_lastCaughtUpTimeMs, followerFetchTimeMs)
-    else if (followerFetchOffsetMetadata.messageOffset >= lastFetchLeaderLogEndOffset)
-      _lastCaughtUpTimeMs = math.max(_lastCaughtUpTimeMs, lastFetchTimeMs)
-
-    _logStartOffset = followerStartOffset
-    _logEndOffsetMetadata = followerFetchOffsetMetadata
-    lastFetchLeaderLogEndOffset = leaderEndOffset
-    lastFetchTimeMs = followerFetchTimeMs
+  def updateFetchState(
+    followerFetchOffsetMetadata: LogOffsetMetadata,
+    followerStartOffset: Long,
+    followerFetchTimeMs: Long,
+    leaderEndOffset: Long
+  ): Unit = {
+    replicaState.updateAndGet { currentReplicaState =>
+      val lastCaughtUpTime = if (followerFetchOffsetMetadata.messageOffset >= leaderEndOffset) {
+        math.max(currentReplicaState.lastCaughtUpTimeMs, followerFetchTimeMs)
+      } else if (followerFetchOffsetMetadata.messageOffset >= currentReplicaState.lastFetchLeaderLogEndOffset) {
+        math.max(currentReplicaState.lastCaughtUpTimeMs, currentReplicaState.lastFetchTimeMs)
+      } else {
+        currentReplicaState.lastCaughtUpTimeMs
+      }
+
+      ReplicaState(
+        logStartOffset = followerStartOffset,
+        logEndOffsetMetadata = followerFetchOffsetMetadata,
+        lastFetchLeaderLogEndOffset = math.max(leaderEndOffset, currentReplicaState.lastFetchLeaderLogEndOffset),
+        lastFetchTimeMs = followerFetchTimeMs,
+        lastCaughtUpTimeMs = lastCaughtUpTime
+      )
+    }
   }
 
-  def resetLastCaughtUpTime(curLeaderLogEndOffset: Long, curTimeMs: Long, lastCaughtUpTimeMs: Long): Unit = {
-    lastFetchLeaderLogEndOffset = curLeaderLogEndOffset
-    lastFetchTimeMs = curTimeMs
-    _lastCaughtUpTimeMs = lastCaughtUpTimeMs
+  /**
+   * When the leader is elected or re-elected, the state of the follower is reinitialized
+   * accordingly.
+   */
+  def resetReplicaState(
+    currentTimeMs: Long,
+    leaderEndOffset: Long,
+    isNewLeader: Boolean,
+    isFollowerInSync: Boolean
+  ): Unit = {
+    replicaState.updateAndGet { currentReplicaState =>
+      // When the leader is elected or re-elected, the follower's last caught up time
+      // is set to the current time if the follower is in the ISR, else to 0. The latter
+      // is done to ensure that the high watermark is not hold back unnecessarily for
+      // a follower which is not in the ISR anymore.
+      val lastCaughtUpTimeMs = if (isFollowerInSync) currentTimeMs else 0L
+
+      if (isNewLeader) {
+        ReplicaState(
+          logStartOffset = UnifiedLog.UnknownOffset,
+          logEndOffsetMetadata = LogOffsetMetadata.UnknownOffsetMetadata,
+          lastFetchLeaderLogEndOffset = UnifiedLog.UnknownOffset,
+          lastFetchTimeMs = 0L,
+          lastCaughtUpTimeMs = lastCaughtUpTimeMs
+        )
+      } else {
+        ReplicaState(
+          logStartOffset = currentReplicaState.logStartOffset,
+          logEndOffsetMetadata = currentReplicaState.logEndOffsetMetadata,
+          lastFetchLeaderLogEndOffset = leaderEndOffset,
+          // When the leader is re-elected, the follower's last fetch time is
+          // set to the current time if the follower is in the ISR, else to 0.
+          // The latter is done to ensure that the follower is not brought back
+          // into the ISR before a fetch is received.
+          lastFetchTimeMs = if (isFollowerInSync) currentTimeMs else 0L,
+          lastCaughtUpTimeMs = lastCaughtUpTimeMs
+        )
+      }
+    }
     trace(s"Reset state of replica to $this")
   }
 
   override def toString: String = {
+    val replicaState = this.replicaState.get
     val replicaString = new StringBuilder
-    replicaString.append("Replica(replicaId=" + brokerId)
+    replicaString.append(s"Replica(replicaId=$brokerId")
     replicaString.append(s", topic=${topicPartition.topic}")
     replicaString.append(s", partition=${topicPartition.partition}")
-    replicaString.append(s", lastCaughtUpTimeMs=$lastCaughtUpTimeMs")
-    replicaString.append(s", logStartOffset=$logStartOffset")
-    replicaString.append(s", logEndOffset=$logEndOffset")
-    replicaString.append(s", logEndOffsetMetadata=$logEndOffsetMetadata")
-    replicaString.append(s", lastFetchLeaderLogEndOffset=$lastFetchLeaderLogEndOffset")
-    replicaString.append(s", lastFetchTimeMs=$lastFetchTimeMs")
+    replicaString.append(s", lastCaughtUpTimeMs=${replicaState.lastCaughtUpTimeMs}")
+    replicaString.append(s", logStartOffset=${replicaState.logStartOffset}")
+    replicaString.append(s", logEndOffset=${replicaState.logEndOffsetMetadata.messageOffset}")
+    replicaString.append(s", logEndOffsetMetadata=${replicaState.logEndOffsetMetadata}")
+    replicaString.append(s", lastFetchLeaderLogEndOffset=${replicaState.lastFetchLeaderLogEndOffset}")
+    replicaString.append(s", lastFetchTimeMs=${replicaState.lastFetchTimeMs}")
     replicaString.append(")")
     replicaString.toString
   }
diff --git a/core/src/main/scala/kafka/common/Config.scala b/core/src/main/scala/kafka/common/Config.scala
deleted file mode 100644
index f56cca8bd0528..0000000000000
--- a/core/src/main/scala/kafka/common/Config.scala
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.common
-
-import util.matching.Regex
-import kafka.utils.Logging
-import org.apache.kafka.common.errors.InvalidConfigurationException
-
-trait Config extends Logging {
-
-  def validateChars(prop: String, value: String): Unit = {
-    val legalChars = "[a-zA-Z0-9\\._\\-]"
-    val rgx = new Regex(legalChars + "*")
-
-    rgx.findFirstIn(value) match {
-      case Some(t) =>
-        if (!t.equals(value))
-          throw new InvalidConfigurationException(prop + " " + value + " is illegal, contains a character other than ASCII alphanumerics, '.', '_' and '-'")
-      case None => throw new InvalidConfigurationException(prop + " " + value + " is illegal, contains a character other than ASCII alphanumerics, '.', '_' and '-'")
-    }
-  }
-}
-
-
-
-
diff --git a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
index 2f10710ddfef5..d900a7ccea9aa 100755
--- a/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
+++ b/core/src/main/scala/kafka/controller/ControllerChannelManager.scala
@@ -38,6 +38,7 @@ import org.apache.kafka.common.security.JaasContext
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.utils.{LogContext, Time}
 import org.apache.kafka.common.{KafkaException, Node, Reconfigurable, TopicPartition, Uuid}
+import org.apache.kafka.server.common.MetadataVersion._
 
 import scala.jdk.CollectionConverters._
 import scala.collection.mutable.HashMap
@@ -377,18 +378,24 @@ abstract class AbstractControllerBrokerRequestBatch(config: KafkaConfig,
       val result = leaderAndIsrRequestMap.getOrElseUpdate(brokerId, mutable.Map.empty)
       val alreadyNew = result.get(topicPartition).exists(_.isNew)
       val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
-      result.put(topicPartition, new LeaderAndIsrPartitionState()
+      val partitionState = new LeaderAndIsrPartitionState()
         .setTopicName(topicPartition.topic)
         .setPartitionIndex(topicPartition.partition)
         .setControllerEpoch(leaderIsrAndControllerEpoch.controllerEpoch)
         .setLeader(leaderAndIsr.leader)
         .setLeaderEpoch(leaderAndIsr.leaderEpoch)
         .setIsr(leaderAndIsr.isr.map(Integer.valueOf).asJava)
-        .setZkVersion(leaderAndIsr.zkVersion)
+        .setPartitionEpoch(leaderAndIsr.partitionEpoch)
         .setReplicas(replicaAssignment.replicas.map(Integer.valueOf).asJava)
         .setAddingReplicas(replicaAssignment.addingReplicas.map(Integer.valueOf).asJava)
         .setRemovingReplicas(replicaAssignment.removingReplicas.map(Integer.valueOf).asJava)
-        .setIsNew(isNew || alreadyNew))
+        .setIsNew(isNew || alreadyNew)
+
+      if (config.interBrokerProtocolVersion.isAtLeast(IBP_3_2_IV0)) {
+        partitionState.setLeaderRecoveryState(leaderAndIsr.leaderRecoveryState.value)
+      }
+
+      result.put(topicPartition, partitionState)
     }
 
     addUpdateMetadataRequestForBrokers(controllerContext.liveOrShuttingDownBrokerIds.toSeq, Set(topicPartition))
@@ -437,7 +444,7 @@ abstract class AbstractControllerBrokerRequestBatch(config: KafkaConfig,
             .setLeader(updatedLeaderAndIsr.leader)
             .setLeaderEpoch(updatedLeaderAndIsr.leaderEpoch)
             .setIsr(updatedLeaderAndIsr.isr.map(Integer.valueOf).asJava)
-            .setZkVersion(updatedLeaderAndIsr.zkVersion)
+            .setZkVersion(updatedLeaderAndIsr.partitionEpoch)
             .setReplicas(replicas.map(Integer.valueOf).asJava)
             .setOfflineReplicas(offlineReplicas.map(Integer.valueOf).asJava)
           updateMetadataRequestPartitionInfoMap.put(partition, partitionStateInfo)
@@ -454,11 +461,12 @@ abstract class AbstractControllerBrokerRequestBatch(config: KafkaConfig,
 
   private def sendLeaderAndIsrRequest(controllerEpoch: Int, stateChangeLog: StateChangeLogger): Unit = {
     val leaderAndIsrRequestVersion: Short =
-      if (config.interBrokerProtocolVersion >= KAFKA_2_8_IV1) 5
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_4_IV1) 4
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_4_IV0) 3
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_2_IV0) 2
-      else if (config.interBrokerProtocolVersion >= KAFKA_1_0_IV0) 1
+      if (config.interBrokerProtocolVersion.isAtLeast(IBP_3_2_IV0)) 6
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_8_IV1)) 5
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_4_IV1)) 4
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_4_IV0)) 3
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_2_IV0)) 2
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_1_0_IV0)) 1
       else 0
 
     leaderAndIsrRequestMap.forKeyValue { (broker, leaderAndIsrPartitionStates) =>
@@ -504,13 +512,13 @@ abstract class AbstractControllerBrokerRequestBatch(config: KafkaConfig,
 
     val partitionStates = updateMetadataRequestPartitionInfoMap.values.toBuffer
     val updateMetadataRequestVersion: Short =
-      if (config.interBrokerProtocolVersion >= KAFKA_2_8_IV1) 7
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_4_IV1) 6
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_2_IV0) 5
-      else if (config.interBrokerProtocolVersion >= KAFKA_1_0_IV0) 4
-      else if (config.interBrokerProtocolVersion >= KAFKA_0_10_2_IV0) 3
-      else if (config.interBrokerProtocolVersion >= KAFKA_0_10_0_IV1) 2
-      else if (config.interBrokerProtocolVersion >= KAFKA_0_9_0) 1
+      if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_8_IV1)) 7
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_4_IV1)) 6
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_2_IV0)) 5
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_1_0_IV0)) 4
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_0_10_2_IV0)) 3
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_0_10_0_IV1)) 2
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_0_9_0)) 1
       else 0
 
     val liveBrokers = controllerContext.liveOrShuttingDownBrokers.iterator.map { broker =>
@@ -560,9 +568,9 @@ abstract class AbstractControllerBrokerRequestBatch(config: KafkaConfig,
   private def sendStopReplicaRequests(controllerEpoch: Int, stateChangeLog: StateChangeLogger): Unit = {
     val traceEnabled = stateChangeLog.isTraceEnabled
     val stopReplicaRequestVersion: Short =
-      if (config.interBrokerProtocolVersion >= KAFKA_2_6_IV0) 3
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_4_IV1) 2
-      else if (config.interBrokerProtocolVersion >= KAFKA_2_2_IV0) 1
+      if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_6_IV0)) 3
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_4_IV1)) 2
+      else if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_2_IV0)) 1
       else 0
 
     def responseCallback(brokerId: Int, isPartitionDeleted: TopicPartition => Boolean)
diff --git a/core/src/main/scala/kafka/controller/ControllerContext.scala b/core/src/main/scala/kafka/controller/ControllerContext.scala
index 379196aa1d42c..7065d87c4c606 100644
--- a/core/src/main/scala/kafka/controller/ControllerContext.scala
+++ b/core/src/main/scala/kafka/controller/ControllerContext.scala
@@ -327,9 +327,16 @@ class ControllerContext {
     }
   }
 
-  def queueTopicDeletion(topics: Set[String]): Unit = {
-    topicsToBeDeleted ++= topics
-    topics.foreach(cleanPreferredReplicaImbalanceMetric)
+  def queueTopicDeletion(topicToBeAddedIntoDeletionList: Set[String]): Unit = {
+    // queueTopicDeletion could be called multiple times for same topic.
+    // e.g. 1) delete topic-A => 2) delete topic-B before A's deletion completes.
+    // In this case, at 2), queueTopicDeletion will be called with Set(topic-A, topic-B).
+    // However we should call cleanPreferredReplicaImbalanceMetric only once for same topic
+    // because otherwise, preferredReplicaImbalanceCount could be decremented wrongly at 2nd call.
+    // So we need to take a diff with already queued topics here.
+    val newlyDeletedTopics = topicToBeAddedIntoDeletionList.diff(topicsToBeDeleted)
+    topicsToBeDeleted ++= newlyDeletedTopics
+    newlyDeletedTopics.foreach(cleanPreferredReplicaImbalanceMetric)
   }
 
   def beginTopicDeletion(topics: Set[String]): Unit = {
diff --git a/core/src/main/scala/kafka/controller/Election.scala b/core/src/main/scala/kafka/controller/Election.scala
index dffa88841aac3..1e1ee4e5b469d 100644
--- a/core/src/main/scala/kafka/controller/Election.scala
+++ b/core/src/main/scala/kafka/controller/Election.scala
@@ -28,6 +28,7 @@ object Election {
   private def leaderForOffline(partition: TopicPartition,
                                leaderAndIsrOpt: Option[LeaderAndIsr],
                                uncleanLeaderElectionEnabled: Boolean,
+                               isLeaderRecoverySupported: Boolean,
                                controllerContext: ControllerContext): ElectionResult = {
 
     val assignment = controllerContext.partitionReplicaAssignment(partition)
@@ -40,7 +41,14 @@ object Election {
         val newLeaderAndIsrOpt = leaderOpt.map { leader =>
           val newIsr = if (isr.contains(leader)) isr.filter(replica => controllerContext.isReplicaOnline(replica, partition))
           else List(leader)
-          leaderAndIsr.newLeaderAndIsr(leader, newIsr)
+
+          if (!isr.contains(leader) && isLeaderRecoverySupported) {
+            // The new leader is not in the old ISR so mark the partition a RECOVERING
+            leaderAndIsr.newRecoveringLeaderAndIsr(leader, newIsr)
+          } else {
+            // Elect a new leader but keep the previous leader recovery state
+            leaderAndIsr.newLeaderAndIsr(leader, newIsr)
+          }
         }
         ElectionResult(partition, newLeaderAndIsrOpt, liveReplicas)
 
@@ -53,7 +61,8 @@ object Election {
    * Elect leaders for new or offline partitions.
    *
    * @param controllerContext Context with the current state of the cluster
-   * @param partitionsWithUncleanLeaderElectionState A sequence of tuples representing the partitions
+   * @param isLeaderRecoverySupported true leader recovery is support and should be set if election is unclean
+   * @param partitionsWithUncleanLeaderRecoveryState A sequence of tuples representing the partitions
    *                                                 that need election, their leader/ISR state, and whether
    *                                                 or not unclean leader election is enabled
    *
@@ -61,11 +70,12 @@ object Election {
    */
   def leaderForOffline(
     controllerContext: ControllerContext,
-    partitionsWithUncleanLeaderElectionState: Seq[(TopicPartition, Option[LeaderAndIsr], Boolean)]
+    isLeaderRecoverySupported: Boolean,
+    partitionsWithUncleanLeaderRecoveryState: Seq[(TopicPartition, Option[LeaderAndIsr], Boolean)]
   ): Seq[ElectionResult] = {
-    partitionsWithUncleanLeaderElectionState.map {
+    partitionsWithUncleanLeaderRecoveryState.map {
       case (partition, leaderAndIsrOpt, uncleanLeaderElectionEnabled) =>
-        leaderForOffline(partition, leaderAndIsrOpt, uncleanLeaderElectionEnabled, controllerContext)
+        leaderForOffline(partition, leaderAndIsrOpt, uncleanLeaderElectionEnabled, isLeaderRecoverySupported, controllerContext)
     }
   }
 
diff --git a/core/src/main/scala/kafka/controller/KafkaController.scala b/core/src/main/scala/kafka/controller/KafkaController.scala
index c8d973aed180d..999bcb818e91c 100644
--- a/core/src/main/scala/kafka/controller/KafkaController.scala
+++ b/core/src/main/scala/kafka/controller/KafkaController.scala
@@ -16,33 +16,34 @@
  */
 package kafka.controller
 
-import java.util
 import java.util.concurrent.TimeUnit
 import kafka.admin.AdminOperationException
 import kafka.api._
 import kafka.common._
-import kafka.controller.KafkaController.AlterIsrCallback
 import kafka.cluster.Broker
 import kafka.controller.KafkaController.{AlterReassignmentsCallback, ElectLeadersCallback, ListReassignmentsCallback, UpdateFeaturesCallback}
 import kafka.coordinator.transaction.ZkProducerIdManager
 import kafka.metrics.{KafkaMetricsGroup, KafkaTimer}
 import kafka.server._
+import kafka.server.metadata.ZkFinalizedFeatureCache
 import kafka.utils._
 import kafka.utils.Implicits._
 import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
 import kafka.zk.TopicZNode.TopicIdReplicaAssignment
 import kafka.zk.{FeatureZNodeStatus, _}
 import kafka.zookeeper.{StateChangeHandler, ZNodeChangeHandler, ZNodeChildChangeHandler}
+import org.apache.kafka.clients.admin.FeatureUpdate.UpgradeType
 import org.apache.kafka.common.ElectionType
 import org.apache.kafka.common.KafkaException
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.Uuid
 import org.apache.kafka.common.errors.{BrokerNotAvailableException, ControllerMovedException, StaleBrokerEpochException}
-import org.apache.kafka.common.message.{AllocateProducerIdsRequestData, AllocateProducerIdsResponseData, AlterIsrRequestData, AlterIsrResponseData, UpdateFeaturesRequestData}
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange}
+import org.apache.kafka.common.message.{AllocateProducerIdsRequestData, AllocateProducerIdsResponseData, AlterPartitionRequestData, AlterPartitionResponseData}
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.{AbstractControlRequest, ApiError, LeaderAndIsrResponse, UpdateFeaturesRequest, UpdateMetadataResponse}
 import org.apache.kafka.common.utils.{Time, Utils}
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.apache.kafka.server.common.ProducerIdsBlock
 import org.apache.zookeeper.KeeperException
 import org.apache.zookeeper.KeeperException.Code
@@ -64,7 +65,6 @@ object KafkaController extends Logging {
   type ElectLeadersCallback = Map[TopicPartition, Either[ApiError, Int]] => Unit
   type ListReassignmentsCallback = Either[Map[TopicPartition, ReplicaAssignment], ApiError] => Unit
   type AlterReassignmentsCallback = Either[Map[TopicPartition, ApiError], ApiError] => Unit
-  type AlterIsrCallback = Either[Map[TopicPartition, Either[Errors, LeaderAndIsr]], Errors] => Unit
   type UpdateFeaturesCallback = Either[ApiError, Map[String, ApiError]] => Unit
 }
 
@@ -76,7 +76,7 @@ class KafkaController(val config: KafkaConfig,
                       initialBrokerEpoch: Long,
                       tokenManager: DelegationTokenManager,
                       brokerFeatures: BrokerFeatures,
-                      featureCache: FinalizedFeatureCache,
+                      featureCache: ZkFinalizedFeatureCache,
                       threadNamePrefix: Option[String] = None)
   extends ControllerEventProcessor with Logging with KafkaMetricsGroup {
 
@@ -85,7 +85,7 @@ class KafkaController(val config: KafkaConfig,
   @volatile private var brokerInfo = initialBrokerInfo
   @volatile private var _brokerEpoch = initialBrokerEpoch
 
-  private val isAlterIsrEnabled = config.interBrokerProtocolVersion.isAlterIsrSupported
+  private val isAlterPartitionEnabled = config.interBrokerProtocolVersion.isAlterPartitionSupported
   private val stateChangeLogger = new StateChangeLogger(config.brokerId, inControllerContext = true, None)
   val controllerContext = new ControllerContext
   var controllerChannelManager = new ControllerChannelManager(controllerContext, config, time, metrics,
@@ -307,7 +307,7 @@ class KafkaController(val config: KafkaConfig,
    * This method enables the feature versioning system (KIP-584).
    *
    * Development in Kafka (from a high level) is organized into features. Each feature is tracked by
-   * a name and a range of version numbers. A feature can be of two types:
+   * a name and a range of version numbers or a version number. A feature can be of two types:
    *
    * 1. Supported feature:
    * A supported feature is represented by a name (string) and a range of versions (defined by a
@@ -318,8 +318,8 @@ class KafkaController(val config: KafkaConfig,
    * range of versions.
    *
    * 2. Finalized feature:
-   * A finalized feature is represented by a name (string) and a range of version levels (defined
-   * by a FinalizedVersionRange). Whenever the feature versioning system (KIP-584) is
+   * A finalized feature is represented by a name (string) and a specified version level (defined
+   * by a Short). Whenever the feature versioning system (KIP-584) is
    * enabled, the finalized features are stored in the cluster-wide common FeatureZNode.
    * In comparison to a supported feature, the key difference is that a finalized feature exists
    * in ZK only when it is guaranteed to be supported by any random broker in the cluster for a
@@ -329,36 +329,36 @@ class KafkaController(val config: KafkaConfig,
    * This method sets up the FeatureZNode with enabled status, which means that the finalized
    * features stored in the FeatureZNode are active. The enabled status should be written by the
    * controller to the FeatureZNode only when the broker IBP config is greater than or equal to
-   * KAFKA_2_7_IV0.
+   * IBP_2_7_IV0.
    *
    * There are multiple cases handled here:
    *
    * 1. New cluster bootstrap:
    *    A new Kafka cluster (i.e. it is deployed first time) is almost always started with IBP config
-   *    setting greater than or equal to KAFKA_2_7_IV0. We would like to start the cluster with all
+   *    setting greater than or equal to IBP_2_7_IV0. We would like to start the cluster with all
    *    the possible supported features finalized immediately. Assuming this is the case, the
    *    controller will start up and notice that the FeatureZNode is absent in the new cluster,
    *    it will then create a FeatureZNode (with enabled status) containing the entire list of
    *    supported features as its finalized features.
    *
-   * 2. Broker binary upgraded, but IBP config set to lower than KAFKA_2_7_IV0:
-   *    Imagine there was an existing Kafka cluster with IBP config less than KAFKA_2_7_IV0, and the
+   * 2. Broker binary upgraded, but IBP config set to lower than IBP_2_7_IV0:
+   *    Imagine there was an existing Kafka cluster with IBP config less than IBP_2_7_IV0, and the
    *    broker binary has now been upgraded to a newer version that supports the feature versioning
-   *    system (KIP-584). But the IBP config is still set to lower than KAFKA_2_7_IV0, and may be
+   *    system (KIP-584). But the IBP config is still set to lower than IBP_2_7_IV0, and may be
    *    set to a higher value later. In this case, we want to start with no finalized features and
    *    allow the user to finalize them whenever they are ready i.e. in the future whenever the
-   *    user sets IBP config to be greater than or equal to KAFKA_2_7_IV0, then the user could start
+   *    user sets IBP config to be greater than or equal to IBP_2_7_IV0, then the user could start
    *    finalizing the features. This process ensures we do not enable all the possible features
    *    immediately after an upgrade, which could be harmful to Kafka.
    *    This is how we handle such a case:
-   *      - Before the IBP config upgrade (i.e. IBP config set to less than KAFKA_2_7_IV0), the
+   *      - Before the IBP config upgrade (i.e. IBP config set to less than IBP_2_7_IV0), the
    *        controller will start up and check if the FeatureZNode is absent.
    *        - If the node is absent, it will react by creating a FeatureZNode with disabled status
    *          and empty finalized features.
    *        - Otherwise, if a node already exists in enabled status then the controller will just
    *          flip the status to disabled and clear the finalized features.
    *      - After the IBP config upgrade (i.e. IBP config set to greater than or equal to
-   *        KAFKA_2_7_IV0), when the controller starts up it will check if the FeatureZNode exists
+   *        IBP_2_7_IV0), when the controller starts up it will check if the FeatureZNode exists
    *        and whether it is disabled.
    *         - If the node is in disabled status, the controller won’t upgrade all features immediately.
    *           Instead it will just switch the FeatureZNode status to enabled status. This lets the
@@ -366,41 +366,44 @@ class KafkaController(val config: KafkaConfig,
    *         - Otherwise, if a node already exists in enabled status then the controller will leave
    *           the node umodified.
    *
-   * 3. Broker binary upgraded, with existing cluster IBP config >= KAFKA_2_7_IV0:
-   *    Imagine there was an existing Kafka cluster with IBP config >= KAFKA_2_7_IV0, and the broker
-   *    binary has just been upgraded to a newer version (that supports IBP config KAFKA_2_7_IV0 and
+   * 3. Broker binary upgraded, with existing cluster IBP config >= IBP_2_7_IV0:
+   *    Imagine there was an existing Kafka cluster with IBP config >= IBP_2_7_IV0, and the broker
+   *    binary has just been upgraded to a newer version (that supports IBP config IBP_2_7_IV0 and
    *    higher). The controller will start up and find that a FeatureZNode is already present with
    *    enabled status and existing finalized features. In such a case, the controller leaves the node
    *    unmodified.
    *
    * 4. Broker downgrade:
    *    Imagine that a Kafka cluster exists already and the IBP config is greater than or equal to
-   *    KAFKA_2_7_IV0. Then, the user decided to downgrade the cluster by setting IBP config to a
-   *    value less than KAFKA_2_7_IV0. This means the user is also disabling the feature versioning
+   *    IBP_2_7_IV0. Then, the user decided to downgrade the cluster by setting IBP config to a
+   *    value less than IBP_2_7_IV0. This means the user is also disabling the feature versioning
    *    system (KIP-584). In this case, when the controller starts up with the lower IBP config, it
    *    will switch the FeatureZNode status to disabled with empty features.
    */
   private def enableFeatureVersioning(): Unit = {
     val (mayBeFeatureZNodeBytes, version) = zkClient.getDataAndVersion(FeatureZNode.path)
     if (version == ZkVersion.UnknownVersion) {
-      val newVersion = createFeatureZNode(new FeatureZNode(FeatureZNodeStatus.Enabled,
-                                          brokerFeatures.defaultFinalizedFeatures))
-      featureCache.waitUntilEpochOrThrow(newVersion, config.zkConnectionTimeoutMs)
+      val newVersion = createFeatureZNode(
+        FeatureZNode(config.interBrokerProtocolVersion,
+          FeatureZNodeStatus.Enabled,
+          brokerFeatures.defaultFinalizedFeatures
+        ))
+      featureCache.waitUntilFeatureEpochOrThrow(newVersion, config.zkConnectionTimeoutMs)
     } else {
       val existingFeatureZNode = FeatureZNode.decode(mayBeFeatureZNodeBytes.get)
       val newFeatures = existingFeatureZNode.status match {
         case FeatureZNodeStatus.Enabled => existingFeatureZNode.features
         case FeatureZNodeStatus.Disabled =>
-          if (!existingFeatureZNode.features.empty()) {
+          if (existingFeatureZNode.features.nonEmpty) {
             warn(s"FeatureZNode at path: ${FeatureZNode.path} with disabled status" +
                  s" contains non-empty features: ${existingFeatureZNode.features}")
           }
-          Features.emptyFinalizedFeatures
+          Map.empty[String, Short]
       }
-      val newFeatureZNode = new FeatureZNode(FeatureZNodeStatus.Enabled, newFeatures)
+      val newFeatureZNode = FeatureZNode(config.interBrokerProtocolVersion, FeatureZNodeStatus.Enabled, newFeatures)
       if (!newFeatureZNode.equals(existingFeatureZNode)) {
         val newVersion = updateFeatureZNode(newFeatureZNode)
-        featureCache.waitUntilEpochOrThrow(newVersion, config.zkConnectionTimeoutMs)
+        featureCache.waitUntilFeatureEpochOrThrow(newVersion, config.zkConnectionTimeoutMs)
       }
     }
   }
@@ -411,24 +414,24 @@ class KafkaController(val config: KafkaConfig,
    * Sets up the FeatureZNode with disabled status. This status means the feature versioning system
    * (KIP-584) is disabled, and, the finalized features stored in the FeatureZNode are not relevant.
    * This status should be written by the controller to the FeatureZNode only when the broker
-   * IBP config is less than KAFKA_2_7_IV0.
+   * IBP config is less than IBP_2_7_IV0.
    *
    * NOTE:
    * 1. When this method returns, existing finalized features (if any) will be cleared from the
    *    FeatureZNode.
    * 2. This method, unlike enableFeatureVersioning() need not wait for the FinalizedFeatureCache
    *    to be updated, because, such updates to the cache (via FinalizedFeatureChangeListener)
-   *    are disabled when IBP config is < than KAFKA_2_7_IV0.
+   *    are disabled when IBP config is < than IBP_2_7_IV0.
    */
   private def disableFeatureVersioning(): Unit = {
-    val newNode = FeatureZNode(FeatureZNodeStatus.Disabled, Features.emptyFinalizedFeatures())
+    val newNode = FeatureZNode(config.interBrokerProtocolVersion, FeatureZNodeStatus.Disabled, Map.empty[String, Short])
     val (mayBeFeatureZNodeBytes, version) = zkClient.getDataAndVersion(FeatureZNode.path)
     if (version == ZkVersion.UnknownVersion) {
       createFeatureZNode(newNode)
     } else {
       val existingFeatureZNode = FeatureZNode.decode(mayBeFeatureZNodeBytes.get)
       if (existingFeatureZNode.status == FeatureZNodeStatus.Disabled &&
-          !existingFeatureZNode.features.empty()) {
+          existingFeatureZNode.features.nonEmpty) {
         warn(s"FeatureZNode at path: ${FeatureZNode.path} with disabled status" +
              s" contains non-empty features: ${existingFeatureZNode.features}")
       }
@@ -466,14 +469,6 @@ class KafkaController(val config: KafkaConfig,
 
     // shutdown leader rebalance scheduler
     kafkaScheduler.shutdown()
-    offlinePartitionCount = 0
-    preferredReplicaImbalanceCount = 0
-    globalTopicCount = 0
-    globalPartitionCount = 0
-    topicsToDeleteCount = 0
-    replicasToDeleteCount = 0
-    ineligibleTopicsToDeleteCount = 0
-    ineligibleReplicasToDeleteCount = 0
 
     // stop token expiry check scheduler
     if (tokenCleanScheduler.isStarted)
@@ -800,7 +795,7 @@ class KafkaController(val config: KafkaConfig,
         stopRemovedReplicasOfReassignedPartition(topicPartition, unneededReplicas)
     }
 
-    if (!isAlterIsrEnabled) {
+    if (!isAlterPartitionEnabled) {
       val reassignIsrChangeHandler = new PartitionReassignmentIsrChangeHandler(eventManager, topicPartition)
       zkClient.registerZNodeChangeHandler(reassignIsrChangeHandler)
     }
@@ -856,7 +851,7 @@ class KafkaController(val config: KafkaConfig,
     * Attempt to elect a replica as leader for each of the given partitions.
     * @param partitions The partitions to have a new leader elected
     * @param electionType The type of election to perform
-    * @param electionTrigger The reason for tigger this election
+    * @param electionTrigger The reason for trigger this election
     * @return A map of failed and successful elections. The keys are the topic partitions and the corresponding values are
     *         either the exception that was thrown or new leader & ISR.
     */
@@ -906,7 +901,7 @@ class KafkaController(val config: KafkaConfig,
     // update controller cache with delete topic information
     val curBrokerAndEpochs = zkClient.getAllBrokerAndEpochsInCluster
     val (compatibleBrokerAndEpochs, incompatibleBrokerAndEpochs) = partitionOnFeatureCompatibility(curBrokerAndEpochs)
-    if (!incompatibleBrokerAndEpochs.isEmpty) {
+    if (incompatibleBrokerAndEpochs.nonEmpty) {
       warn("Ignoring registration of new brokers due to incompatibilities with finalized features: " +
         incompatibleBrokerAndEpochs.map { case (broker, _) => broker.id }.toSeq.sorted.mkString(","))
     }
@@ -1093,7 +1088,7 @@ class KafkaController(val config: KafkaConfig,
     }
   }
 
-  private def registerPartitionModificationsHandlers(topics: Seq[String]) = {
+  private def registerPartitionModificationsHandlers(topics: Seq[String]): Unit = {
     topics.foreach { topic =>
       val partitionModificationsHandler = new PartitionModificationsHandler(eventManager, topic)
       partitionModificationsHandlers.put(topic, partitionModificationsHandler)
@@ -1101,14 +1096,14 @@ class KafkaController(val config: KafkaConfig,
     partitionModificationsHandlers.values.foreach(zkClient.registerZNodeChangeHandler)
   }
 
-  private[controller] def unregisterPartitionModificationsHandlers(topics: Seq[String]) = {
+  private[controller] def unregisterPartitionModificationsHandlers(topics: Seq[String]): Unit = {
     topics.foreach { topic =>
       partitionModificationsHandlers.remove(topic).foreach(handler => zkClient.unregisterZNodeChangeHandler(handler.path))
     }
   }
 
   private def unregisterPartitionReassignmentIsrChangeHandlers(): Unit = {
-    if (!isAlterIsrEnabled) {
+    if (!isAlterPartitionEnabled) {
       controllerContext.partitionsBeingReassigned.foreach { tp =>
         val path = TopicPartitionStateZNode.path(tp)
         zkClient.unregisterZNodeChangeHandler(path)
@@ -1119,7 +1114,7 @@ class KafkaController(val config: KafkaConfig,
   private def removePartitionFromReassigningPartitions(topicPartition: TopicPartition,
                                                        assignment: ReplicaAssignment): Unit = {
     if (controllerContext.partitionsBeingReassigned.contains(topicPartition)) {
-      if (!isAlterIsrEnabled) {
+      if (!isAlterPartitionEnabled) {
         val path = TopicPartitionStateZNode.path(topicPartition)
         zkClient.unregisterZNodeChangeHandler(path)
       }
@@ -1219,7 +1214,7 @@ class KafkaController(val config: KafkaConfig,
               s"controller was elected with epoch $controllerEpoch. Aborting state change by this controller")
           // increment the leader epoch even if there are no leader or isr changes to allow the leader to cache the expanded
           // assigned replica list
-          val newLeaderAndIsr = leaderAndIsr.newEpochAndZkVersion
+          val newLeaderAndIsr = leaderAndIsr.newEpoch
           // update the new leadership decision in zookeeper or retry
           val UpdateLeaderAndIsrResult(finishedUpdates, _) =
             zkClient.updateLeaderAndIsr(immutable.Map(partition -> newLeaderAndIsr), epoch, controllerContext.epochZkVersion)
@@ -1229,7 +1224,7 @@ class KafkaController(val config: KafkaConfig,
               val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, epoch)
               controllerContext.putPartitionLeadershipInfo(partition, leaderIsrAndControllerEpoch)
               finalLeaderIsrAndControllerEpoch = Some(leaderIsrAndControllerEpoch)
-              info(s"Updated leader epoch for partition $partition to ${leaderAndIsr.leaderEpoch}, zkVersion=${leaderAndIsr.zkVersion}")
+              info(s"Updated leader epoch for partition $partition to ${leaderAndIsr.leaderEpoch}, zkVersion=${leaderAndIsr.partitionEpoch}")
               true
             case Some(Left(e)) => throw e
             case None => false
@@ -1433,43 +1428,37 @@ class KafkaController(val config: KafkaConfig,
   }
 
   private def updateMetrics(): Unit = {
-    offlinePartitionCount =
-      if (!isActive) {
-        0
-      } else {
-        controllerContext.offlinePartitionCount
-      }
-
-    preferredReplicaImbalanceCount =
-      if (!isActive) {
-        0
-      } else {
-        controllerContext.preferredReplicaImbalanceCount
-      }
-
-    globalTopicCount = if (!isActive) 0 else controllerContext.allTopics.size
-
-    globalPartitionCount = if (!isActive) 0 else controllerContext.partitionWithLeadersCount
-
-    topicsToDeleteCount = if (!isActive) 0 else controllerContext.topicsToBeDeleted.size
-
-    replicasToDeleteCount = if (!isActive) 0 else controllerContext.topicsToBeDeleted.map { topic =>
-      // For each enqueued topic, count the number of replicas that are not yet deleted
-      controllerContext.replicasForTopic(topic).count { replica =>
-        controllerContext.replicaState(replica) != ReplicaDeletionSuccessful
-      }
-    }.sum
-
-    ineligibleTopicsToDeleteCount = if (!isActive) 0 else controllerContext.topicsIneligibleForDeletion.size
-
-    ineligibleReplicasToDeleteCount = if (!isActive) 0 else controllerContext.topicsToBeDeleted.map { topic =>
-      // For each enqueued topic, count the number of replicas that are ineligible
-      controllerContext.replicasForTopic(topic).count { replica =>
-        controllerContext.replicaState(replica) == ReplicaDeletionIneligible
-      }
-    }.sum
-
-    activeBrokerCount = if (isActive) controllerContext.liveOrShuttingDownBrokerIds.size else 0
+    if (isActive) {
+      offlinePartitionCount = controllerContext.offlinePartitionCount
+      preferredReplicaImbalanceCount = controllerContext.preferredReplicaImbalanceCount
+      globalTopicCount = controllerContext.allTopics.size
+      globalPartitionCount = controllerContext.partitionWithLeadersCount
+      topicsToDeleteCount = controllerContext.topicsToBeDeleted.size
+      replicasToDeleteCount = controllerContext.topicsToBeDeleted.map { topic =>
+        // For each enqueued topic, count the number of replicas that are not yet deleted
+        controllerContext.replicasForTopic(topic).count { replica =>
+          controllerContext.replicaState(replica) != ReplicaDeletionSuccessful
+        }
+      }.sum
+      ineligibleTopicsToDeleteCount = controllerContext.topicsIneligibleForDeletion.size
+      ineligibleReplicasToDeleteCount = controllerContext.topicsToBeDeleted.map { topic =>
+        // For each enqueued topic, count the number of replicas that are ineligible
+        controllerContext.replicasForTopic(topic).count { replica =>
+          controllerContext.replicaState(replica) == ReplicaDeletionIneligible
+        }
+      }.sum
+      activeBrokerCount = controllerContext.liveOrShuttingDownBrokerIds.size
+    } else {
+      offlinePartitionCount = 0
+      preferredReplicaImbalanceCount = 0
+      globalTopicCount = 0
+      globalPartitionCount = 0
+      topicsToDeleteCount = 0
+      replicasToDeleteCount = 0
+      ineligibleTopicsToDeleteCount = 0
+      ineligibleReplicasToDeleteCount = 0
+      activeBrokerCount = 0
+    }
   }
 
   // visible for testing
@@ -1562,7 +1551,7 @@ class KafkaController(val config: KafkaConfig,
     brokersAndEpochs.partition {
       case (broker, _) =>
         !config.isFeatureVersioningSupported ||
-        !featureCache.get.exists(
+        !featureCache.getFeatureOption.exists(
           latestFinalizedFeatures =>
             BrokerFeatures.hasIncompatibleFeatures(broker.features, latestFinalizedFeatures.features))
     }
@@ -1597,7 +1586,7 @@ class KafkaController(val config: KafkaConfig,
     if (newBrokerIds.nonEmpty) {
       val (newCompatibleBrokerAndEpochs, newIncompatibleBrokerAndEpochs) =
         partitionOnFeatureCompatibility(newBrokerAndEpochs)
-      if (!newIncompatibleBrokerAndEpochs.isEmpty) {
+      if (newIncompatibleBrokerAndEpochs.nonEmpty) {
         warn("Ignoring registration of new brokers due to incompatibilities with finalized features: " +
           newIncompatibleBrokerAndEpochs.map { case (broker, _) => broker.id }.toSeq.sorted.mkString(","))
       }
@@ -1609,7 +1598,7 @@ class KafkaController(val config: KafkaConfig,
       onBrokerFailure(bouncedBrokerIdsSorted)
       val (bouncedCompatibleBrokerAndEpochs, bouncedIncompatibleBrokerAndEpochs) =
         partitionOnFeatureCompatibility(bouncedBrokerAndEpochs)
-      if (!bouncedIncompatibleBrokerAndEpochs.isEmpty) {
+      if (bouncedIncompatibleBrokerAndEpochs.nonEmpty) {
         warn("Ignoring registration of bounced brokers due to incompatibilities with finalized features: " +
           bouncedIncompatibleBrokerAndEpochs.map { case (broker, _) => broker.id }.toSeq.sorted.mkString(","))
       }
@@ -1919,16 +1908,17 @@ class KafkaController(val config: KafkaConfig,
   }
 
   /**
-   * Returns the new FinalizedVersionRange for the feature, if there are no feature
+   * Returns the new finalized version for the feature, if there are no feature
    * incompatibilities seen with all known brokers for the provided feature update.
    * Otherwise returns an ApiError object containing Errors.INVALID_REQUEST.
    *
    * @param update   the feature update to be processed (this can not be meant to delete the feature)
    *
-   * @return         the new FinalizedVersionRange or error, as described above.
+   * @return         the new finalized version or error, as described above.
    */
-  private def newFinalizedVersionRangeOrIncompatibilityError(update: UpdateFeaturesRequestData.FeatureUpdateKey): Either[FinalizedVersionRange, ApiError] = {
-    if (UpdateFeaturesRequest.isDeleteRequest(update)) {
+  private def newFinalizedVersionOrIncompatibilityError(update: UpdateFeaturesRequest.FeatureUpdateItem):
+      Either[Short, ApiError] = {
+    if (update.isDeleteRequest) {
       throw new IllegalArgumentException(s"Provided feature update can not be meant to delete the feature: $update")
     }
 
@@ -1938,28 +1928,19 @@ class KafkaController(val config: KafkaConfig,
                          "Could not apply finalized feature update because the provided feature" +
                          " is not supported."))
     } else {
-      var newVersionRange: FinalizedVersionRange = null
-      try {
-        newVersionRange = new FinalizedVersionRange(supportedVersionRange.min, update.maxVersionLevel)
-      } catch {
-        case _: IllegalArgumentException => {
-          // This exception means the provided maxVersionLevel is invalid. It is handled below
-          // outside of this catch clause.
-        }
-      }
-      if (newVersionRange == null) {
+      val newVersion = update.versionLevel()
+      if (supportedVersionRange.isIncompatibleWith(newVersion)) {
         Right(new ApiError(Errors.INVALID_REQUEST,
           "Could not apply finalized feature update because the provided" +
-          s" maxVersionLevel:${update.maxVersionLevel} is lower than the" +
+          s" versionLevel:${update.versionLevel} is lower than the" +
           s" supported minVersion:${supportedVersionRange.min}."))
       } else {
-        val newFinalizedFeature =
-          Features.finalizedFeatures(Utils.mkMap(Utils.mkEntry(update.feature, newVersionRange)))
+        val newFinalizedFeature = Utils.mkMap(Utils.mkEntry(update.feature, newVersion)).asScala.toMap
         val numIncompatibleBrokers = controllerContext.liveOrShuttingDownBrokers.count(broker => {
           BrokerFeatures.hasIncompatibleFeatures(broker.features, newFinalizedFeature)
         })
         if (numIncompatibleBrokers == 0) {
-          Left(newVersionRange)
+          Left(newVersion)
         } else {
           Right(new ApiError(Errors.INVALID_REQUEST,
                              "Could not apply finalized feature update because" +
@@ -1970,73 +1951,70 @@ class KafkaController(val config: KafkaConfig,
   }
 
   /**
-   * Validates a feature update on an existing FinalizedVersionRange.
+   * Validates a feature update on an existing finalized version.
    * If the validation succeeds, then, the return value contains:
-   * 1. the new FinalizedVersionRange for the feature, if the feature update was not meant to delete the feature.
+   * 1. the new finalized version for the feature, if the feature update was not meant to delete the feature.
    * 2. Option.empty, if the feature update was meant to delete the feature.
    *
    * If the validation fails, then returned value contains a suitable ApiError.
    *
-   * @param update                 the feature update to be processed.
-   * @param existingVersionRange   the existing FinalizedVersionRange which can be empty when no
-   *                               FinalizedVersionRange exists for the associated feature
+   * @param update             the feature update to be processed.
+   * @param existingVersion    the existing finalized version which can be empty when no
+   *                           finalized version exists for the associated feature
    *
-   * @return                       the new FinalizedVersionRange to be updated into ZK or error
-   *                               as described above.
+   * @return                   the new finalized version to be updated into ZK or error
+   *                           as described above.
    */
-  private def validateFeatureUpdate(update: UpdateFeaturesRequestData.FeatureUpdateKey,
-                                    existingVersionRange: Option[FinalizedVersionRange]): Either[Option[FinalizedVersionRange], ApiError] = {
-    def newVersionRangeOrError(update: UpdateFeaturesRequestData.FeatureUpdateKey): Either[Option[FinalizedVersionRange], ApiError] = {
-      newFinalizedVersionRangeOrIncompatibilityError(update)
+  private def validateFeatureUpdate(update: UpdateFeaturesRequest.FeatureUpdateItem,
+                                    existingVersion: Option[Short]): Either[Option[Short], ApiError] = {
+    def newVersionRangeOrError(update: UpdateFeaturesRequest.FeatureUpdateItem): Either[Option[Short], ApiError] = {
+      newFinalizedVersionOrIncompatibilityError(update)
         .fold(versionRange => Left(Some(versionRange)), error => Right(error))
     }
 
     if (update.feature.isEmpty) {
       // Check that the feature name is not empty.
       Right(new ApiError(Errors.INVALID_REQUEST, "Feature name can not be empty."))
+    } else if (update.upgradeType.equals(UpgradeType.UNKNOWN)) {
+      Right(new ApiError(Errors.INVALID_REQUEST, "Received unknown upgrade type."))
     } else {
+
       // We handle deletion requests separately from non-deletion requests.
-      if (UpdateFeaturesRequest.isDeleteRequest(update)) {
-        if (existingVersionRange.isEmpty) {
+      if (update.isDeleteRequest) {
+        if (existingVersion.isEmpty) {
           // Disallow deletion of a non-existing finalized feature.
           Right(new ApiError(Errors.INVALID_REQUEST,
                              "Can not delete non-existing finalized feature."))
         } else {
           Left(Option.empty)
         }
-      } else if (update.maxVersionLevel() < 1) {
-        // Disallow deletion of a finalized feature without allowDowngrade flag set.
+      } else if (update.versionLevel() < 1) {
+        // Disallow deletion of a finalized feature without SAFE downgrade type.
         Right(new ApiError(Errors.INVALID_REQUEST,
-                           s"Can not provide maxVersionLevel: ${update.maxVersionLevel} less" +
-                           s" than 1 without setting the allowDowngrade flag to true in the request."))
+                           s"Can not provide versionLevel: ${update.versionLevel} less" +
+                           s" than 1 without setting the SAFE downgradeType in the request."))
       } else {
-        existingVersionRange.map(existing =>
-          if (update.maxVersionLevel == existing.max) {
-            // Disallow a case where target maxVersionLevel matches existing maxVersionLevel.
+        existingVersion.map(existing =>
+          if (update.versionLevel == existing) {
+            // Disallow a case where target versionLevel matches existing versionLevel.
             Right(new ApiError(Errors.INVALID_REQUEST,
-                               s"Can not ${if (update.allowDowngrade) "downgrade" else "upgrade"}" +
-                               s" a finalized feature from existing maxVersionLevel:${existing.max}" +
+                               s"Can not ${if (update.upgradeType.equals(UpgradeType.SAFE_DOWNGRADE)) "downgrade" else "upgrade"}" +
+                               s" a finalized feature from existing versionLevel:$existing" +
                                " to the same value."))
-          } else if (update.maxVersionLevel < existing.max && !update.allowDowngrade) {
-            // Disallow downgrade of a finalized feature without the allowDowngrade flag set.
+          } else if (update.versionLevel < existing && !update.upgradeType.equals(UpgradeType.SAFE_DOWNGRADE)) {
+            // Disallow downgrade of a finalized feature without the downgradeType set.
             Right(new ApiError(Errors.INVALID_REQUEST,
                                s"Can not downgrade finalized feature from existing" +
-                               s" maxVersionLevel:${existing.max} to provided" +
-                               s" maxVersionLevel:${update.maxVersionLevel} without setting the" +
-                               " allowDowngrade flag in the request."))
-          } else if (update.allowDowngrade && update.maxVersionLevel > existing.max) {
-            // Disallow a request that sets allowDowngrade flag without specifying a
-            // maxVersionLevel that's lower than the existing maxVersionLevel.
+                               s" versionLevel:$existing to provided" +
+                               s" versionLevel:${update.versionLevel} without setting the" +
+                               " downgradeType to SAFE in the request."))
+          } else if (!update.upgradeType.equals(UpgradeType.UPGRADE) && update.versionLevel > existing) {
+            // Disallow a request that sets downgradeType without specifying a
+            // versionLevel that's lower than the existing versionLevel.
             Right(new ApiError(Errors.INVALID_REQUEST,
-                               s"When the allowDowngrade flag set in the request, the provided" +
-                               s" maxVersionLevel:${update.maxVersionLevel} can not be greater than" +
-                               s" existing maxVersionLevel:${existing.max}."))
-          } else if (update.maxVersionLevel < existing.min) {
-            // Disallow downgrade of a finalized feature below the existing finalized
-            // minVersionLevel.
-            Right(new ApiError(Errors.INVALID_REQUEST,
-                               s"Can not downgrade finalized feature to maxVersionLevel:${update.maxVersionLevel}" +
-                               s" because it's lower than the existing minVersionLevel:${existing.min}."))
+                               s"When the downgradeType is set to SAFE in the request, the provided" +
+                               s" versionLevel:${update.versionLevel} can not be greater than" +
+                               s" existing versionLevel:$existing."))
           } else {
             newVersionRangeOrError(update)
           }
@@ -2056,13 +2034,13 @@ class KafkaController(val config: KafkaConfig,
 
   private def processFeatureUpdatesWithActiveController(request: UpdateFeaturesRequest,
                                                         callback: UpdateFeaturesCallback): Unit = {
-    val updates = request.data.featureUpdates
-    val existingFeatures = featureCache.get
-      .map(featuresAndEpoch => featuresAndEpoch.features.features().asScala)
-      .getOrElse(Map[String, FinalizedVersionRange]())
-    // A map with key being feature name and value being FinalizedVersionRange.
+    val updates = request.featureUpdates
+    val existingFeatures = featureCache.getFeatureOption
+      .map(featuresAndEpoch => featuresAndEpoch.features)
+      .getOrElse(Map[String, Short]())
+    // A map with key being feature name and value being finalized version.
     // This contains the target features to be eventually written to FeatureZNode.
-    val targetFeatures = scala.collection.mutable.Map[String, FinalizedVersionRange]() ++ existingFeatures
+    val targetFeatures = scala.collection.mutable.Map[String, Short]() ++ existingFeatures
     // A map with key being feature name and value being error encountered when the FeatureUpdate
     // was applied.
     val errors = scala.collection.mutable.Map[String, ApiError]()
@@ -2071,7 +2049,7 @@ class KafkaController(val config: KafkaConfig,
     //  - If a FeatureUpdate is found to be valid, then:
     //    - The corresponding entry in errors map would be updated to contain Errors.NONE.
     //    - If the FeatureUpdate is an add or update request, then the targetFeatures map is updated
-    //      to contain the new FinalizedVersionRange for the feature.
+    //      to contain the new finalized version for the feature.
     //    - Otherwise if the FeatureUpdate is a delete request, then the feature is removed from the
     //      targetFeatures map.
     //  - Otherwise if a FeatureUpdate is found to be invalid, then:
@@ -2096,9 +2074,9 @@ class KafkaController(val config: KafkaConfig,
     // of the existing finalized features in ZK.
     try {
       if (!existingFeatures.equals(targetFeatures)) {
-        val newNode = new FeatureZNode(FeatureZNodeStatus.Enabled, Features.finalizedFeatures(targetFeatures.asJava))
+        val newNode = FeatureZNode(config.interBrokerProtocolVersion, FeatureZNodeStatus.Enabled, targetFeatures)
         val newVersion = updateFeatureZNode(newNode)
-        featureCache.waitUntilEpochOrThrow(newVersion, request.data().timeoutMs())
+        featureCache.waitUntilFeatureEpochOrThrow(newVersion, request.data().timeoutMs())
       }
     } catch {
       // For all features that correspond to valid FeatureUpdate (i.e. error is Errors.NONE),
@@ -2132,7 +2110,7 @@ class KafkaController(val config: KafkaConfig,
         processUpdateNotifications(partitions)
 
         // During a partial upgrade, the controller may be on an IBP which assumes
-        // ISR changes through the `AlterIsr` API while some brokers are on an older
+        // ISR changes through the `AlterPartition` API while some brokers are on an older
         // IBP which assumes notification through Zookeeper. In this case, since the
         // controller will not have registered watches for reassigning partitions, we
         // can still rely on the batch ISR change notification path in order to
@@ -2245,158 +2223,250 @@ class KafkaController(val config: KafkaConfig,
     }
   }
 
-  def alterIsrs(alterIsrRequest: AlterIsrRequestData, callback: AlterIsrResponseData => Unit): Unit = {
-    val isrsToAlter = mutable.Map[TopicPartition, LeaderAndIsr]()
+  def alterPartitions(
+    alterPartitionRequest: AlterPartitionRequestData,
+    alterPartitionRequestVersion: Short,
+    callback: AlterPartitionResponseData => Unit
+  ): Unit = {
+    eventManager.put(AlterPartitionReceived(
+      alterPartitionRequest,
+      alterPartitionRequestVersion,
+      callback
+    ))
+  }
 
-    alterIsrRequest.topics.forEach { topicReq =>
-      topicReq.partitions.forEach { partitionReq =>
-        val tp = new TopicPartition(topicReq.name, partitionReq.partitionIndex)
-        val newIsr = partitionReq.newIsr().asScala.toList.map(_.toInt)
-        isrsToAlter.put(tp, new LeaderAndIsr(alterIsrRequest.brokerId, partitionReq.leaderEpoch, newIsr, partitionReq.currentIsrVersion))
-      }
+  private def processAlterPartition(
+    alterPartitionRequest: AlterPartitionRequestData,
+    alterPartitionRequestVersion: Short,
+    callback: AlterPartitionResponseData => Unit
+  ): Unit = {
+    val partitionResponses = try {
+      tryProcessAlterPartition(
+        alterPartitionRequest,
+        alterPartitionRequestVersion,
+        callback
+      )
+    } catch {
+      case e: Throwable =>
+        error(s"Error when processing AlterPartition: $alterPartitionRequest", e)
+        callback(new AlterPartitionResponseData().setErrorCode(Errors.UNKNOWN_SERVER_ERROR.code))
+        mutable.Map.empty
     }
 
-    def responseCallback(results: Either[Map[TopicPartition, Either[Errors, LeaderAndIsr]], Errors]): Unit = {
-      val resp = new AlterIsrResponseData()
-      results match {
-        case Right(error) =>
-          resp.setErrorCode(error.code)
-        case Left(partitionResults) =>
-          resp.setTopics(new util.ArrayList())
-          partitionResults
-            .groupBy { case (tp, _) => tp.topic }   // Group by topic
-            .foreach { case (topic, partitions) =>
-              // Add each topic part to the response
-              val topicResp = new AlterIsrResponseData.TopicData()
-                .setName(topic)
-                .setPartitions(new util.ArrayList())
-              resp.topics.add(topicResp)
-              partitions.foreach { case (tp, errorOrIsr) =>
-                // Add each partition part to the response (new ISR or error)
-                errorOrIsr match {
-                  case Left(error) => topicResp.partitions.add(
-                    new AlterIsrResponseData.PartitionData()
-                      .setPartitionIndex(tp.partition)
-                      .setErrorCode(error.code))
-                  case Right(leaderAndIsr) => topicResp.partitions.add(
-                    new AlterIsrResponseData.PartitionData()
-                      .setPartitionIndex(tp.partition)
-                      .setLeaderId(leaderAndIsr.leader)
-                      .setLeaderEpoch(leaderAndIsr.leaderEpoch)
-                      .setIsr(leaderAndIsr.isr.map(Integer.valueOf).asJava)
-                      .setCurrentIsrVersion(leaderAndIsr.zkVersion))
-                }
-            }
-          }
+    // After we have returned the result of the `AlterPartition` request, we should check whether
+    // there are any reassignments which can be completed by a successful ISR expansion.
+    partitionResponses.forKeyValue { (topicPartition, partitionResponse) =>
+      if (controllerContext.partitionsBeingReassigned.contains(topicPartition)) {
+        val isSuccessfulUpdate = partitionResponse.isRight
+        if (isSuccessfulUpdate) {
+          maybeCompleteReassignment(topicPartition)
+        }
       }
-      callback.apply(resp)
     }
-
-    eventManager.put(AlterIsrReceived(alterIsrRequest.brokerId, alterIsrRequest.brokerEpoch, isrsToAlter, responseCallback))
   }
 
-  private def processAlterIsr(brokerId: Int, brokerEpoch: Long,
-                              isrsToAlter: Map[TopicPartition, LeaderAndIsr],
-                              callback: AlterIsrCallback): Unit = {
+  private def tryProcessAlterPartition(
+    alterPartitionRequest: AlterPartitionRequestData,
+    alterPartitionRequestVersion: Short,
+    callback: AlterPartitionResponseData => Unit
+  ): mutable.Map[TopicPartition, Either[Errors, LeaderAndIsr]] = {
+    val useTopicsIds = alterPartitionRequestVersion > 1
 
     // Handle a few short-circuits
     if (!isActive) {
-      callback.apply(Right(Errors.NOT_CONTROLLER))
-      return
+      callback(new AlterPartitionResponseData().setErrorCode(Errors.NOT_CONTROLLER.code))
+      return mutable.Map.empty
     }
 
+    val brokerId = alterPartitionRequest.brokerId
+    val brokerEpoch = alterPartitionRequest.brokerEpoch
     val brokerEpochOpt = controllerContext.liveBrokerIdAndEpochs.get(brokerId)
     if (brokerEpochOpt.isEmpty) {
-      info(s"Ignoring AlterIsr due to unknown broker $brokerId")
-      callback.apply(Right(Errors.STALE_BROKER_EPOCH))
-      return
+      info(s"Ignoring AlterPartition due to unknown broker $brokerId")
+      callback(new AlterPartitionResponseData().setErrorCode(Errors.STALE_BROKER_EPOCH.code))
+      return mutable.Map.empty
     }
 
     if (!brokerEpochOpt.contains(brokerEpoch)) {
-      info(s"Ignoring AlterIsr due to stale broker epoch $brokerEpoch and local broker epoch $brokerEpochOpt for broker $brokerId")
-      callback.apply(Right(Errors.STALE_BROKER_EPOCH))
-      return
+      info(s"Ignoring AlterPartition due to stale broker epoch $brokerEpoch and local broker epoch $brokerEpochOpt for broker $brokerId")
+      callback(new AlterPartitionResponseData().setErrorCode(Errors.STALE_BROKER_EPOCH.code))
+      return mutable.Map.empty
     }
 
-    val response = try {
-      val partitionResponses = mutable.HashMap[TopicPartition, Either[Errors, LeaderAndIsr]]()
-
-      // Determine which partitions we will accept the new ISR for
-      val adjustedIsrs: Map[TopicPartition, LeaderAndIsr] = isrsToAlter.flatMap {
-        case (tp: TopicPartition, newLeaderAndIsr: LeaderAndIsr) =>
-          controllerContext.partitionLeadershipInfo(tp) match {
-            case Some(leaderIsrAndControllerEpoch) =>
-              val currentLeaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
-              if (newLeaderAndIsr.leaderEpoch < currentLeaderAndIsr.leaderEpoch) {
-                partitionResponses(tp) = Left(Errors.FENCED_LEADER_EPOCH)
-                None
-              } else if (newLeaderAndIsr.equalsIgnoreZk(currentLeaderAndIsr)) {
-                // If a partition is already in the desired state, just return it
-                partitionResponses(tp) = Right(currentLeaderAndIsr)
-                None
-              } else {
-                Some(tp -> newLeaderAndIsr)
-              }
-            case None =>
-              partitionResponses(tp) = Left(Errors.UNKNOWN_TOPIC_OR_PARTITION)
-              None
+    val partitionsToAlter = new mutable.HashMap[TopicPartition, LeaderAndIsr]()
+    val alterPartitionResponse = new AlterPartitionResponseData()
+
+    alterPartitionRequest.topics.forEach { topicReq =>
+      val topicNameOpt = if (useTopicsIds) {
+        controllerContext.topicName(topicReq.topicId)
+      } else {
+        Some(topicReq.topicName)
+      }
+
+      topicNameOpt match {
+        case None =>
+          val topicResponse = new AlterPartitionResponseData.TopicData()
+            .setTopicId(topicReq.topicId)
+          alterPartitionResponse.topics.add(topicResponse)
+          topicReq.partitions.forEach { partitionReq =>
+            topicResponse.partitions.add(new AlterPartitionResponseData.PartitionData()
+              .setPartitionIndex(partitionReq.partitionIndex)
+              .setErrorCode(Errors.UNKNOWN_TOPIC_ID.code))
+          }
+
+        case Some(topicName) =>
+          topicReq.partitions.forEach { partitionReq =>
+            partitionsToAlter.put(
+              new TopicPartition(topicName, partitionReq.partitionIndex),
+              LeaderAndIsr(
+                alterPartitionRequest.brokerId,
+                partitionReq.leaderEpoch,
+                partitionReq.newIsr.asScala.toList.map(_.toInt),
+                LeaderRecoveryState.of(partitionReq.leaderRecoveryState),
+                partitionReq.partitionEpoch
+              )
+            )
           }
       }
+    }
 
-      // Do the updates in ZK
-      debug(s"Updating ISRs for partitions: ${adjustedIsrs.keySet}.")
-      val UpdateLeaderAndIsrResult(finishedUpdates, badVersionUpdates) = zkClient.updateLeaderAndIsr(
-        adjustedIsrs, controllerContext.epoch, controllerContext.epochZkVersion)
-
-      val successfulUpdates: Map[TopicPartition, LeaderAndIsr] = finishedUpdates.flatMap {
-        case (partition: TopicPartition, isrOrError: Either[Throwable, LeaderAndIsr]) =>
-          isrOrError match {
-            case Right(updatedIsr) =>
-              debug(s"ISR for partition $partition updated to [${updatedIsr.isr.mkString(",")}] and zkVersion updated to [${updatedIsr.zkVersion}]")
-              partitionResponses(partition) = Right(updatedIsr)
-              Some(partition -> updatedIsr)
-            case Left(e) =>
-              error(s"Failed to update ISR for partition $partition", e)
-              partitionResponses(partition) = Left(Errors.forException(e))
+    val partitionResponses = mutable.HashMap[TopicPartition, Either[Errors, LeaderAndIsr]]()
+    // Determine which partitions we will accept the new ISR for
+    val adjustedIsrs = partitionsToAlter.flatMap { case (tp, newLeaderAndIsr) =>
+      controllerContext.partitionLeadershipInfo(tp) match {
+        case Some(leaderIsrAndControllerEpoch) =>
+          val currentLeaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
+          if (newLeaderAndIsr.partitionEpoch > currentLeaderAndIsr.partitionEpoch
+              || newLeaderAndIsr.leaderEpoch > currentLeaderAndIsr.leaderEpoch) {
+            // If the partition leader has a higher partition/leader epoch, then it is likely
+            // that this node is no longer the active controller. We return NOT_CONTROLLER in
+            // this case to give the leader an opportunity to find the new controller.
+            partitionResponses(tp) = Left(Errors.NOT_CONTROLLER)
+            None
+          } else if (newLeaderAndIsr.leaderEpoch != currentLeaderAndIsr.leaderEpoch) {
+            partitionResponses(tp) = Left(Errors.FENCED_LEADER_EPOCH)
+            None
+          } else if (newLeaderAndIsr.equalsAllowStalePartitionEpoch(currentLeaderAndIsr)) {
+            // If a partition is already in the desired state, just return it
+            // this check must be done before fencing based on partition epoch to maintain idempotency
+            partitionResponses(tp) = Right(currentLeaderAndIsr)
+            None
+          } else if (newLeaderAndIsr.partitionEpoch != currentLeaderAndIsr.partitionEpoch) {
+            partitionResponses(tp) = Left(Errors.INVALID_UPDATE_VERSION)
+            None
+          }  else if (newLeaderAndIsr.leaderRecoveryState == LeaderRecoveryState.RECOVERING && newLeaderAndIsr.isr.length > 1) {
+            partitionResponses(tp) = Left(Errors.INVALID_REQUEST)
+            info(
+              s"Rejecting AlterPartition from node $brokerId for $tp because leader is recovering and ISR is greater than 1: " +
+              s"$newLeaderAndIsr"
+            )
+            None
+          } else if (currentLeaderAndIsr.leaderRecoveryState == LeaderRecoveryState.RECOVERED &&
+            newLeaderAndIsr.leaderRecoveryState == LeaderRecoveryState.RECOVERING) {
+
+            partitionResponses(tp) = Left(Errors.INVALID_REQUEST)
+            info(
+              s"Rejecting AlterPartition from node $brokerId for $tp because the leader recovery state cannot change from " +
+              s"RECOVERED to RECOVERING: $newLeaderAndIsr"
+            )
+            None
+          } else {
+            // Pull out replicas being added to ISR and verify they are all online.
+            // If a replica is not online, reject the update as specified in KIP-841.
+            val ineligibleReplicas = newLeaderAndIsr.isr.toSet -- controllerContext.liveBrokerIds
+            if (ineligibleReplicas.nonEmpty) {
+              info(s"Rejecting AlterPartition request from node $brokerId for $tp because " +
+                s"it specified ineligible replicas $ineligibleReplicas in the new ISR ${newLeaderAndIsr.isr}."
+              )
+
+              if (alterPartitionRequestVersion > 1) {
+                partitionResponses(tp) = Left(Errors.INELIGIBLE_REPLICA)
+              } else {
+                partitionResponses(tp) = Left(Errors.OPERATION_NOT_ATTEMPTED)
+              }
               None
+            } else {
+              Some(tp -> newLeaderAndIsr)
+            }
           }
-      }
 
-      badVersionUpdates.foreach { partition =>
-        info(s"Failed to update ISR to ${adjustedIsrs(partition)} for partition $partition, bad ZK version.")
-        partitionResponses(partition) = Left(Errors.INVALID_UPDATE_VERSION)
+        case None =>
+          partitionResponses(tp) = Left(Errors.UNKNOWN_TOPIC_OR_PARTITION)
+          None
       }
+    }
 
-      def processUpdateNotifications(partitions: Seq[TopicPartition]): Unit = {
-        val liveBrokers: Seq[Int] = controllerContext.liveOrShuttingDownBrokerIds.toSeq
-        sendUpdateMetadataRequest(liveBrokers, partitions.toSet)
+    // Do the updates in ZK
+    debug(s"Updating ISRs for partitions: ${adjustedIsrs.keySet}.")
+    val UpdateLeaderAndIsrResult(finishedUpdates, badVersionUpdates) = zkClient.updateLeaderAndIsr(
+      adjustedIsrs, controllerContext.epoch, controllerContext.epochZkVersion)
+
+    val successfulUpdates = finishedUpdates.flatMap { case (partition, isrOrError) =>
+      isrOrError match {
+        case Right(updatedIsr) =>
+          debug(s"ISR for partition $partition updated to $updatedIsr.")
+          partitionResponses(partition) = Right(updatedIsr)
+          Some(partition -> updatedIsr)
+        case Left(e) =>
+          error(s"Failed to update ISR for partition $partition", e)
+          partitionResponses(partition) = Left(Errors.forException(e))
+          None
       }
+    }
 
-      // Update our cache and send out metadata updates
-      updateLeaderAndIsrCache(successfulUpdates.keys.toSeq)
-      processUpdateNotifications(isrsToAlter.keys.toSeq)
-
-      Left(partitionResponses)
-    } catch {
-      case e: Throwable =>
-        error(s"Error when processing AlterIsr for partitions: ${isrsToAlter.keys.toSeq}", e)
-        Right(Errors.UNKNOWN_SERVER_ERROR)
+    badVersionUpdates.foreach { partition =>
+      info(s"Failed to update ISR to ${adjustedIsrs(partition)} for partition $partition, bad ZK version.")
+      partitionResponses(partition) = Left(Errors.INVALID_UPDATE_VERSION)
     }
 
-    callback.apply(response)
+    // Update our cache and send out metadata updates
+    updateLeaderAndIsrCache(successfulUpdates.keys.toSeq)
+    sendUpdateMetadataRequest(
+      controllerContext.liveOrShuttingDownBrokerIds.toSeq,
+      partitionsToAlter.keySet
+    )
 
-    // After we have returned the result of the `AlterIsr` request, we should check whether
-    // there are any reassignments which can be completed by a successful ISR expansion.
-    response.left.foreach { alterIsrResponses =>
-      alterIsrResponses.forKeyValue { (topicPartition, partitionResponse) =>
-        if (controllerContext.partitionsBeingReassigned.contains(topicPartition)) {
-          val isSuccessfulUpdate = partitionResponse.isRight
-          if (isSuccessfulUpdate) {
-            maybeCompleteReassignment(topicPartition)
-          }
+    partitionResponses.groupBy(_._1.topic).forKeyValue { (topicName, partitionResponses) =>
+      // Add each topic part to the response
+      val topicResponse = if (useTopicsIds) {
+        new AlterPartitionResponseData.TopicData()
+          .setTopicId(controllerContext.topicIds.getOrElse(topicName, Uuid.ZERO_UUID))
+      } else {
+        new AlterPartitionResponseData.TopicData()
+          .setTopicName(topicName)
+      }
+      alterPartitionResponse.topics.add(topicResponse)
+
+      partitionResponses.forKeyValue { (tp, errorOrIsr) =>
+        // Add each partition part to the response (new ISR or error)
+        errorOrIsr match {
+          case Left(error) =>
+            topicResponse.partitions.add(
+              new AlterPartitionResponseData.PartitionData()
+                .setPartitionIndex(tp.partition)
+                .setErrorCode(error.code))
+          case Right(leaderAndIsr) =>
+            /* Setting the LeaderRecoveryState field is always safe because it will always be the same
+             * as the value set in the request. For version 0, that is always the default RECOVERED
+             * which is ignored when serializing to version 0. For any other version, the
+             * LeaderRecoveryState field is supported.
+             */
+            topicResponse.partitions.add(
+              new AlterPartitionResponseData.PartitionData()
+                .setPartitionIndex(tp.partition)
+                .setLeaderId(leaderAndIsr.leader)
+                .setLeaderEpoch(leaderAndIsr.leaderEpoch)
+                .setIsr(leaderAndIsr.isr.map(Integer.valueOf).asJava)
+                .setLeaderRecoveryState(leaderAndIsr.leaderRecoveryState.value)
+                .setPartitionEpoch(leaderAndIsr.partitionEpoch)
+            )
         }
       }
     }
+
+    callback(alterPartitionResponse)
+
+    partitionResponses
   }
 
   def allocateProducerIds(allocateProducerIdsRequest: AllocateProducerIdsRequestData,
@@ -2523,8 +2593,8 @@ class KafkaController(val config: KafkaConfig,
           processPartitionReassignmentIsrChange(partition)
         case IsrChangeNotification =>
           processIsrChangeNotification()
-        case AlterIsrReceived(brokerId, brokerEpoch, isrsToAlter, callback) =>
-          processAlterIsr(brokerId, brokerEpoch, isrsToAlter, callback)
+        case AlterPartitionReceived(alterPartitionRequest, alterPartitionRequestVersion, callback) =>
+          processAlterPartition(alterPartitionRequest, alterPartitionRequestVersion, callback)
         case AllocateProducerIds(brokerId, brokerEpoch, callback) =>
           processAllocateProducerIds(brokerId, brokerEpoch, callback)
         case Startup =>
@@ -2643,8 +2713,9 @@ case class LeaderIsrAndControllerEpoch(leaderAndIsr: LeaderAndIsr, controllerEpo
     val leaderAndIsrInfo = new StringBuilder
     leaderAndIsrInfo.append("(Leader:" + leaderAndIsr.leader)
     leaderAndIsrInfo.append(",ISR:" + leaderAndIsr.isr.mkString(","))
+    leaderAndIsrInfo.append(",LeaderRecoveryState:" + leaderAndIsr.leaderRecoveryState)
     leaderAndIsrInfo.append(",LeaderEpoch:" + leaderAndIsr.leaderEpoch)
-    leaderAndIsrInfo.append(",ZkVersion:" + leaderAndIsr.zkVersion)
+    leaderAndIsrInfo.append(",ZkVersion:" + leaderAndIsr.partitionEpoch)
     leaderAndIsrInfo.append(",ControllerEpoch:" + controllerEpoch + ")")
     leaderAndIsrInfo.toString()
   }
@@ -2785,8 +2856,11 @@ case object IsrChangeNotification extends ControllerEvent {
   override def preempt(): Unit = {}
 }
 
-case class AlterIsrReceived(brokerId: Int, brokerEpoch: Long, isrsToAlter: Map[TopicPartition, LeaderAndIsr],
-                            callback: AlterIsrCallback) extends ControllerEvent {
+case class AlterPartitionReceived(
+  alterPartitionRequest: AlterPartitionRequestData,
+  alterPartitionRequestVersion: Short,
+  callback: AlterPartitionResponseData => Unit
+) extends ControllerEvent {
   override def state: ControllerState = ControllerState.IsrChange
   override def preempt(): Unit = {}
 }
diff --git a/core/src/main/scala/kafka/controller/PartitionStateMachine.scala b/core/src/main/scala/kafka/controller/PartitionStateMachine.scala
index 105e158f348d5..71b163a2e2243 100755
--- a/core/src/main/scala/kafka/controller/PartitionStateMachine.scala
+++ b/core/src/main/scala/kafka/controller/PartitionStateMachine.scala
@@ -27,8 +27,10 @@ import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
 import kafka.zk.TopicPartitionStateZNode
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.errors.ControllerMovedException
+import org.apache.kafka.server.common.MetadataVersion.IBP_3_2_IV0
 import org.apache.zookeeper.KeeperException
 import org.apache.zookeeper.KeeperException.Code
+
 import scala.collection.{Map, Seq, mutable}
 
 abstract class PartitionStateMachine(controllerContext: ControllerContext) extends Logging {
@@ -131,6 +133,8 @@ class ZkPartitionStateMachine(config: KafkaConfig,
                               controllerBrokerRequestBatch: ControllerBrokerRequestBatch)
   extends PartitionStateMachine(controllerContext) {
 
+  private val isLeaderRecoverySupported = config.interBrokerProtocolVersion.isAtLeast(IBP_3_2_IV0)
+
   private val controllerId = config.brokerId
   this.logIdent = s"[PartitionStateMachine controllerId=$controllerId] "
 
@@ -410,7 +414,12 @@ class ZkPartitionStateMachine(config: KafkaConfig,
           validLeaderAndIsrs,
           allowUnclean
         )
-        leaderForOffline(controllerContext, partitionsWithUncleanLeaderElectionState).partition(_.leaderAndIsr.isEmpty)
+        leaderForOffline(
+          controllerContext,
+          isLeaderRecoverySupported,
+          partitionsWithUncleanLeaderElectionState
+        ).partition(_.leaderAndIsr.isEmpty)
+
       case ReassignPartitionLeaderElectionStrategy =>
         leaderForReassign(controllerContext, validLeaderAndIsrs).partition(_.leaderAndIsr.isEmpty)
       case PreferredReplicaPartitionLeaderElectionStrategy =>
diff --git a/core/src/main/scala/kafka/coordinator/group/GroupCoordinator.scala b/core/src/main/scala/kafka/coordinator/group/GroupCoordinator.scala
index 22d82dfa6d947..6bf337d6799fe 100644
--- a/core/src/main/scala/kafka/coordinator/group/GroupCoordinator.scala
+++ b/core/src/main/scala/kafka/coordinator/group/GroupCoordinator.scala
@@ -1104,7 +1104,7 @@ class GroupCoordinator(val brokerId: Int,
         groupId != null
       case _ =>
         // The remaining APIs are groups using Kafka for group coordination and must have a non-empty groupId
-        groupId != null && !groupId.isEmpty
+        groupId != null && groupId.nonEmpty
     }
   }
 
diff --git a/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala b/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala
index 24f9ad5fead81..e1bb7ccf262e2 100644
--- a/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala
+++ b/core/src/main/scala/kafka/coordinator/group/GroupMetadataManager.scala
@@ -26,7 +26,6 @@ import java.util.concurrent.locks.ReentrantLock
 import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
 
 import com.yammer.metrics.core.Gauge
-import kafka.api.{ApiVersion, KAFKA_0_10_1_IV0, KAFKA_2_1_IV0, KAFKA_2_1_IV1, KAFKA_2_3_IV0}
 import kafka.common.OffsetAndMetadata
 import kafka.internals.generated.{GroupMetadataValue, OffsetCommitKey, OffsetCommitValue, GroupMetadataKey => GroupMetadataKeyData}
 import kafka.log.AppendOrigin
@@ -47,13 +46,15 @@ import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
 import org.apache.kafka.common.requests.{OffsetCommitRequest, OffsetFetchResponse}
 import org.apache.kafka.common.utils.{Time, Utils}
 import org.apache.kafka.common.{KafkaException, MessageFormatter, TopicPartition}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_10_1_IV0, IBP_2_1_IV0, IBP_2_1_IV1, IBP_2_3_IV0}
 
 import scala.collection._
 import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
 
 class GroupMetadataManager(brokerId: Int,
-                           interBrokerProtocolVersion: ApiVersion,
+                           interBrokerProtocolVersion: MetadataVersion,
                            config: OffsetConfig,
                            val replicaManager: ReplicaManager,
                            time: Time,
@@ -1074,14 +1075,14 @@ object GroupMetadataManager {
    * Generates the payload for offset commit message from given offset and metadata
    *
    * @param offsetAndMetadata consumer's current offset and metadata
-   * @param apiVersion the api version
+   * @param metadataVersion the api version
    * @return payload for offset commit message
    */
   def offsetCommitValue(offsetAndMetadata: OffsetAndMetadata,
-                        apiVersion: ApiVersion): Array[Byte] = {
+                        metadataVersion: MetadataVersion): Array[Byte] = {
     val version =
-      if (apiVersion < KAFKA_2_1_IV0 || offsetAndMetadata.expireTimestamp.nonEmpty) 1.toShort
-      else if (apiVersion < KAFKA_2_1_IV1) 2.toShort
+      if (metadataVersion.isLessThan(IBP_2_1_IV0) || offsetAndMetadata.expireTimestamp.nonEmpty) 1.toShort
+      else if (metadataVersion.isLessThan(IBP_2_1_IV1)) 2.toShort
       else 3.toShort
     MessageUtil.toVersionPrefixedBytes(version, new OffsetCommitValue()
       .setOffset(offsetAndMetadata.offset)
@@ -1099,17 +1100,17 @@ object GroupMetadataManager {
    *
    * @param groupMetadata current group metadata
    * @param assignment the assignment for the rebalancing generation
-   * @param apiVersion the api version
+   * @param metadataVersion the api version
    * @return payload for offset commit message
    */
   def groupMetadataValue(groupMetadata: GroupMetadata,
                          assignment: Map[String, Array[Byte]],
-                         apiVersion: ApiVersion): Array[Byte] = {
+                         metadataVersion: MetadataVersion): Array[Byte] = {
 
     val version =
-      if (apiVersion < KAFKA_0_10_1_IV0) 0.toShort
-      else if (apiVersion < KAFKA_2_1_IV0) 1.toShort
-      else if (apiVersion < KAFKA_2_3_IV0) 2.toShort
+      if (metadataVersion.isLessThan(IBP_0_10_1_IV0)) 0.toShort
+      else if (metadataVersion.isLessThan(IBP_2_1_IV0)) 1.toShort
+      else if (metadataVersion.isLessThan(IBP_2_3_IV0)) 2.toShort
       else 3.toShort
 
     MessageUtil.toVersionPrefixedBytes(version, new GroupMetadataValue()
diff --git a/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerChannelManager.scala b/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerChannelManager.scala
index 62c70d91121db..94baf0f976d02 100644
--- a/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerChannelManager.scala
+++ b/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerChannelManager.scala
@@ -19,7 +19,7 @@ package kafka.coordinator.transaction
 
 import java.util
 import java.util.concurrent.{BlockingQueue, ConcurrentHashMap, LinkedBlockingQueue}
-import kafka.api.KAFKA_2_8_IV0
+
 import kafka.common.{InterBrokerSendThread, RequestAndCompletionHandler}
 import kafka.metrics.KafkaMetricsGroup
 import kafka.server.{KafkaConfig, MetadataCache, RequestLocal}
@@ -34,6 +34,7 @@ import org.apache.kafka.common.requests.{TransactionResult, WriteTxnMarkersReque
 import org.apache.kafka.common.security.JaasContext
 import org.apache.kafka.common.utils.{LogContext, Time}
 import org.apache.kafka.common.{Node, Reconfigurable, TopicPartition}
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_8_IV0
 
 import scala.collection.{concurrent, immutable}
 import scala.jdk.CollectionConverters._
@@ -147,7 +148,7 @@ class TransactionMarkerChannelManager(
   private val transactionsWithPendingMarkers = new ConcurrentHashMap[String, PendingCompleteTxn]
 
   val writeTxnMarkersRequestVersion: Short =
-    if (config.interBrokerProtocolVersion >= KAFKA_2_8_IV0) 1
+    if (config.interBrokerProtocolVersion.isAtLeast(IBP_2_8_IV0)) 1
     else 0
 
   newGauge("UnknownDestinationQueueSize", () => markersQueueForUnknownBroker.totalNumMarkers)
diff --git a/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerRequestCompletionHandler.scala b/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerRequestCompletionHandler.scala
index 848e0fa65ceeb..7a59139b17c76 100644
--- a/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerRequestCompletionHandler.scala
+++ b/core/src/main/scala/kafka/coordinator/transaction/TransactionMarkerRequestCompletionHandler.scala
@@ -89,7 +89,7 @@ class TransactionMarkerRequestCompletionHandler(brokerId: Int,
 
       val writeTxnMarkerResponse = response.responseBody.asInstanceOf[WriteTxnMarkersResponse]
 
-      val responseErrors = writeTxnMarkerResponse.errorsByProducerId;
+      val responseErrors = writeTxnMarkerResponse.errorsByProducerId
       for (txnIdAndMarker <- txnIdAndMarkerEntries.asScala) {
         val transactionalId = txnIdAndMarker.txnId
         val txnMarker = txnIdAndMarker.txnMarkerEntry
diff --git a/core/src/main/scala/kafka/log/AbstractIndex.scala b/core/src/main/scala/kafka/log/AbstractIndex.scala
index 31b9f6d8dd71e..37cd4b9f55c6a 100644
--- a/core/src/main/scala/kafka/log/AbstractIndex.scala
+++ b/core/src/main/scala/kafka/log/AbstractIndex.scala
@@ -188,7 +188,7 @@ abstract class AbstractIndex(@volatile private var _file: File, val baseOffset:
             safeForceUnmap()
           raf.setLength(roundedNewSize)
           _length = roundedNewSize
-          mmap = raf.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, roundedNewSize)
+          mmap = raf.getChannel.map(FileChannel.MapMode.READ_WRITE, 0, roundedNewSize)
           _maxEntries = mmap.limit() / entrySize
           mmap.position(position)
           debug(s"Resized ${file.getAbsolutePath} to $roundedNewSize, position is ${mmap.position()} " +
diff --git a/core/src/main/scala/kafka/log/LocalLog.scala b/core/src/main/scala/kafka/log/LocalLog.scala
index 86ac672448bd9..b0e7b0e446eab 100644
--- a/core/src/main/scala/kafka/log/LocalLog.scala
+++ b/core/src/main/scala/kafka/log/LocalLog.scala
@@ -86,7 +86,7 @@ class LocalLog(@volatile private var _dir: File,
 
   private[log] def dir: File = _dir
 
-  private[log] def name: String = dir.getName()
+  private[log] def name: String = dir.getName
 
   private[log] def parentDir: String = _parentDir
 
@@ -315,6 +315,44 @@ class LocalLog(@volatile private var _dir: File,
     }
   }
 
+  /**
+   * This method deletes the given segment and creates a new segment with the given new base offset. It ensures an
+   * active segment exists in the log at all times during this process.
+   *
+   * Asynchronous deletion allows reads to happen concurrently without synchronization and without the possibility of
+   * physically deleting a file while it is being read.
+   *
+   * This method does not convert IOException to KafkaStorageException, the immediate caller
+   * is expected to catch and handle IOException.
+   *
+   * @param newOffset The base offset of the new segment
+   * @param segmentToDelete The old active segment to schedule for deletion
+   * @param asyncDelete Whether the segment files should be deleted asynchronously
+   * @param reason The reason for the segment deletion
+   */
+  private[log] def createAndDeleteSegment(newOffset: Long,
+                                          segmentToDelete: LogSegment,
+                                          asyncDelete: Boolean,
+                                          reason: SegmentDeletionReason): LogSegment = {
+    if (newOffset == segmentToDelete.baseOffset)
+      segmentToDelete.changeFileSuffixes("", DeletedFileSuffix)
+
+    val newSegment = LogSegment.open(dir,
+      baseOffset = newOffset,
+      config,
+      time = time,
+      initFileSize = config.initFileSize,
+      preallocate = config.preallocate)
+    segments.add(newSegment)
+
+    reason.logReason(List(segmentToDelete))
+    if (newOffset != segmentToDelete.baseOffset)
+      segments.remove(segmentToDelete.baseOffset)
+    LocalLog.deleteSegmentFiles(List(segmentToDelete), asyncDelete, dir, topicPartition, config, scheduler, logDirFailureChannel, logIdent)
+
+    newSegment
+  }
+
   /**
    * Given a message offset, find its corresponding offset metadata in the log.
    * If the message offset is out of range, throw an OffsetOutOfRangeException
@@ -465,7 +503,10 @@ class LocalLog(@volatile private var _dir: File,
             s"=max(provided offset = $expectedNextOffset, LEO = $logEndOffset) while it already " +
             s"exists and is active with size 0. Size of time index: ${activeSegment.timeIndex.entries}," +
             s" size of offset index: ${activeSegment.offsetIndex.entries}.")
-          removeAndDeleteSegments(Seq(activeSegment), asyncDelete = true, LogRoll(this))
+          val newSegment = createAndDeleteSegment(newOffset, activeSegment, asyncDelete = true, LogRoll(this))
+          updateLogEndOffset(nextOffsetMetadata.messageOffset)
+          info(s"Rolled new log segment at offset $newOffset in ${time.hiResClockMs() - start} ms.")
+          return newSegment
         } else {
           throw new KafkaException(s"Trying to roll a new log segment for topic partition $topicPartition with start offset $newOffset" +
             s" =max(provided offset = $expectedNextOffset, LEO = $logEndOffset) while it already exists. Existing " +
@@ -517,14 +558,16 @@ class LocalLog(@volatile private var _dir: File,
       debug(s"Truncate and start at offset $newOffset")
       checkIfMemoryMappedBufferClosed()
       val segmentsToDelete = List[LogSegment]() ++ segments.values
-      removeAndDeleteSegments(segmentsToDelete, asyncDelete = true, LogTruncation(this))
-      segments.add(LogSegment.open(dir,
-        baseOffset = newOffset,
-        config = config,
-        time = time,
-        initFileSize = config.initFileSize,
-        preallocate = config.preallocate))
+
+      if (segmentsToDelete.nonEmpty) {
+        removeAndDeleteSegments(segmentsToDelete.dropRight(1), asyncDelete = true, LogTruncation(this))
+        // Use createAndDeleteSegment() to create new segment first and then delete the old last segment to prevent missing
+        // active segment during the deletion process
+        createAndDeleteSegment(newOffset, segmentsToDelete.last, asyncDelete = true, LogTruncation(this))
+      }
+
       updateLogEndOffset(newOffset)
+
       segmentsToDelete
     }
   }
@@ -614,9 +657,9 @@ object LocalLog extends Logging {
    */
   private[log] def logDeleteDirName(topicPartition: TopicPartition): String = {
     val uniqueId = java.util.UUID.randomUUID.toString.replaceAll("-", "")
-    val suffix = s"-${topicPartition.partition()}.${uniqueId}${DeleteDirSuffix}"
+    val suffix = s"-${topicPartition.partition()}.$uniqueId$DeleteDirSuffix"
     val prefixLength = Math.min(topicPartition.topic().size, 255 - suffix.size)
-    s"${topicPartition.topic().substring(0, prefixLength)}${suffix}"
+    s"${topicPartition.topic().substring(0, prefixLength)}$suffix"
   }
 
   /**
@@ -880,7 +923,7 @@ object LocalLog extends Logging {
                                    isRecoveredSwapFile: Boolean = false): Iterable[LogSegment] = {
     val sortedNewSegments = newSegments.sortBy(_.baseOffset)
     // Some old segments may have been removed from index and scheduled for async deletion after the caller reads segments
-    // but before this method is executed. We want to filter out those segments to avoid calling asyncDeleteSegment()
+    // but before this method is executed. We want to filter out those segments to avoid calling deleteSegmentFiles()
     // multiple times for the same segment.
     val sortedOldSegments = oldSegments.filter(seg => existingSegments.contains(seg.baseOffset)).sortBy(_.baseOffset)
 
@@ -888,7 +931,7 @@ object LocalLog extends Logging {
     // if we crash in the middle of this we complete the swap in loadSegments()
     if (!isRecoveredSwapFile)
       sortedNewSegments.reverse.foreach(_.changeFileSuffixes(CleanedFileSuffix, SwapFileSuffix))
-    sortedNewSegments.reverse.foreach(existingSegments.add(_))
+    sortedNewSegments.reverse.foreach(existingSegments.add)
     val newSegmentBaseOffsets = sortedNewSegments.map(_.baseOffset).toSet
 
     // delete the old files
@@ -941,7 +984,10 @@ object LocalLog extends Logging {
                                       scheduler: Scheduler,
                                       logDirFailureChannel: LogDirFailureChannel,
                                       logPrefix: String): Unit = {
-    segmentsToDelete.foreach(_.changeFileSuffixes("", DeletedFileSuffix))
+    segmentsToDelete.foreach { segment =>
+      if (!segment.hasSuffix(DeletedFileSuffix))
+        segment.changeFileSuffixes("", DeletedFileSuffix)
+    }
 
     def deleteSegments(): Unit = {
       info(s"${logPrefix}Deleting segment files ${segmentsToDelete.mkString(",")}")
diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala
index 0d4cab9a0fa0a..4ad0ea2d853c3 100644
--- a/core/src/main/scala/kafka/log/LogCleaner.scala
+++ b/core/src/main/scala/kafka/log/LogCleaner.scala
@@ -104,7 +104,7 @@ class LogCleaner(initialConfig: CleanerConfig,
   private[log] val cleanerManager = new LogCleanerManager(logDirs, logs, logDirFailureChannel)
 
   /* a throttle used to limit the I/O of all the cleaner threads to a user-specified maximum rate */
-  private val throttler = new Throttler(desiredRatePerSec = config.maxIoBytesPerSecond,
+  private[log] val throttler = new Throttler(desiredRatePerSec = config.maxIoBytesPerSecond,
                                         checkIntervalMs = 300,
                                         throttleDown = true,
                                         "cleaner-io",
@@ -186,11 +186,20 @@ class LogCleaner(initialConfig: CleanerConfig,
   }
 
   /**
-    * Reconfigure log clean config. This simply stops current log cleaners and creates new ones.
+    * Reconfigure log clean config. The will:
+    * 1. update desiredRatePerSec in Throttler with logCleanerIoMaxBytesPerSecond, if necessary 
+    * 2. stop current log cleaners and create new ones.
     * That ensures that if any of the cleaners had failed, new cleaners are created to match the new config.
     */
   override def reconfigure(oldConfig: KafkaConfig, newConfig: KafkaConfig): Unit = {
     config = LogCleaner.cleanerConfig(newConfig)
+
+    val maxIoBytesPerSecond = config.maxIoBytesPerSecond;
+    if (maxIoBytesPerSecond != oldConfig.logCleanerIoMaxBytesPerSecond) {
+      info(s"Updating logCleanerIoMaxBytesPerSecond: $maxIoBytesPerSecond")
+      throttler.updateDesiredRatePerSec(maxIoBytesPerSecond)
+    }
+
     shutdown()
     startup()
   }
@@ -486,7 +495,7 @@ private[log] class Cleaner(val id: Int,
   /* buffer used for write i/o */
   private var writeBuffer = ByteBuffer.allocate(ioBufferSize)
 
-  private val decompressionBufferSupplier = BufferSupplier.create();
+  private val decompressionBufferSupplier = BufferSupplier.create()
 
   require(offsetMap.slots * dupBufferLoadFactor > 1, "offset map is too small to fit in even a single message, so log cleaning will never make progress. You can increase log.cleaner.dedupe.buffer.size or decrease log.cleaner.threads")
 
@@ -577,8 +586,10 @@ private[log] class Cleaner(val id: Int,
         val currentSegment = currentSegmentOpt.get
         val nextSegmentOpt = if (iter.hasNext) Some(iter.next()) else None
 
+        // Note that it is important to collect aborted transactions from the full log segment
+        // range since we need to rebuild the full transaction index for the new segment.
         val startOffset = currentSegment.baseOffset
-        val upperBoundOffset = nextSegmentOpt.map(_.baseOffset).getOrElse(map.latestOffset + 1)
+        val upperBoundOffset = nextSegmentOpt.map(_.baseOffset).getOrElse(currentSegment.readNextOffset)
         val abortedTransactions = log.collectAbortedTransactions(startOffset, upperBoundOffset)
         transactionMetadata.addAbortedTransactions(abortedTransactions)
 
@@ -690,6 +701,8 @@ private[log] class Cleaner(val id: Int,
         if (discardBatchRecords)
           // The batch is only retained to preserve producer sequence information; the records can be removed
           false
+        else if (batch.isControlBatch)
+          true
         else
           Cleaner.this.shouldRetainRecord(map, retainLegacyDeletesAndTxnMarkers, batch, record, stats, currentTime = currentTime)
       }
diff --git a/core/src/main/scala/kafka/log/LogCleanerManager.scala b/core/src/main/scala/kafka/log/LogCleanerManager.scala
index 8b6926b0d46d7..48f4d49b6d621 100755
--- a/core/src/main/scala/kafka/log/LogCleanerManager.scala
+++ b/core/src/main/scala/kafka/log/LogCleanerManager.scala
@@ -21,15 +21,15 @@ import java.io.File
 import java.util.concurrent.TimeUnit
 import java.util.concurrent.locks.ReentrantLock
 
-import kafka.common.{KafkaException, LogCleaningAbortedException}
+import kafka.common.LogCleaningAbortedException
 import kafka.metrics.KafkaMetricsGroup
 import kafka.server.LogDirFailureChannel
 import kafka.server.checkpoints.OffsetCheckpointFile
 import kafka.utils.CoreUtils._
 import kafka.utils.{Logging, Pool}
-import org.apache.kafka.common.TopicPartition
-import org.apache.kafka.common.utils.Time
+import org.apache.kafka.common.{KafkaException, TopicPartition}
 import org.apache.kafka.common.errors.KafkaStorageException
+import org.apache.kafka.common.utils.Time
 
 import scala.collection.{Iterable, Seq, mutable}
 
@@ -64,7 +64,7 @@ private[log] class LogCleanerManager(val logDirs: Seq[File],
   import LogCleanerManager._
 
 
-  protected override def loggerName = classOf[LogCleaner].getName
+  protected override def loggerName: String = classOf[LogCleaner].getName
 
   // package-private for testing
   private[log] val offsetCheckpointFile = "cleaner-offset-checkpoint"
@@ -400,11 +400,11 @@ private[log] class LogCleanerManager(val logDirs: Seq[File],
       try {
         checkpoints.get(sourceLogDir).flatMap(_.read().get(topicPartition)) match {
           case Some(offset) =>
-            debug(s"Removing the partition offset data in checkpoint file for '${topicPartition}' " +
+            debug(s"Removing the partition offset data in checkpoint file for '$topicPartition' " +
               s"from ${sourceLogDir.getAbsoluteFile} directory.")
             updateCheckpoints(sourceLogDir, partitionToRemove = Option(topicPartition))
 
-            debug(s"Adding the partition offset data in checkpoint file for '${topicPartition}' " +
+            debug(s"Adding the partition offset data in checkpoint file for '$topicPartition' " +
               s"to ${destLogDir.getAbsoluteFile} directory.")
             updateCheckpoints(destLogDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
           case None =>
@@ -525,15 +525,15 @@ private[log] class LogCleanerManager(val logDirs: Seq[File],
       // Remove deleted partitions
       uncleanablePartitions.values.foreach {
         partitions =>
-          val partitionsToRemove = partitions.filterNot(logs.contains(_)).toList
-          partitionsToRemove.foreach { partitions.remove(_) }
+          val partitionsToRemove = partitions.filterNot(logs.contains).toList
+          partitionsToRemove.foreach { partitions.remove }
       }
 
       // Remove entries with empty partition set.
       val logDirsToRemove = uncleanablePartitions.filter {
         case (_, partitions) => partitions.isEmpty
-      }.map { _._1}.toList
-      logDirsToRemove.foreach { uncleanablePartitions.remove(_) }
+      }.keys.toList
+      logDirsToRemove.foreach { uncleanablePartitions.remove }
     }
   }
 }
diff --git a/core/src/main/scala/kafka/log/LogConfig.scala b/core/src/main/scala/kafka/log/LogConfig.scala
index 845f80a3a1332..7b008fe26afea 100755
--- a/core/src/main/scala/kafka/log/LogConfig.scala
+++ b/core/src/main/scala/kafka/log/LogConfig.scala
@@ -17,7 +17,6 @@
 
 package kafka.log
 
-import kafka.api.{ApiVersion, ApiVersionValidator, KAFKA_3_0_IV1}
 import kafka.log.LogConfig.configDef
 import kafka.message.BrokerCompressionCodec
 import kafka.server.{KafkaConfig, ThrottledReplicaListValidator}
@@ -27,8 +26,14 @@ import org.apache.kafka.common.config.{AbstractConfig, ConfigDef, ConfigExceptio
 import org.apache.kafka.common.errors.InvalidConfigurationException
 import org.apache.kafka.common.record.{LegacyRecord, RecordVersion, TimestampType}
 import org.apache.kafka.common.utils.{ConfigUtils, Utils}
-
+import org.apache.kafka.metadata.ConfigSynonym
+import org.apache.kafka.metadata.ConfigSynonym.{HOURS_TO_MILLISECONDS, MINUTES_TO_MILLISECONDS}
+import java.util.Arrays.asList
 import java.util.{Collections, Locale, Properties}
+
+import org.apache.kafka.server.common.{MetadataVersion, MetadataVersionValidator}
+import org.apache.kafka.server.common.MetadataVersion._
+
 import scala.annotation.nowarn
 import scala.collection.{Map, mutable}
 import scala.jdk.CollectionConverters._
@@ -100,7 +105,7 @@ case class LogConfig(props: java.util.Map[_, _], overriddenConfigs: Set[String]
 
   /* See `TopicConfig.MESSAGE_FORMAT_VERSION_CONFIG` for details */
   @deprecated("3.0")
-  val messageFormatVersion = ApiVersion(getString(LogConfig.MessageFormatVersionProp))
+  val messageFormatVersion = MetadataVersion.fromVersionString(getString(LogConfig.MessageFormatVersionProp))
 
   val messageTimestampType = TimestampType.forName(getString(LogConfig.MessageTimestampTypeProp))
   val messageTimestampDifferenceMaxMs = getLong(LogConfig.MessageTimestampDifferenceMaxMsProp).longValue
@@ -142,7 +147,7 @@ case class LogConfig(props: java.util.Map[_, _], overriddenConfigs: Set[String]
         }
 
         if (localLogRetentionBytes > retentionSize) {
-          throw new ConfigException(LogConfig.LocalLogRetentionBytesProp, localLogRetentionBytes, s"Value must not be more than property: ${LogConfig.RetentionBytesProp} value.");
+          throw new ConfigException(LogConfig.LocalLogRetentionBytesProp, localLogRetentionBytes, s"Value must not be more than property: ${LogConfig.RetentionBytesProp} value.")
         }
 
         localLogRetentionBytes
@@ -154,7 +159,7 @@ case class LogConfig(props: java.util.Map[_, _], overriddenConfigs: Set[String]
   def remoteLogConfig = _remoteLogConfig
 
   @nowarn("cat=deprecation")
-  def recordVersion = messageFormatVersion.recordVersion
+  def recordVersion = messageFormatVersion.highestSupportedRecordVersion
 
   def randomSegmentJitter: Long =
     if (segmentJitterMs == 0) 0 else Utils.abs(scala.util.Random.nextInt()) % math.min(segmentJitterMs, segmentMs)
@@ -234,7 +239,7 @@ object LogConfig {
   val LocalLogRetentionMsDoc = TopicConfig.LOCAL_LOG_RETENTION_MS_DOC
   val LocalLogRetentionBytesDoc = TopicConfig.LOCAL_LOG_RETENTION_BYTES_DOC
   val MaxMessageSizeDoc = TopicConfig.MAX_MESSAGE_BYTES_DOC
-  val IndexIntervalDoc = TopicConfig.INDEX_INTERVAL_BYTES_DOCS
+  val IndexIntervalDoc = TopicConfig.INDEX_INTERVAL_BYTES_DOC
   val FileDeleteDelayMsDoc = TopicConfig.FILE_DELETE_DELAY_MS_DOC
   val DeleteRetentionMsDoc = TopicConfig.DELETE_RETENTION_MS_DOC
   val MinCompactionLagMsDoc = TopicConfig.MIN_COMPACTION_LAG_MS_DOC
@@ -265,7 +270,7 @@ object LogConfig {
 
   private[log] val ServerDefaultHeaderName = "Server Default Property"
 
-  val configsWithNoServerDefaults: Set[String] = Set(RemoteLogStorageEnableProp, LocalLogRetentionMsProp, LocalLogRetentionBytesProp);
+  val configsWithNoServerDefaults: Set[String] = Set(RemoteLogStorageEnableProp, LocalLogRetentionMsProp, LocalLogRetentionBytesProp)
 
   // Package private for testing
   private[log] class LogConfigDef(base: ConfigDef) extends ConfigDef(base) {
@@ -328,9 +333,9 @@ object LogConfig {
         KafkaConfig.LogRollTimeMillisProp)
       .define(SegmentJitterMsProp, LONG, Defaults.SegmentJitterMs, atLeast(0), MEDIUM, SegmentJitterMsDoc,
         KafkaConfig.LogRollTimeJitterMillisProp)
-      .define(SegmentIndexBytesProp, INT, Defaults.MaxIndexSize, atLeast(0), MEDIUM, MaxIndexSizeDoc,
+      .define(SegmentIndexBytesProp, INT, Defaults.MaxIndexSize, atLeast(4), MEDIUM, MaxIndexSizeDoc,
         KafkaConfig.LogIndexSizeMaxBytesProp)
-      .define(FlushMessagesProp, LONG, Defaults.FlushInterval, atLeast(0), MEDIUM, FlushIntervalDoc,
+      .define(FlushMessagesProp, LONG, Defaults.FlushInterval, atLeast(1), MEDIUM, FlushIntervalDoc,
         KafkaConfig.LogFlushIntervalMessagesProp)
       .define(FlushMsProp, LONG, Defaults.FlushMs, atLeast(0), MEDIUM, FlushMsDoc,
         KafkaConfig.LogFlushIntervalMsProp)
@@ -364,7 +369,7 @@ object LogConfig {
         MEDIUM, CompressionTypeDoc, KafkaConfig.CompressionTypeProp)
       .define(PreAllocateEnableProp, BOOLEAN, Defaults.PreAllocateEnable, MEDIUM, PreAllocateEnableDoc,
         KafkaConfig.LogPreAllocateProp)
-      .define(MessageFormatVersionProp, STRING, Defaults.MessageFormatVersion, ApiVersionValidator, MEDIUM, MessageFormatVersionDoc,
+      .define(MessageFormatVersionProp, STRING, Defaults.MessageFormatVersion, new MetadataVersionValidator(), MEDIUM, MessageFormatVersionDoc,
         KafkaConfig.LogMessageFormatVersionProp)
       .define(MessageTimestampTypeProp, STRING, Defaults.MessageTimestampType, in("CreateTime", "LogAppendTime"), MEDIUM, MessageTimestampTypeDoc,
         KafkaConfig.LogMessageTimestampTypeProp)
@@ -440,37 +445,86 @@ object LogConfig {
   }
 
   /**
-   * Map topic config to the broker config with highest priority. Some of these have additional synonyms
-   * that can be obtained using [[kafka.server.DynamicBrokerConfig#brokerConfigSynonyms]]
+   * Maps topic configurations to their equivalent broker configurations.
+   *
+   * Topics can be configured either by setting their dynamic topic configurations, or by
+   * setting equivalent broker configurations. For historical reasons, the equivalent broker
+   * configurations have different names. This table maps each topic configuration to its
+   * equivalent broker configurations.
+   *
+   * In some cases, the equivalent broker configurations must be transformed before they
+   * can be used. For example, log.roll.hours must be converted to milliseconds before it
+   * can be used as the value of segment.ms.
+   *
+   * The broker configurations will be used in the order specified here. In other words, if
+   * both the first and the second synonyms are configured, we will use only the value of
+   * the first synonym and ignore the second.
    */
   @nowarn("cat=deprecation")
-  val TopicConfigSynonyms = Map(
-    SegmentBytesProp -> KafkaConfig.LogSegmentBytesProp,
-    SegmentMsProp -> KafkaConfig.LogRollTimeMillisProp,
-    SegmentJitterMsProp -> KafkaConfig.LogRollTimeJitterMillisProp,
-    SegmentIndexBytesProp -> KafkaConfig.LogIndexSizeMaxBytesProp,
-    FlushMessagesProp -> KafkaConfig.LogFlushIntervalMessagesProp,
-    FlushMsProp -> KafkaConfig.LogFlushIntervalMsProp,
-    RetentionBytesProp -> KafkaConfig.LogRetentionBytesProp,
-    RetentionMsProp -> KafkaConfig.LogRetentionTimeMillisProp,
-    MaxMessageBytesProp -> KafkaConfig.MessageMaxBytesProp,
-    IndexIntervalBytesProp -> KafkaConfig.LogIndexIntervalBytesProp,
-    DeleteRetentionMsProp -> KafkaConfig.LogCleanerDeleteRetentionMsProp,
-    MinCompactionLagMsProp -> KafkaConfig.LogCleanerMinCompactionLagMsProp,
-    MaxCompactionLagMsProp -> KafkaConfig.LogCleanerMaxCompactionLagMsProp,
-    FileDeleteDelayMsProp -> KafkaConfig.LogDeleteDelayMsProp,
-    MinCleanableDirtyRatioProp -> KafkaConfig.LogCleanerMinCleanRatioProp,
-    CleanupPolicyProp -> KafkaConfig.LogCleanupPolicyProp,
-    UncleanLeaderElectionEnableProp -> KafkaConfig.UncleanLeaderElectionEnableProp,
-    MinInSyncReplicasProp -> KafkaConfig.MinInSyncReplicasProp,
-    CompressionTypeProp -> KafkaConfig.CompressionTypeProp,
-    PreAllocateEnableProp -> KafkaConfig.LogPreAllocateProp,
-    MessageFormatVersionProp -> KafkaConfig.LogMessageFormatVersionProp,
-    MessageTimestampTypeProp -> KafkaConfig.LogMessageTimestampTypeProp,
-    MessageTimestampDifferenceMaxMsProp -> KafkaConfig.LogMessageTimestampDifferenceMaxMsProp,
-    MessageDownConversionEnableProp -> KafkaConfig.LogMessageDownConversionEnableProp
-  )
+  val AllTopicConfigSynonyms = Map(
+    SegmentBytesProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogSegmentBytesProp)),
+    SegmentMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogRollTimeMillisProp),
+      new ConfigSynonym(KafkaConfig.LogRollTimeHoursProp, HOURS_TO_MILLISECONDS)),
+    SegmentJitterMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogRollTimeJitterMillisProp),
+      new ConfigSynonym(KafkaConfig.LogRollTimeJitterHoursProp, HOURS_TO_MILLISECONDS)),
+    SegmentIndexBytesProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogIndexSizeMaxBytesProp)),
+    FlushMessagesProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogFlushIntervalMessagesProp)),
+    FlushMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogFlushIntervalMsProp),
+      new ConfigSynonym(KafkaConfig.LogFlushSchedulerIntervalMsProp)),
+    RetentionBytesProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogRetentionBytesProp)),
+    RetentionMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogRetentionTimeMillisProp),
+      new ConfigSynonym(KafkaConfig.LogRetentionTimeMinutesProp, MINUTES_TO_MILLISECONDS),
+      new ConfigSynonym(KafkaConfig.LogRetentionTimeHoursProp, HOURS_TO_MILLISECONDS)),
+    MaxMessageBytesProp -> asList(
+      new ConfigSynonym(KafkaConfig.MessageMaxBytesProp)),
+    IndexIntervalBytesProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogIndexIntervalBytesProp)),
+    DeleteRetentionMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogCleanerDeleteRetentionMsProp)),
+    MinCompactionLagMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogCleanerMinCompactionLagMsProp)),
+    MaxCompactionLagMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogCleanerMaxCompactionLagMsProp)),
+    FileDeleteDelayMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogDeleteDelayMsProp)),
+    MinCleanableDirtyRatioProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogCleanerMinCleanRatioProp)),
+    CleanupPolicyProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogCleanupPolicyProp)),
+    UncleanLeaderElectionEnableProp -> asList(
+      new ConfigSynonym(KafkaConfig.UncleanLeaderElectionEnableProp)),
+    MinInSyncReplicasProp -> asList(
+      new ConfigSynonym(KafkaConfig.MinInSyncReplicasProp)),
+    CompressionTypeProp -> asList(
+      new ConfigSynonym(KafkaConfig.CompressionTypeProp)),
+    PreAllocateEnableProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogPreAllocateProp)),
+    MessageFormatVersionProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogMessageFormatVersionProp)),
+    MessageTimestampTypeProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogMessageTimestampTypeProp)),
+    MessageTimestampDifferenceMaxMsProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogMessageTimestampDifferenceMaxMsProp)),
+    MessageDownConversionEnableProp -> asList(
+      new ConfigSynonym(KafkaConfig.LogMessageDownConversionEnableProp)),
+  ).asJava
 
+  /**
+   * Map topic config to the broker config with highest priority. Some of these have additional synonyms
+   * that can be obtained using [[kafka.server.DynamicBrokerConfig#brokerConfigSynonyms]]
+   * or using [[AllTopicConfigSynonyms]]
+   */
+  val TopicConfigSynonyms = AllTopicConfigSynonyms.asScala.map {
+    case (k, v) => k -> v.get(0).name()
+  }
 
   /**
    * Copy the subset of properties that are relevant to Logs. The individual properties
@@ -508,17 +562,17 @@ object LogConfig {
     logProps
   }
 
-  def shouldIgnoreMessageFormatVersion(interBrokerProtocolVersion: ApiVersion): Boolean =
-    interBrokerProtocolVersion >= KAFKA_3_0_IV1
+  def shouldIgnoreMessageFormatVersion(interBrokerProtocolVersion: MetadataVersion): Boolean =
+    interBrokerProtocolVersion.isAtLeast(IBP_3_0_IV1)
 
   class MessageFormatVersion(messageFormatVersionString: String, interBrokerProtocolVersionString: String) {
-    val messageFormatVersion = ApiVersion(messageFormatVersionString)
-    private val interBrokerProtocolVersion = ApiVersion(interBrokerProtocolVersionString)
+    val messageFormatVersion = MetadataVersion.fromVersionString(messageFormatVersionString)
+    private val interBrokerProtocolVersion = MetadataVersion.fromVersionString(interBrokerProtocolVersionString)
 
     def shouldIgnore: Boolean = shouldIgnoreMessageFormatVersion(interBrokerProtocolVersion)
 
     def shouldWarn: Boolean =
-      interBrokerProtocolVersion >= KAFKA_3_0_IV1 && messageFormatVersion.recordVersion.precedes(RecordVersion.V2)
+      interBrokerProtocolVersion.isAtLeast(IBP_3_0_IV1) && messageFormatVersion.highestSupportedRecordVersion.precedes(RecordVersion.V2)
 
     @nowarn("cat=deprecation")
     def topicWarningMessage(topicName: String): String = {
diff --git a/core/src/main/scala/kafka/log/LogLoader.scala b/core/src/main/scala/kafka/log/LogLoader.scala
index eb9dec7a589b8..25ee89c72bbde 100644
--- a/core/src/main/scala/kafka/log/LogLoader.scala
+++ b/core/src/main/scala/kafka/log/LogLoader.scala
@@ -29,6 +29,7 @@ import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.errors.InvalidOffsetException
 import org.apache.kafka.common.utils.Time
 
+import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}
 import scala.collection.{Set, mutable}
 
 case class LoadedLogOffsets(logStartOffset: Long,
@@ -64,6 +65,7 @@ object LogLoader extends Logging {
  * @param recoveryPointCheckpoint The checkpoint of the offset at which to begin the recovery
  * @param leaderEpochCache An optional LeaderEpochFileCache instance to be updated during recovery
  * @param producerStateManager The ProducerStateManager instance to be updated during recovery
+ * @param numRemainingSegments The remaining segments to be recovered in this log keyed by recovery thread name
  */
 class LogLoader(
   dir: File,
@@ -77,7 +79,8 @@ class LogLoader(
   logStartOffsetCheckpoint: Long,
   recoveryPointCheckpoint: Long,
   leaderEpochCache: Option[LeaderEpochFileCache],
-  producerStateManager: ProducerStateManager
+  producerStateManager: ProducerStateManager,
+  numRemainingSegments: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]
 ) extends Logging {
   logIdent = s"[LogLoader partition=$topicPartition, dir=${dir.getParent}] "
 
@@ -392,7 +395,7 @@ class LogLoader(
           Some(logEndOffset)
         else {
           warn(s"Deleting all segments because logEndOffset ($logEndOffset) " +
-            s"is smaller than logStartOffset ${logStartOffsetCheckpoint}. " +
+            s"is smaller than logStartOffset $logStartOffsetCheckpoint. " +
             "This could happen if segment files were deleted from the file system.")
           removeAndDeleteSegmentsAsync(segments.values)
           leaderEpochCache.foreach(_.clearAndFlush())
@@ -404,12 +407,18 @@ class LogLoader(
 
     // If we have the clean shutdown marker, skip recovery.
     if (!hadCleanShutdown) {
-      val unflushed = segments.values(recoveryPointCheckpoint, Long.MaxValue).iterator
+      val unflushed = segments.values(recoveryPointCheckpoint, Long.MaxValue)
+      val numUnflushed = unflushed.size
+      val unflushedIter = unflushed.iterator
       var truncated = false
+      var numFlushed = 0
+      val threadName = Thread.currentThread().getName
+      numRemainingSegments.put(threadName, numUnflushed)
+
+      while (unflushedIter.hasNext && !truncated) {
+        val segment = unflushedIter.next()
+        info(s"Recovering unflushed segment ${segment.baseOffset}. $numFlushed/$numUnflushed recovered for $topicPartition.")
 
-      while (unflushed.hasNext && !truncated) {
-        val segment = unflushed.next()
-        info(s"Recovering unflushed segment ${segment.baseOffset}")
         val truncatedBytes =
           try {
             recoverSegment(segment)
@@ -424,8 +433,13 @@ class LogLoader(
           // we had an invalid message, delete all remaining log
           warn(s"Corruption found in segment ${segment.baseOffset}," +
             s" truncating to offset ${segment.readNextOffset}")
-          removeAndDeleteSegmentsAsync(unflushed.toList)
+          removeAndDeleteSegmentsAsync(unflushedIter.toList)
           truncated = true
+          // segment is truncated, so set remaining segments to 0
+          numRemainingSegments.put(threadName, 0)
+        } else {
+          numFlushed += 1
+          numRemainingSegments.put(threadName, numUnflushed - numFlushed)
         }
       }
     }
diff --git a/core/src/main/scala/kafka/log/LogManager.scala b/core/src/main/scala/kafka/log/LogManager.scala
index b81f6a928a137..886f56c63cfa9 100755
--- a/core/src/main/scala/kafka/log/LogManager.scala
+++ b/core/src/main/scala/kafka/log/LogManager.scala
@@ -17,13 +17,12 @@
 
 package kafka.log
 
-import kafka.api.ApiVersion
 import kafka.log.LogConfig.MessageFormatVersion
-
 import java.io._
 import java.nio.file.Files
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicInteger
+
 import kafka.metrics.KafkaMetricsGroup
 import kafka.server.checkpoints.OffsetCheckpointFile
 import kafka.server.metadata.ConfigRepository
@@ -38,8 +37,10 @@ import scala.collection._
 import scala.collection.mutable.ArrayBuffer
 import scala.util.{Failure, Success, Try}
 import kafka.utils.Implicits._
-
 import java.util.Properties
+
+import org.apache.kafka.server.common.MetadataVersion
+
 import scala.annotation.nowarn
 
 /**
@@ -65,7 +66,7 @@ class LogManager(logDirs: Seq[File],
                  val retentionCheckMs: Long,
                  val maxTransactionTimeoutMs: Int,
                  val maxPidExpirationMs: Int,
-                 interBrokerProtocolVersion: ApiVersion,
+                 interBrokerProtocolVersion: MetadataVersion,
                  scheduler: Scheduler,
                  brokerTopicStats: BrokerTopicStats,
                  logDirFailureChannel: LogDirFailureChannel,
@@ -261,7 +262,8 @@ class LogManager(logDirs: Seq[File],
                            recoveryPoints: Map[TopicPartition, Long],
                            logStartOffsets: Map[TopicPartition, Long],
                            defaultConfig: LogConfig,
-                           topicConfigOverrides: Map[String, LogConfig]): UnifiedLog = {
+                           topicConfigOverrides: Map[String, LogConfig],
+                           numRemainingSegments: ConcurrentMap[String, Int]): UnifiedLog = {
     val topicPartition = UnifiedLog.parseTopicPartitionName(logDir)
     val config = topicConfigOverrides.getOrElse(topicPartition.topic, defaultConfig)
     val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L)
@@ -281,7 +283,8 @@ class LogManager(logDirs: Seq[File],
       logDirFailureChannel = logDirFailureChannel,
       lastShutdownClean = hadCleanShutdown,
       topicId = None,
-      keepPartitionMetadataFile = keepPartitionMetadataFile)
+      keepPartitionMetadataFile = keepPartitionMetadataFile,
+      numRemainingSegments = numRemainingSegments)
 
     if (logDir.getName.endsWith(UnifiedLog.DeleteDirSuffix)) {
       addLogToBeDeleted(log)
@@ -306,6 +309,27 @@ class LogManager(logDirs: Seq[File],
     log
   }
 
+  // factory class for naming the log recovery threads used in metrics
+  class LogRecoveryThreadFactory(val dirPath: String) extends ThreadFactory {
+    val threadNum = new AtomicInteger(0)
+
+    override def newThread(runnable: Runnable): Thread = {
+      KafkaThread.nonDaemon(logRecoveryThreadName(dirPath, threadNum.getAndIncrement()), runnable)
+    }
+  }
+
+  // create a unique log recovery thread name for each log dir as the format: prefix-dirPath-threadNum, ex: "log-recovery-/tmp/kafkaLogs-0"
+  private def logRecoveryThreadName(dirPath: String, threadNum: Int, prefix: String = "log-recovery"): String = s"$prefix-$dirPath-$threadNum"
+
+  /*
+   * decrement the number of remaining logs
+   * @return the number of remaining logs after decremented 1
+   */
+  private[log] def decNumRemainingLogs(numRemainingLogs: ConcurrentMap[String, Int], path: String): Int = {
+    require(path != null, "path cannot be null to update remaining logs metric.")
+    numRemainingLogs.compute(path, (_, oldVal) => oldVal - 1)
+  }
+
   /**
    * Recover and load all logs in the given data directories
    */
@@ -316,13 +340,22 @@ class LogManager(logDirs: Seq[File],
     val offlineDirs = mutable.Set.empty[(String, IOException)]
     val jobs = ArrayBuffer.empty[Seq[Future[_]]]
     var numTotalLogs = 0
+    // log dir path -> number of Remaining logs map for remainingLogsToRecover metric
+    val numRemainingLogs: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]
+    // log recovery thread name -> number of remaining segments map for remainingSegmentsToRecover metric
+    val numRemainingSegments: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]
+
+    def handleIOException(logDirAbsolutePath: String, e: IOException): Unit = {
+      offlineDirs.add((logDirAbsolutePath, e))
+      error(s"Error while loading log dir $logDirAbsolutePath", e)
+    }
 
     for (dir <- liveLogDirs) {
       val logDirAbsolutePath = dir.getAbsolutePath
       var hadCleanShutdown: Boolean = false
       try {
         val pool = Executors.newFixedThreadPool(numRecoveryThreadsPerDataDir,
-          KafkaThread.nonDaemon(s"log-recovery-$logDirAbsolutePath", _))
+          new LogRecoveryThreadFactory(logDirAbsolutePath))
         threadPools.append(pool)
 
         val cleanShutdownFile = new File(dir, LogLoader.CleanShutdownFile)
@@ -357,26 +390,32 @@ class LogManager(logDirs: Seq[File],
 
         val logsToLoad = Option(dir.listFiles).getOrElse(Array.empty).filter(logDir =>
           logDir.isDirectory && UnifiedLog.parseTopicPartitionName(logDir).topic != KafkaRaftServer.MetadataTopic)
-        val numLogsLoaded = new AtomicInteger(0)
         numTotalLogs += logsToLoad.length
+        numRemainingLogs.put(dir.getAbsolutePath, logsToLoad.length)
 
         val jobsForDir = logsToLoad.map { logDir =>
           val runnable: Runnable = () => {
+            debug(s"Loading log $logDir")
+            var log = None: Option[UnifiedLog]
+            val logLoadStartMs = time.hiResClockMs()
             try {
-              debug(s"Loading log $logDir")
-
-              val logLoadStartMs = time.hiResClockMs()
-              val log = loadLog(logDir, hadCleanShutdown, recoveryPoints, logStartOffsets,
-                defaultConfig, topicConfigOverrides)
-              val logLoadDurationMs = time.hiResClockMs() - logLoadStartMs
-              val currentNumLoaded = numLogsLoaded.incrementAndGet()
-
-              info(s"Completed load of $log with ${log.numberOfSegments} segments in ${logLoadDurationMs}ms " +
-                s"($currentNumLoaded/${logsToLoad.length} loaded in $logDirAbsolutePath)")
+              log = Some(loadLog(logDir, hadCleanShutdown, recoveryPoints, logStartOffsets,
+                defaultConfig, topicConfigOverrides, numRemainingSegments))
             } catch {
               case e: IOException =>
-                offlineDirs.add((logDirAbsolutePath, e))
-                error(s"Error while loading log dir $logDirAbsolutePath", e)
+                handleIOException(logDirAbsolutePath, e)
+              case e: KafkaStorageException if e.getCause.isInstanceOf[IOException] =>
+                // KafkaStorageException might be thrown, ex: during writing LeaderEpochFileCache
+                // And while converting IOException to KafkaStorageException, we've already handled the exception. So we can ignore it here.
+            } finally {
+              val logLoadDurationMs = time.hiResClockMs() - logLoadStartMs
+              val remainingLogs = decNumRemainingLogs(numRemainingLogs, dir.getAbsolutePath)
+              val currentNumLoaded = logsToLoad.length - remainingLogs
+              log match {
+                case Some(loadedLog) => info(s"Completed load of $loadedLog with ${loadedLog.numberOfSegments} segments in ${logLoadDurationMs}ms " +
+                  s"($currentNumLoaded/${logsToLoad.length} completed in $logDirAbsolutePath)")
+                case None => info(s"Error while loading logs in $logDir in ${logLoadDurationMs}ms ($currentNumLoaded/${logsToLoad.length} completed in $logDirAbsolutePath)")
+              }
             }
           }
           runnable
@@ -385,12 +424,12 @@ class LogManager(logDirs: Seq[File],
         jobs += jobsForDir.map(pool.submit)
       } catch {
         case e: IOException =>
-          offlineDirs.add((logDirAbsolutePath, e))
-          error(s"Error while loading log dir $logDirAbsolutePath", e)
+          handleIOException(logDirAbsolutePath, e)
       }
     }
 
     try {
+      addLogRecoveryMetrics(numRemainingLogs, numRemainingSegments)
       for (dirJobs <- jobs) {
         dirJobs.foreach(_.get)
       }
@@ -403,12 +442,37 @@ class LogManager(logDirs: Seq[File],
         error(s"There was an error in one of the threads during logs loading: ${e.getCause}")
         throw e.getCause
     } finally {
+      removeLogRecoveryMetrics()
       threadPools.foreach(_.shutdown())
     }
 
     info(s"Loaded $numTotalLogs logs in ${time.hiResClockMs() - startMs}ms.")
   }
 
+  private[log] def addLogRecoveryMetrics(numRemainingLogs: ConcurrentMap[String, Int],
+                                         numRemainingSegments: ConcurrentMap[String, Int]): Unit = {
+    debug("Adding log recovery metrics")
+    for (dir <- logDirs) {
+      newGauge("remainingLogsToRecover", () => numRemainingLogs.get(dir.getAbsolutePath),
+        Map("dir" -> dir.getAbsolutePath))
+      for (i <- 0 until numRecoveryThreadsPerDataDir) {
+        val threadName = logRecoveryThreadName(dir.getAbsolutePath, i)
+        newGauge("remainingSegmentsToRecover", () => numRemainingSegments.get(threadName),
+          Map("dir" -> dir.getAbsolutePath, "threadNum" -> i.toString))
+      }
+    }
+  }
+
+  private[log] def removeLogRecoveryMetrics(): Unit = {
+    debug("Removing log recovery metrics")
+    for (dir <- logDirs) {
+      removeMetric("remainingLogsToRecover", Map("dir" -> dir.getAbsolutePath))
+      for (i <- 0 until numRecoveryThreadsPerDataDir) {
+        removeMetric("remainingSegmentsToRecover", Map("dir" -> dir.getAbsolutePath, "threadNum" -> i.toString))
+      }
+    }
+  }
+
   /**
    *  Start the background threads to flush logs and do log cleanup
    */
@@ -1008,7 +1072,7 @@ class LogManager(logDirs: Seq[File],
       if (destLog == null)
         throw new KafkaStorageException(s"The future replica for $topicPartition is offline")
 
-      destLog.renameDir(UnifiedLog.logDirName(topicPartition))
+      destLog.renameDir(UnifiedLog.logDirName(topicPartition), true)
       destLog.updateHighWatermark(sourceLog.highWatermark)
 
       // Now that future replica has been successfully renamed to be the current replica
@@ -1021,7 +1085,7 @@ class LogManager(logDirs: Seq[File],
       }
 
       try {
-        sourceLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition))
+        sourceLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), true)
         // Now that replica in source log directory has been successfully renamed for deletion.
         // Close the log, update checkpoint files, and enqueue this log to be deleted.
         sourceLog.close()
@@ -1068,7 +1132,7 @@ class LogManager(logDirs: Seq[File],
             cleaner.updateCheckpoints(removedLog.parentDirFile, partitionToRemove = Option(topicPartition))
           }
         }
-        removedLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition))
+        removedLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), false)
         if (checkpoint) {
           val logDir = removedLog.parentDirFile
           val logsToCheckpoint = logsInDir(logDir)
diff --git a/core/src/main/scala/kafka/log/LogValidator.scala b/core/src/main/scala/kafka/log/LogValidator.scala
index 0949c1110dec1..74ea83297e5f5 100644
--- a/core/src/main/scala/kafka/log/LogValidator.scala
+++ b/core/src/main/scala/kafka/log/LogValidator.scala
@@ -17,7 +17,7 @@
 package kafka.log
 
 import java.nio.ByteBuffer
-import kafka.api.{ApiVersion, KAFKA_2_1_IV0}
+
 import kafka.common.{LongRef, RecordValidationException}
 import kafka.message.{CompressionCodec, NoCompressionCodec, ZStdCompressionCodec}
 import kafka.server.{BrokerTopicStats, RequestLocal}
@@ -29,6 +29,8 @@ import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.ProduceResponse.RecordError
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_1_IV0
 
 import scala.collection.{Seq, mutable}
 import scala.jdk.CollectionConverters._
@@ -94,7 +96,7 @@ private[log] object LogValidator extends Logging {
                                                     timestampDiffMaxMs: Long,
                                                     partitionLeaderEpoch: Int,
                                                     origin: AppendOrigin,
-                                                    interBrokerProtocolVersion: ApiVersion,
+                                                    interBrokerProtocolVersion: MetadataVersion,
                                                     brokerTopicStats: BrokerTopicStats,
                                                     requestLocal: RequestLocal): ValidationAndOffsetAssignResult = {
     if (sourceCodec == NoCompressionCodec && targetCodec == NoCompressionCodec) {
@@ -365,11 +367,11 @@ private[log] object LogValidator extends Logging {
                                                  timestampDiffMaxMs: Long,
                                                  partitionLeaderEpoch: Int,
                                                  origin: AppendOrigin,
-                                                 interBrokerProtocolVersion: ApiVersion,
+                                                 interBrokerProtocolVersion: MetadataVersion,
                                                  brokerTopicStats: BrokerTopicStats,
                                                  requestLocal: RequestLocal): ValidationAndOffsetAssignResult = {
 
-    if (targetCodec == ZStdCompressionCodec && interBrokerProtocolVersion < KAFKA_2_1_IV0)
+    if (targetCodec == ZStdCompressionCodec && interBrokerProtocolVersion.isLessThan(IBP_2_1_IV0))
       throw new UnsupportedCompressionTypeException("Produce requests to inter.broker.protocol.version < 2.1 broker " +
         "are not allowed to use ZStandard compression")
 
diff --git a/core/src/main/scala/kafka/log/OffsetIndex.scala b/core/src/main/scala/kafka/log/OffsetIndex.scala
index a4183b1715048..62afbac930efd 100755
--- a/core/src/main/scala/kafka/log/OffsetIndex.scala
+++ b/core/src/main/scala/kafka/log/OffsetIndex.scala
@@ -156,7 +156,7 @@ class OffsetIndex(_file: File, baseOffset: Long, maxIndexSize: Int = -1, writabl
     }
   }
 
-  override def truncate() = truncateToEntries(0)
+  override def truncate(): Unit = truncateToEntries(0)
 
   override def truncateTo(offset: Long): Unit = {
     inLock(lock) {
diff --git a/core/src/main/scala/kafka/log/TimeIndex.scala b/core/src/main/scala/kafka/log/TimeIndex.scala
index 779a45138b051..2c464d602ffc1 100644
--- a/core/src/main/scala/kafka/log/TimeIndex.scala
+++ b/core/src/main/scala/kafka/log/TimeIndex.scala
@@ -159,7 +159,7 @@ class TimeIndex(_file: File, baseOffset: Long, maxIndexSize: Int = -1, writable:
     }
   }
 
-  override def truncate() = truncateToEntries(0)
+  override def truncate(): Unit = truncateToEntries(0)
 
   /**
    * Remove all entries from the index which have an offset greater than or equal to the given offset.
diff --git a/core/src/main/scala/kafka/log/UnifiedLog.scala b/core/src/main/scala/kafka/log/UnifiedLog.scala
index 3742d63f108c6..c4a2300237cd8 100644
--- a/core/src/main/scala/kafka/log/UnifiedLog.scala
+++ b/core/src/main/scala/kafka/log/UnifiedLog.scala
@@ -22,8 +22,7 @@ import com.yammer.metrics.core.MetricName
 import java.io.{File, IOException}
 import java.nio.file.Files
 import java.util.Optional
-import java.util.concurrent.TimeUnit
-import kafka.api.{ApiVersion, KAFKA_0_10_0_IV0}
+import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, TimeUnit}
 import kafka.common.{LongRef, OffsetsOutOfOrderException, UnexpectedAppendOffsetException}
 import kafka.log.AppendOrigin.RaftLeader
 import kafka.message.{BrokerCompressionCodec, CompressionCodec, NoCompressionCodec}
@@ -41,6 +40,8 @@ import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_
 import org.apache.kafka.common.requests.ProduceResponse.RecordError
 import org.apache.kafka.common.utils.{Time, Utils}
 import org.apache.kafka.common.{InvalidRecordException, KafkaException, TopicPartition, Uuid}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_0_10_0_IV0
 
 import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
@@ -146,7 +147,7 @@ case class LogOffsetSnapshot(logStartOffset: Long,
                              lastStableOffset: LogOffsetMetadata)
 
 /**
- * Another container which is used for lower level reads using  [[kafka.cluster.Partition.readRecords()]].
+ * Another container which is used for lower level reads using  [[kafka.cluster.Partition.fetchRecords()]].
  */
 case class LogReadInfo(fetchedData: FetchDataInfo,
                        divergingEpoch: Option[FetchResponseData.EpochEndOffset],
@@ -286,7 +287,7 @@ class UnifiedLog(@volatile var logStartOffset: Long,
    */
   @volatile private var highWatermarkMetadata: LogOffsetMetadata = LogOffsetMetadata(logStartOffset)
 
-  @volatile var partitionMetadataFile : PartitionMetadataFile = null
+  @volatile var partitionMetadataFile: Option[PartitionMetadataFile] = None
 
   locally {
     initializePartitionMetadata()
@@ -306,9 +307,12 @@ class UnifiedLog(@volatile var logStartOffset: Long,
    *   - Otherwise set _topicId to None
    */
   def initializeTopicId(): Unit =  {
-    if (partitionMetadataFile.exists()) {
+    val partMetadataFile = partitionMetadataFile.getOrElse(
+      throw new KafkaException("The partitionMetadataFile should have been initialized"))
+
+    if (partMetadataFile.exists()) {
       if (keepPartitionMetadataFile) {
-        val fileTopicId = partitionMetadataFile.read().topicId
+        val fileTopicId = partMetadataFile.read().topicId
         if (_topicId.isDefined && !_topicId.contains(fileTopicId))
           throw new InconsistentTopicIdException(s"Tried to assign topic ID $topicId to log for topic partition $topicPartition," +
             s"but log already contained topic ID $fileTopicId")
@@ -316,14 +320,14 @@ class UnifiedLog(@volatile var logStartOffset: Long,
         _topicId = Some(fileTopicId)
 
       } else {
-        try partitionMetadataFile.delete()
+        try partMetadataFile.delete()
         catch {
           case e: IOException =>
-            error(s"Error while trying to delete partition metadata file ${partitionMetadataFile}", e)
+            error(s"Error while trying to delete partition metadata file ${partMetadataFile}", e)
         }
       }
     } else if (keepPartitionMetadataFile) {
-      _topicId.foreach(partitionMetadataFile.record)
+      _topicId.foreach(partMetadataFile.record)
       scheduler.schedule("flush-metadata-file", maybeFlushMetadataFile)
     } else {
       // We want to keep the file and the in-memory topic ID in sync.
@@ -554,11 +558,11 @@ class UnifiedLog(@volatile var logStartOffset: Long,
 
   private def initializePartitionMetadata(): Unit = lock synchronized {
     val partitionMetadata = PartitionMetadataFile.newFile(dir)
-    partitionMetadataFile = new PartitionMetadataFile(partitionMetadata, logDirFailureChannel)
+    partitionMetadataFile = Some(new PartitionMetadataFile(partitionMetadata, logDirFailureChannel))
   }
 
   private def maybeFlushMetadataFile(): Unit = {
-    partitionMetadataFile.maybeFlush()
+    partitionMetadataFile.foreach(_.maybeFlush())
   }
 
   /** Only used for ZK clusters when we update and start using topic IDs on existing topics */
@@ -573,9 +577,14 @@ class UnifiedLog(@volatile var logStartOffset: Long,
       case None =>
         if (keepPartitionMetadataFile) {
           _topicId = Some(topicId)
-          if (!partitionMetadataFile.exists()) {
-            partitionMetadataFile.record(topicId)
-            scheduler.schedule("flush-metadata-file", maybeFlushMetadataFile)
+          partitionMetadataFile match {
+            case Some(partMetadataFile) =>
+              if (!partMetadataFile.exists()) {
+                partMetadataFile.record(topicId)
+                scheduler.schedule("flush-metadata-file", maybeFlushMetadataFile)
+              }
+            case _ => warn(s"The topic id $topicId will not be persisted to the partition metadata file " +
+              "since the partition is deleted")
           }
         }
     }
@@ -674,21 +683,29 @@ class UnifiedLog(@volatile var logStartOffset: Long,
   }
 
   /**
-   * Rename the directory of the local log
+   * Rename the directory of the local log. If the log's directory is being renamed for async deletion due to a
+   * StopReplica request, then the shouldReinitialize parameter should be set to false, otherwise it should be set to true.
    *
+   * @param name The new name that this log's directory is being renamed to
+   * @param shouldReinitialize Whether the log's metadata should be reinitialized after renaming
    * @throws KafkaStorageException if rename fails
    */
-  def renameDir(name: String): Unit = {
+  def renameDir(name: String, shouldReinitialize: Boolean): Unit = {
     lock synchronized {
       maybeHandleIOException(s"Error while renaming dir for $topicPartition in log dir ${dir.getParent}") {
         // Flush partitionMetadata file before initializing again
         maybeFlushMetadataFile()
         if (localLog.renameDir(name)) {
           producerStateManager.updateParentDir(dir)
-          // re-initialize leader epoch cache so that LeaderEpochCheckpointFile.checkpoint can correctly reference
-          // the checkpoint file in renamed log directory
-          initializeLeaderEpochCache()
-          initializePartitionMetadata()
+          if (shouldReinitialize) {
+            // re-initialize leader epoch cache so that LeaderEpochCheckpointFile.checkpoint can correctly reference
+            // the checkpoint file in renamed log directory
+            initializeLeaderEpochCache()
+            initializePartitionMetadata()
+          } else {
+            leaderEpochCache = None
+            partitionMetadataFile = None
+          }
         }
       }
     }
@@ -717,7 +734,7 @@ class UnifiedLog(@volatile var logStartOffset: Long,
   def appendAsLeader(records: MemoryRecords,
                      leaderEpoch: Int,
                      origin: AppendOrigin = AppendOrigin.Client,
-                     interBrokerProtocolVersion: ApiVersion = ApiVersion.latestVersion,
+                     interBrokerProtocolVersion: MetadataVersion = MetadataVersion.latest,
                      requestLocal: RequestLocal = RequestLocal.NoCaching): LogAppendInfo = {
     val validateAndAssignOffsets = origin != AppendOrigin.RaftLeader
     append(records, origin, interBrokerProtocolVersion, validateAndAssignOffsets, leaderEpoch, Some(requestLocal), ignoreRecordSize = false)
@@ -733,7 +750,7 @@ class UnifiedLog(@volatile var logStartOffset: Long,
   def appendAsFollower(records: MemoryRecords): LogAppendInfo = {
     append(records,
       origin = AppendOrigin.Replication,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       validateAndAssignOffsets = false,
       leaderEpoch = -1,
       None,
@@ -761,7 +778,7 @@ class UnifiedLog(@volatile var logStartOffset: Long,
    */
   private def append(records: MemoryRecords,
                      origin: AppendOrigin,
-                     interBrokerProtocolVersion: ApiVersion,
+                     interBrokerProtocolVersion: MetadataVersion,
                      validateAndAssignOffsets: Boolean,
                      leaderEpoch: Int,
                      requestLocal: Option[RequestLocal],
@@ -1225,12 +1242,12 @@ class UnifiedLog(@volatile var logStartOffset: Long,
     maybeHandleIOException(s"Error while fetching offset by timestamp for $topicPartition in dir ${dir.getParent}") {
       debug(s"Searching offset for timestamp $targetTimestamp")
 
-      if (config.messageFormatVersion < KAFKA_0_10_0_IV0 &&
+      if (config.messageFormatVersion.isLessThan(IBP_0_10_0_IV0) &&
         targetTimestamp != ListOffsetsRequest.EARLIEST_TIMESTAMP &&
         targetTimestamp != ListOffsetsRequest.LATEST_TIMESTAMP)
         throw new UnsupportedForMessageFormatException(s"Cannot search offsets based on timestamp because message format version " +
           s"for partition $topicPartition is ${config.messageFormatVersion} which is earlier than the minimum " +
-          s"required version $KAFKA_0_10_0_IV0")
+          s"required version $IBP_0_10_0_IV0")
 
       // For the earliest and latest, we do not need to return the timestamp.
       if (targetTimestamp == ListOffsetsRequest.EARLIEST_TIMESTAMP) {
@@ -1355,15 +1372,19 @@ class UnifiedLog(@volatile var logStartOffset: Long,
       val numToDelete = deletable.size
       if (numToDelete > 0) {
         // we must always have at least one segment, so if we are going to delete all the segments, create a new one first
-        if (localLog.segments.numberOfSegments == numToDelete)
-          roll()
-        lock synchronized {
-          localLog.checkIfMemoryMappedBufferClosed()
-          // remove the segments for lookups
-          localLog.removeAndDeleteSegments(deletable, asyncDelete = true, reason)
-          deleteProducerSnapshots(deletable, asyncDelete = true)
-          maybeIncrementLogStartOffset(localLog.segments.firstSegmentBaseOffset.get, SegmentDeletion)
+        var segmentsToDelete = deletable
+        if (localLog.segments.numberOfSegments == numToDelete) {
+          val newSegment = roll()
+          if (deletable.last.baseOffset == newSegment.baseOffset) {
+            warn(s"Empty active segment at ${deletable.last.baseOffset} was deleted and recreated due to $reason")
+            segmentsToDelete = deletable.dropRight(1)
+          }
         }
+        localLog.checkIfMemoryMappedBufferClosed()
+        // remove the segments for lookups
+        localLog.removeAndDeleteSegments(segmentsToDelete, asyncDelete = true, reason)
+        deleteProducerSnapshots(deletable, asyncDelete = true)
+        maybeIncrementLogStartOffset(localLog.segments.firstSegmentBaseOffset.get, SegmentDeletion)
       }
       numToDelete
     }
@@ -1782,7 +1803,8 @@ object UnifiedLog extends Logging {
             logDirFailureChannel: LogDirFailureChannel,
             lastShutdownClean: Boolean = true,
             topicId: Option[Uuid],
-            keepPartitionMetadataFile: Boolean): UnifiedLog = {
+            keepPartitionMetadataFile: Boolean,
+            numRemainingSegments: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]): UnifiedLog = {
     // create the log directory if it doesn't exist
     Files.createDirectories(dir.toPath)
     val topicPartition = UnifiedLog.parseTopicPartitionName(dir)
@@ -1807,7 +1829,8 @@ object UnifiedLog extends Logging {
       logStartOffset,
       recoveryPoint,
       leaderEpochCache,
-      producerStateManager
+      producerStateManager,
+      numRemainingSegments
     ).load()
     val localLog = new LocalLog(dir, config, segments, offsets.recoveryPoint,
       offsets.nextOffsetMetadata, scheduler, time, topicPartition, logDirFailureChannel)
diff --git a/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala b/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala
index 0d8354728ac8d..607cd188e0e1f 100755
--- a/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala
+++ b/core/src/main/scala/kafka/metrics/KafkaCSVMetricsReporter.scala
@@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit
 
 import kafka.utils.{Logging, VerifiableProperties}
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 
 private trait KafkaCSVMetricsReporterMBean extends KafkaMetricsReporterMBean
 
@@ -50,7 +51,7 @@ private class KafkaCSVMetricsReporter extends KafkaMetricsReporter
         val metricsConfig = new KafkaMetricsConfig(props)
         csvDir = new File(props.getString("kafka.csv.metrics.dir", "kafka_metrics"))
         Utils.delete(csvDir)
-        Files.createDirectories(csvDir.toPath())
+        Files.createDirectories(csvDir.toPath)
         underlying = new CsvReporter(KafkaYammerMetrics.defaultRegistry(), csvDir)
         if (props.getBoolean("kafka.csv.metrics.reporter.enabled", default = false)) {
           initialized = true
diff --git a/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala b/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala
index 177edea527387..161d1f2f36408 100644
--- a/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala
+++ b/core/src/main/scala/kafka/metrics/KafkaMetricsGroup.scala
@@ -19,9 +19,10 @@ package kafka.metrics
 
 import java.util.concurrent.TimeUnit
 
-import com.yammer.metrics.core.{Gauge, MetricName, Meter, Histogram, Timer}
+import com.yammer.metrics.core.{Gauge, Histogram, Meter, MetricName, Timer}
 import kafka.utils.Logging
 import org.apache.kafka.common.utils.Sanitizer
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 
 trait KafkaMetricsGroup extends Logging {
 
@@ -52,7 +53,7 @@ trait KafkaMetricsGroup extends Logging {
 
     nameBuilder.append(typeName)
 
-    if (name.length > 0) {
+    if (name.nonEmpty) {
       nameBuilder.append(",name=")
       nameBuilder.append(name)
     }
diff --git a/core/src/main/scala/kafka/metrics/LinuxIoMetricsCollector.scala b/core/src/main/scala/kafka/metrics/LinuxIoMetricsCollector.scala
index 17de008580aec..5a41dbad73c45 100644
--- a/core/src/main/scala/kafka/metrics/LinuxIoMetricsCollector.scala
+++ b/core/src/main/scala/kafka/metrics/LinuxIoMetricsCollector.scala
@@ -87,10 +87,10 @@ class LinuxIoMetricsCollector(procRoot: String, val time: Time, val logger: Logg
   }
 
   def usable(): Boolean = {
-    if (path.toFile().exists()) {
+    if (path.toFile.exists()) {
       updateValues(time.milliseconds())
     } else {
-      logger.debug(s"disabling IO metrics collection because ${path} does not exist.")
+      logger.debug(s"disabling IO metrics collection because $path does not exist.")
       false
     }
   }
diff --git a/core/src/main/scala/kafka/network/RequestChannel.scala b/core/src/main/scala/kafka/network/RequestChannel.scala
index 5e456b065e057..4fa611206a4cf 100644
--- a/core/src/main/scala/kafka/network/RequestChannel.scala
+++ b/core/src/main/scala/kafka/network/RequestChannel.scala
@@ -530,7 +530,7 @@ class RequestMetrics(name: String) extends KafkaMetricsGroup {
   Errors.values.foreach(error => errorMeters.put(error, new ErrorMeter(name, error)))
 
   def requestRate(version: Short): Meter = {
-    requestRateInternal.getAndMaybePut(version, newMeter("RequestsPerSec", "requests", TimeUnit.SECONDS, tags + ("version" -> version.toString)))
+    requestRateInternal.getAndMaybePut(version, newMeter(RequestsPerSec, "requests", TimeUnit.SECONDS, tags + ("version" -> version.toString)))
   }
 
   class ErrorMeter(name: String, error: Errors) {
@@ -575,7 +575,6 @@ class RequestMetrics(name: String) extends KafkaMetricsGroup {
     removeMetric(TotalTimeMs, tags)
     removeMetric(ResponseSendTimeMs, tags)
     removeMetric(RequestBytes, tags)
-    removeMetric(ResponseSendTimeMs, tags)
     if (name == ApiKeys.FETCH.name || name == ApiKeys.PRODUCE.name) {
       removeMetric(MessageConversionsTimeMs, tags)
       removeMetric(TemporaryMemoryBytes, tags)
diff --git a/core/src/main/scala/kafka/network/RequestConvertToJson.scala b/core/src/main/scala/kafka/network/RequestConvertToJson.scala
index bb8e327b1890a..6a374e7afb28e 100644
--- a/core/src/main/scala/kafka/network/RequestConvertToJson.scala
+++ b/core/src/main/scala/kafka/network/RequestConvertToJson.scala
@@ -32,7 +32,7 @@ object RequestConvertToJson {
       case req: AllocateProducerIdsRequest => AllocateProducerIdsRequestDataJsonConverter.write(req.data, request.version)
       case req: AlterClientQuotasRequest => AlterClientQuotasRequestDataJsonConverter.write(req.data, request.version)
       case req: AlterConfigsRequest => AlterConfigsRequestDataJsonConverter.write(req.data, request.version)
-      case req: AlterIsrRequest => AlterIsrRequestDataJsonConverter.write(req.data, request.version)
+      case req: AlterPartitionRequest => AlterPartitionRequestDataJsonConverter.write(req.data, request.version)
       case req: AlterPartitionReassignmentsRequest => AlterPartitionReassignmentsRequestDataJsonConverter.write(req.data, request.version)
       case req: AlterReplicaLogDirsRequest => AlterReplicaLogDirsRequestDataJsonConverter.write(req.data, request.version)
       case res: AlterUserScramCredentialsRequest => AlterUserScramCredentialsRequestDataJsonConverter.write(res.data, request.version)
@@ -107,7 +107,7 @@ object RequestConvertToJson {
       case res: AllocateProducerIdsResponse => AllocateProducerIdsResponseDataJsonConverter.write(res.data, version)
       case res: AlterClientQuotasResponse => AlterClientQuotasResponseDataJsonConverter.write(res.data, version)
       case res: AlterConfigsResponse => AlterConfigsResponseDataJsonConverter.write(res.data, version)
-      case res: AlterIsrResponse => AlterIsrResponseDataJsonConverter.write(res.data, version)
+      case res: AlterPartitionResponse => AlterPartitionResponseDataJsonConverter.write(res.data, version)
       case res: AlterPartitionReassignmentsResponse => AlterPartitionReassignmentsResponseDataJsonConverter.write(res.data, version)
       case res: AlterReplicaLogDirsResponse => AlterReplicaLogDirsResponseDataJsonConverter.write(res.data, version)
       case res: AlterUserScramCredentialsResponse => AlterUserScramCredentialsResponseDataJsonConverter.write(res.data, version)
diff --git a/core/src/main/scala/kafka/network/SocketServer.scala b/core/src/main/scala/kafka/network/SocketServer.scala
index 88dfa15b3f5c9..e91c240415c0d 100644
--- a/core/src/main/scala/kafka/network/SocketServer.scala
+++ b/core/src/main/scala/kafka/network/SocketServer.scala
@@ -22,7 +22,6 @@ import java.net._
 import java.nio.ByteBuffer
 import java.nio.channels.{Selector => NSelector, _}
 import java.util
-import java.util.Optional
 import java.util.concurrent._
 import java.util.concurrent.atomic._
 
@@ -34,7 +33,7 @@ import kafka.network.RequestChannel.{CloseConnectionResponse, EndThrottlingRespo
 import kafka.network.SocketServer._
 import kafka.security.CredentialProvider
 import kafka.server.{ApiVersionManager, BrokerReconfigurable, KafkaConfig}
-import kafka.utils.Implicits._
+import org.apache.kafka.common.message.ApiMessageType.ListenerType
 import kafka.utils._
 import org.apache.kafka.common.config.ConfigException
 import org.apache.kafka.common.config.internals.QuotaConfigs
@@ -104,51 +103,34 @@ class SocketServer(val config: KafkaConfig,
 
   private[this] val nextProcessorId: AtomicInteger = new AtomicInteger(0)
   val connectionQuotas = new ConnectionQuotas(config, time, metrics)
-  private var startedProcessingRequests = false
-  private var stoppedProcessingRequests = false
 
-  // Processors are now created by each Acceptor. However to preserve compatibility, we need to number the processors
-  // globally, so we keep the nextProcessorId counter in SocketServer
-  def nextProcessorId(): Int = {
-    nextProcessorId.getAndIncrement()
-  }
+  /**
+   * A future which is completed once all the authorizer futures are complete.
+   */
+  private val allAuthorizerFuturesComplete = new CompletableFuture[Void]
 
   /**
-   * Starts the socket server and creates all the Acceptors and the Processors. The Acceptors
-   * start listening at this stage so that the bound port is known when this method completes
-   * even when ephemeral ports are used. Acceptors and Processors are started if `startProcessingRequests`
-   * is true. If not, acceptors and processors are only started when [[kafka.network.SocketServer#startProcessingRequests()]]
-   * is invoked. Delayed starting of acceptors and processors is used to delay processing client
-   * connections until server is fully initialized, e.g. to ensure that all credentials have been
-   * loaded before authentications are performed. Incoming connections on this server are processed
-   * when processors start up and invoke [[org.apache.kafka.common.network.Selector#poll]].
-   *
-   * @param startProcessingRequests Flag indicating whether `Processor`s must be started.
-   * @param controlPlaneListener    The control plane listener, or None if there is none.
-   * @param dataPlaneListeners      The data plane listeners.
+   * True if the SocketServer is stopped. Must be accessed under the SocketServer lock.
    */
-  def startup(startProcessingRequests: Boolean = true,
-              controlPlaneListener: Option[EndPoint] = config.controlPlaneListener,
-              dataPlaneListeners: Seq[EndPoint] = config.dataPlaneListeners): Unit = {
-    this.synchronized {
-      createControlPlaneAcceptorAndProcessor(controlPlaneListener)
-      createDataPlaneAcceptorsAndProcessors(dataPlaneListeners)
-      if (startProcessingRequests) {
-        this.startProcessingRequests()
-      }
-    }
+  private var stopped = false
 
+  // Socket server metrics
+  newGauge(s"${DataPlaneAcceptor.MetricPrefix}NetworkProcessorAvgIdlePercent", () => SocketServer.this.synchronized {
     val dataPlaneProcessors = dataPlaneAcceptors.asScala.values.flatMap(a => a.processors)
-    val controlPlaneProcessorOpt = controlPlaneAcceptorOpt.map(a => a.processors(0))
-    newGauge(s"${DataPlaneAcceptor.MetricPrefix}NetworkProcessorAvgIdlePercent", () => SocketServer.this.synchronized {
-      val ioWaitRatioMetricNames = dataPlaneProcessors.map { p =>
-        metrics.metricName("io-wait-ratio", MetricsGroup, p.metricTags)
-      }
+    val ioWaitRatioMetricNames = dataPlaneProcessors.map { p =>
+      metrics.metricName("io-wait-ratio", MetricsGroup, p.metricTags)
+    }
+    if (dataPlaneProcessors.isEmpty) {
+      1.0
+    } else {
       ioWaitRatioMetricNames.map { metricName =>
         Option(metrics.metric(metricName)).fold(0.0)(m => Math.min(m.metricValue.asInstanceOf[Double], 1.0))
       }.sum / dataPlaneProcessors.size
-    })
+    }
+  })
+  if (config.requiresZookeeper) {
     newGauge(s"${ControlPlaneAcceptor.MetricPrefix}NetworkProcessorAvgIdlePercent", () => SocketServer.this.synchronized {
+      val controlPlaneProcessorOpt = controlPlaneAcceptorOpt.map(a => a.processors(0))
       val ioWaitRatioMetricName = controlPlaneProcessorOpt.map { p =>
         metrics.metricName("io-wait-ratio", MetricsGroup, p.metricTags)
       }
@@ -156,17 +138,21 @@ class SocketServer(val config: KafkaConfig,
         Option(metrics.metric(metricName)).fold(0.0)(m => Math.min(m.metricValue.asInstanceOf[Double], 1.0))
       }.getOrElse(Double.NaN)
     })
-    newGauge("MemoryPoolAvailable", () => memoryPool.availableMemory)
-    newGauge("MemoryPoolUsed", () => memoryPool.size() - memoryPool.availableMemory)
-    newGauge(s"${DataPlaneAcceptor.MetricPrefix}ExpiredConnectionsKilledCount", () => SocketServer.this.synchronized {
-      val expiredConnectionsKilledCountMetricNames = dataPlaneProcessors.map { p =>
-        metrics.metricName("expired-connections-killed-count", MetricsGroup, p.metricTags)
-      }
-      expiredConnectionsKilledCountMetricNames.map { metricName =>
-        Option(metrics.metric(metricName)).fold(0.0)(m => m.metricValue.asInstanceOf[Double])
-      }.sum
-    })
+  }
+  newGauge("MemoryPoolAvailable", () => memoryPool.availableMemory)
+  newGauge("MemoryPoolUsed", () => memoryPool.size() - memoryPool.availableMemory)
+  newGauge(s"${DataPlaneAcceptor.MetricPrefix}ExpiredConnectionsKilledCount", () => SocketServer.this.synchronized {
+    val dataPlaneProcessors = dataPlaneAcceptors.asScala.values.flatMap(a => a.processors)
+    val expiredConnectionsKilledCountMetricNames = dataPlaneProcessors.map { p =>
+      metrics.metricName("expired-connections-killed-count", MetricsGroup, p.metricTags)
+    }
+    expiredConnectionsKilledCountMetricNames.map { metricName =>
+      Option(metrics.metric(metricName)).fold(0.0)(m => m.metricValue.asInstanceOf[Double])
+    }.sum
+  })
+  if (config.requiresZookeeper) {
     newGauge(s"${ControlPlaneAcceptor.MetricPrefix}ExpiredConnectionsKilledCount", () => SocketServer.this.synchronized {
+      val controlPlaneProcessorOpt = controlPlaneAcceptorOpt.map(a => a.processors(0))
       val expiredConnectionsKilledCountMetricNames = controlPlaneProcessorOpt.map { p =>
         metrics.metricName("expired-connections-killed-count", MetricsGroup, p.metricTags)
       }
@@ -176,112 +162,86 @@ class SocketServer(val config: KafkaConfig,
     })
   }
 
-  /**
-   * Start processing requests and new connections. This method is used for delayed starting of
-   * all the acceptors and processors if [[kafka.network.SocketServer#startup]] was invoked with
-   * `startProcessingRequests=false`.
-   *
-   * Before starting processors for each endpoint, we ensure that authorizer has all the metadata
-   * to authorize requests on that endpoint by waiting on the provided future. We start inter-broker
-   * listener before other listeners. This allows authorization metadata for other listeners to be
-   * stored in Kafka topics in this cluster.
-   *
-   * @param authorizerFutures Future per [[EndPoint]] used to wait before starting the processor
-   *                          corresponding to the [[EndPoint]]
-   */
-  def startProcessingRequests(authorizerFutures: Map[Endpoint, CompletableFuture[Void]] = Map.empty): Unit = {
-    info("Starting socket server acceptors and processors")
-    this.synchronized {
-      if (!startedProcessingRequests) {
-        startControlPlaneProcessorAndAcceptor(authorizerFutures)
-        startDataPlaneProcessorsAndAcceptors(authorizerFutures)
-        startedProcessingRequests = true
-      } else {
-        info("Socket server acceptors and processors already started")
-      }
-    }
-    info("Started socket server acceptors and processors")
+  // Create acceptors and processors for the statically configured endpoints when the
+  // SocketServer is constructed. Note that this just opens the ports and creates the data
+  // structures. It does not start the acceptors and processors or their associated JVM
+  // threads.
+  if (apiVersionManager.listenerType.equals(ListenerType.CONTROLLER)) {
+    config.controllerListeners.foreach(createDataPlaneAcceptorAndProcessors)
+  } else {
+    config.controlPlaneListener.foreach(createControlPlaneAcceptorAndProcessor)
+    config.dataPlaneListeners.foreach(createDataPlaneAcceptorAndProcessors)
   }
 
-  /**
-   * Starts processors of the provided acceptor and the acceptor itself.
-   *
-   * Before starting them, we ensure that authorizer has all the metadata to authorize
-   * requests on that endpoint by waiting on the provided future.
-   */
-  private def startAcceptorAndProcessors(acceptor: Acceptor,
-                                         authorizerFutures: Map[Endpoint, CompletableFuture[Void]] = Map.empty): Unit = {
-    val endpoint = acceptor.endPoint
-    debug(s"Wait for authorizer to complete start up on listener ${endpoint.listenerName}")
-    waitForAuthorizerFuture(acceptor, authorizerFutures)
-    debug(s"Start processors on listener ${endpoint.listenerName}")
-    acceptor.startProcessors()
-    debug(s"Start acceptor thread on listener ${endpoint.listenerName}")
-    if (!acceptor.isStarted()) {
-      KafkaThread.nonDaemon(
-        s"${acceptor.threadPrefix()}-kafka-socket-acceptor-${endpoint.listenerName}-${endpoint.securityProtocol}-${endpoint.port}",
-        acceptor
-      ).start()
-      acceptor.awaitStartup()
-    }
-    info(s"Started ${acceptor.threadPrefix()} acceptor and processor(s) for endpoint : ${endpoint.listenerName}")
+  // Processors are now created by each Acceptor. However to preserve compatibility, we need to number the processors
+  // globally, so we keep the nextProcessorId counter in SocketServer
+  def nextProcessorId(): Int = {
+    nextProcessorId.getAndIncrement()
   }
 
   /**
-   * Starts processors of all the data-plane acceptors and all the acceptors of this server.
+   * This method enables request processing for all endpoints managed by this SocketServer. Each
+   * endpoint will be brought up asynchronously as soon as its associated future is completed.
+   * Therefore, we do not know that any particular request processor will be running by the end of
+   * this function -- just that it might be running.
    *
-   * We start inter-broker listener before other listeners. This allows authorization metadata for
-   * other listeners to be stored in Kafka topics in this cluster.
+   * @param authorizerFutures     Future per [[EndPoint]] used to wait before starting the
+   *                              processor corresponding to the [[EndPoint]]. Any endpoint
+   *                              that does not appear in this map will be started once all
+   *                              authorizerFutures are complete.
    */
-  private def startDataPlaneProcessorsAndAcceptors(authorizerFutures: Map[Endpoint, CompletableFuture[Void]]): Unit = {
-    val interBrokerListener = dataPlaneAcceptors.asScala.keySet
-      .find(_.listenerName == config.interBrokerListenerName)
-    val orderedAcceptors = interBrokerListener match {
-      case Some(interBrokerListener) => List(dataPlaneAcceptors.get(interBrokerListener)) ++
-        dataPlaneAcceptors.asScala.filter { case (k, _) => k != interBrokerListener }.values
-      case None => dataPlaneAcceptors.asScala.values
-    }
-    orderedAcceptors.foreach { acceptor =>
-      startAcceptorAndProcessors(acceptor, authorizerFutures)
+  def enableRequestProcessing(
+    authorizerFutures: Map[Endpoint, CompletableFuture[Void]]
+  ): Unit = this.synchronized {
+    if (stopped) {
+      throw new RuntimeException("Can't enable request processing: SocketServer is stopped.")
+    }
+
+    def chainAcceptorFuture(acceptor: Acceptor): Unit = {
+      // Because of ephemeral ports, we need to match acceptors to futures by looking at
+      // the listener name, rather than the endpoint object.
+      authorizerFutures.find {
+        case (endpoint, _) => acceptor.endPoint.listenerName.value().equals(endpoint.listenerName().get())
+      } match {
+        case None => chainFuture(allAuthorizerFuturesComplete, acceptor.startFuture)
+        case Some((_, future)) => chainFuture(future, acceptor.startFuture)
+      }
     }
-  }
 
-  /**
-   * Start the processor of control-plane acceptor and the acceptor of this server.
-   */
-  private def startControlPlaneProcessorAndAcceptor(authorizerFutures: Map[Endpoint, CompletableFuture[Void]]): Unit = {
-    controlPlaneAcceptorOpt.foreach { controlPlaneAcceptor =>
-      startAcceptorAndProcessors(controlPlaneAcceptor, authorizerFutures)
-    }
+    info("Enabling request processing.")
+    controlPlaneAcceptorOpt.foreach(chainAcceptorFuture)
+    dataPlaneAcceptors.values().forEach(chainAcceptorFuture)
+    chainFuture(CompletableFuture.allOf(authorizerFutures.values.toArray: _*),
+        allAuthorizerFuturesComplete)
   }
 
-  private def endpoints = config.listeners.map(l => l.listenerName -> l).toMap
-
-  def createDataPlaneAcceptorsAndProcessors(endpoints: Seq[EndPoint]): Unit = {
-    endpoints.foreach { endpoint =>
-      val parsedConfigs = config.valuesFromThisConfigWithPrefixOverride(endpoint.listenerName.configPrefix)
-      connectionQuotas.addListener(config, endpoint.listenerName)
-
-      val isPrivilegedListener = controlPlaneRequestChannelOpt.isEmpty && config.interBrokerListenerName == endpoint.listenerName
-
-      val dataPlaneAcceptor = createDataPlaneAcceptor(endpoint, isPrivilegedListener, dataPlaneRequestChannel)
-      config.addReconfigurable(dataPlaneAcceptor)
-      dataPlaneAcceptor.configure(parsedConfigs)
-      dataPlaneAcceptors.put(endpoint, dataPlaneAcceptor)
-      info(s"Created data-plane acceptor and processors for endpoint : ${endpoint.listenerName}")
+  def createDataPlaneAcceptorAndProcessors(endpoint: EndPoint): Unit = synchronized {
+    if (stopped) {
+      throw new RuntimeException("Can't create new data plane acceptor and processors: SocketServer is stopped.")
     }
+    val parsedConfigs = config.valuesFromThisConfigWithPrefixOverride(endpoint.listenerName.configPrefix)
+    connectionQuotas.addListener(config, endpoint.listenerName)
+    val isPrivilegedListener = controlPlaneRequestChannelOpt.isEmpty &&
+      config.interBrokerListenerName == endpoint.listenerName
+    val dataPlaneAcceptor = createDataPlaneAcceptor(endpoint, isPrivilegedListener, dataPlaneRequestChannel)
+    config.addReconfigurable(dataPlaneAcceptor)
+    dataPlaneAcceptor.configure(parsedConfigs)
+    dataPlaneAcceptors.put(endpoint, dataPlaneAcceptor)
+    info(s"Created data-plane acceptor and processors for endpoint : ${endpoint.listenerName}")
   }
 
-  private def createControlPlaneAcceptorAndProcessor(endpointOpt: Option[EndPoint]): Unit = {
-    endpointOpt.foreach { endpoint =>
-      connectionQuotas.addListener(config, endpoint.listenerName)
-      val controlPlaneAcceptor = createControlPlaneAcceptor(endpoint, controlPlaneRequestChannelOpt.get)
-      controlPlaneAcceptor.addProcessors(1)
-      controlPlaneAcceptorOpt = Some(controlPlaneAcceptor)
-      info(s"Created control-plane acceptor and processor for endpoint : ${endpoint.listenerName}")
+  private def createControlPlaneAcceptorAndProcessor(endpoint: EndPoint): Unit = synchronized {
+    if (stopped) {
+      throw new RuntimeException("Can't create new control plane acceptor and processor: SocketServer is stopped.")
     }
+    connectionQuotas.addListener(config, endpoint.listenerName)
+    val controlPlaneAcceptor = createControlPlaneAcceptor(endpoint, controlPlaneRequestChannelOpt.get)
+    controlPlaneAcceptor.addProcessors(1)
+    controlPlaneAcceptorOpt = Some(controlPlaneAcceptor)
+    info(s"Created control-plane acceptor and processor for endpoint : ${endpoint.listenerName}")
   }
 
+  private def endpoints = config.listeners.map(l => l.listenerName -> l).toMap
 
   protected def createDataPlaneAcceptor(endPoint: EndPoint, isPrivilegedListener: Boolean, requestChannel: RequestChannel): DataPlaneAcceptor = {
     new DataPlaneAcceptor(this, endPoint, config, nodeId, connectionQuotas, time, isPrivilegedListener, requestChannel, metrics, credentialProvider, logContext, memoryPool, apiVersionManager)
@@ -294,18 +254,18 @@ class SocketServer(val config: KafkaConfig,
   /**
    * Stop processing requests and new connections.
    */
-  def stopProcessingRequests(): Unit = {
-    info("Stopping socket server request processors")
-    this.synchronized {
-      dataPlaneAcceptors.asScala.values.foreach(_.initiateShutdown())
-      dataPlaneAcceptors.asScala.values.foreach(_.awaitShutdown())
-      controlPlaneAcceptorOpt.foreach(_.initiateShutdown())
-      controlPlaneAcceptorOpt.foreach(_.awaitShutdown())
+  def stopProcessingRequests(): Unit = synchronized {
+    if (!stopped) {
+      stopped = true
+      info("Stopping socket server request processors")
+      dataPlaneAcceptors.asScala.values.foreach(_.beginShutdown())
+      controlPlaneAcceptorOpt.foreach(_.beginShutdown())
+      dataPlaneAcceptors.asScala.values.foreach(_.close())
+      controlPlaneAcceptorOpt.foreach(_.close())
       dataPlaneRequestChannel.clear()
       controlPlaneRequestChannelOpt.foreach(_.clear())
-      stoppedProcessingRequests = true
+      info("Stopped socket server request processors")
     }
-    info("Stopped socket server request processors")
   }
 
   /**
@@ -314,9 +274,10 @@ class SocketServer(val config: KafkaConfig,
    */
   def shutdown(): Unit = {
     info("Shutting down socket server")
+    allAuthorizerFuturesComplete.completeExceptionally(new TimeoutException("The socket " +
+      "server was shut down before the Authorizer could be completely initialized."))
     this.synchronized {
-      if (!stoppedProcessingRequests)
-        stopProcessingRequests()
+      stopProcessingRequests()
       dataPlaneRequestChannel.shutdown()
       controlPlaneRequestChannelOpt.foreach(_.shutdown())
       connectionQuotas.close()
@@ -338,12 +299,20 @@ class SocketServer(val config: KafkaConfig,
     }
   }
 
+  /**
+   * This method is called to dynamically add listeners.
+   */
   def addListeners(listenersAdded: Seq[EndPoint]): Unit = synchronized {
+    if (stopped) {
+      throw new RuntimeException("can't add new listeners: SocketServer is stopped.")
+    }
     info(s"Adding data-plane listeners for endpoints $listenersAdded")
-    createDataPlaneAcceptorsAndProcessors(listenersAdded)
     listenersAdded.foreach { endpoint =>
+      createDataPlaneAcceptorAndProcessors(endpoint)
       val acceptor = dataPlaneAcceptors.get(endpoint)
-      startAcceptorAndProcessors(acceptor)
+      // There is no authorizer future for this new listener endpoint. So start the
+      // listener once all authorizer futures are complete.
+      chainFuture(allAuthorizerFuturesComplete, acceptor.startFuture)
     }
   }
 
@@ -352,8 +321,8 @@ class SocketServer(val config: KafkaConfig,
     listenersRemoved.foreach { endpoint =>
       connectionQuotas.removeListener(config, endpoint.listenerName)
       dataPlaneAcceptors.asScala.remove(endpoint).foreach { acceptor =>
-        acceptor.initiateShutdown()
-        acceptor.awaitShutdown()
+        acceptor.beginShutdown()
+        acceptor.close()
       }
     }
   }
@@ -387,15 +356,6 @@ class SocketServer(val config: KafkaConfig,
     }
   }
 
-  private def waitForAuthorizerFuture(acceptor: Acceptor,
-                                      authorizerFutures: Map[Endpoint, CompletableFuture[Void]]): Unit = {
-    //we can't rely on authorizerFutures.get() due to ephemeral ports. Get the future using listener name
-    authorizerFutures.forKeyValue { (endpoint, future) =>
-      if (endpoint.listenerName == Optional.of(acceptor.endPoint.listenerName.value))
-        future.join()
-    }
-  }
-
   // For test usage
   private[network] def connectionCount(address: InetAddress): Int =
     Option(connectionQuotas).fold(0)(_.get(address))
@@ -420,80 +380,22 @@ object SocketServer {
     KafkaConfig.MaxConnectionCreationRateProp)
 
   val ListenerReconfigurableConfigs = Set(KafkaConfig.MaxConnectionsProp, KafkaConfig.MaxConnectionCreationRateProp)
-}
 
-/**
- * A base class with some helper variables and methods
- */
-private[kafka] abstract class AbstractServerThread(connectionQuotas: ConnectionQuotas) extends Runnable with Logging {
-
-  private val startupLatch = new CountDownLatch(1)
-
-  // `shutdown()` is invoked before `startupComplete` and `shutdownComplete` if an exception is thrown in the constructor
-  // (e.g. if the address is already in use). We want `shutdown` to proceed in such cases, so we first assign an open
-  // latch and then replace it in `startupComplete()`.
-  @volatile private var shutdownLatch = new CountDownLatch(0)
-
-  private val alive = new AtomicBoolean(true)
-
-  def wakeup(): Unit
-
-  /**
-   * Initiates a graceful shutdown by signaling to stop
-   */
-  def initiateShutdown(): Unit = {
-    if (alive.getAndSet(false))
-      wakeup()
+  def closeSocket(
+    channel: SocketChannel,
+    logging: Logging
+  ): Unit = {
+    CoreUtils.swallow(channel.socket().close(), logging, Level.ERROR)
+    CoreUtils.swallow(channel.close(), logging, Level.ERROR)
   }
 
-  /**
-   * Wait for the thread to completely shutdown
-   */
-  def awaitShutdown(): Unit = shutdownLatch.await
-
-  /**
-   * Returns true if the thread is completely started
-   */
-  def isStarted(): Boolean = startupLatch.getCount == 0
-
-  /**
-   * Wait for the thread to completely start up
-   */
-  def awaitStartup(): Unit = startupLatch.await
-
-  /**
-   * Record that the thread startup is complete
-   */
-  protected def startupComplete(): Unit = {
-    // Replace the open latch with a closed one
-    shutdownLatch = new CountDownLatch(1)
-    startupLatch.countDown()
-  }
-
-  /**
-   * Record that the thread shutdown is complete
-   */
-  protected def shutdownComplete(): Unit = shutdownLatch.countDown()
-
-  /**
-   * Is the server still running?
-   */
-  protected def isRunning: Boolean = alive.get
-
-  /**
-   * Close `channel` and decrement the connection count.
-   */
-  def close(listenerName: ListenerName, channel: SocketChannel): Unit = {
-    if (channel != null) {
-      debug(s"Closing connection from ${channel.socket.getRemoteSocketAddress}")
-      connectionQuotas.dec(listenerName, channel.socket.getInetAddress)
-      closeSocket(channel)
-    }
-  }
-
-  protected def closeSocket(channel: SocketChannel): Unit = {
-    CoreUtils.swallow(channel.socket().close(), this, Level.ERROR)
-    CoreUtils.swallow(channel.close(), this, Level.ERROR)
+  def chainFuture(sourceFuture: CompletableFuture[Void],
+                  destinationFuture: CompletableFuture[Void]): Unit = {
+    sourceFuture.whenComplete((_, t) => if (t != null) {
+      destinationFuture.completeExceptionally(t)
+    } else {
+      destinationFuture.complete(null)
+    })
   }
 }
 
@@ -650,7 +552,7 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
                                        val endPoint: EndPoint,
                                        var config: KafkaConfig,
                                        nodeId: Int,
-                                       connectionQuotas: ConnectionQuotas,
+                                       val connectionQuotas: ConnectionQuotas,
                                        time: Time,
                                        isPrivilegedListener: Boolean,
                                        requestChannel: RequestChannel,
@@ -659,7 +561,9 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
                                        logContext: LogContext,
                                        memoryPool: MemoryPool,
                                        apiVersionManager: ApiVersionManager)
-  extends AbstractServerThread(connectionQuotas) with KafkaMetricsGroup {
+  extends Runnable with Logging with KafkaMetricsGroup {
+
+  val shouldRun = new AtomicBoolean(true)
 
   def metricPrefix(): String
   def threadPrefix(): String
@@ -671,7 +575,6 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
   private val nioSelector = NSelector.open()
   private[network] val serverChannel = openServerSocket(endPoint.host, endPoint.port, listenBacklogSize)
   private[network] val processors = new ArrayBuffer[Processor]()
-  private val processorsStarted = new AtomicBoolean
   // Build the metric name explicitly in order to keep the existing name for compatibility
   private val blockedPercentMeterMetricName = explicitMetricName(
     "kafka.network",
@@ -681,24 +584,27 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
   private val blockedPercentMeter = newMeter(blockedPercentMeterMetricName,"blocked time", TimeUnit.NANOSECONDS)
   private var currentProcessorIndex = 0
   private[network] val throttledSockets = new mutable.PriorityQueue[DelayedCloseSocket]()
+  private var started = false
+  private[network] val startFuture = new CompletableFuture[Void]()
 
-  private[network] case class DelayedCloseSocket(socket: SocketChannel, endThrottleTimeMs: Long) extends Ordered[DelayedCloseSocket] {
-    override def compare(that: DelayedCloseSocket): Int = endThrottleTimeMs compare that.endThrottleTimeMs
-  }
+  val thread = KafkaThread.nonDaemon(
+    s"${threadPrefix()}-kafka-socket-acceptor-${endPoint.listenerName}-${endPoint.securityProtocol}-${endPoint.port}",
+    this)
 
-  private[network] def startProcessors(): Unit = synchronized {
-    if (!processorsStarted.getAndSet(true)) {
-      startProcessors(processors)
+  startFuture.thenRun(() => synchronized {
+    if (!shouldRun.get()) {
+      debug(s"Ignoring start future for ${endPoint.listenerName} since the acceptor has already been shut down.")
+    } else {
+      debug(s"Starting processors for listener ${endPoint.listenerName}")
+      started = true
+      processors.foreach(_.start())
+      debug(s"Starting acceptor thread for listener ${endPoint.listenerName}")
+      thread.start()
     }
-  }
+  })
 
-  private def startProcessors(processors: Seq[Processor]): Unit = synchronized {
-    processors.foreach { processor =>
-      KafkaThread.nonDaemon(
-        s"${threadPrefix()}-kafka-network-thread-$nodeId-${endPoint.listenerName}-${endPoint.securityProtocol}-${processor.id}",
-        processor
-      ).start()
-    }
+  private[network] case class DelayedCloseSocket(socket: SocketChannel, endThrottleTimeMs: Long) extends Ordered[DelayedCloseSocket] {
+    override def compare(that: DelayedCloseSocket): Int = endThrottleTimeMs compare that.endThrottleTimeMs
   }
 
   private[network] def removeProcessors(removeCount: Int): Unit = synchronized {
@@ -707,33 +613,34 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
     // The processors are then removed from `requestChannel` and any pending responses to these processors are dropped.
     val toRemove = processors.takeRight(removeCount)
     processors.remove(processors.size - removeCount, removeCount)
-    toRemove.foreach(_.initiateShutdown())
-    toRemove.foreach(_.awaitShutdown())
+    toRemove.foreach(_.close())
     toRemove.foreach(processor => requestChannel.removeProcessor(processor.id))
   }
 
-  override def initiateShutdown(): Unit = {
-    super.initiateShutdown()
-    synchronized {
-      processors.foreach(_.initiateShutdown())
+  def beginShutdown(): Unit = {
+    if (shouldRun.getAndSet(false)) {
+      wakeup()
+      synchronized {
+        processors.foreach(_.beginShutdown())
+      }
     }
   }
 
-  override def awaitShutdown(): Unit = {
-    super.awaitShutdown()
+  def close(): Unit = {
+    beginShutdown()
+    thread.join()
     synchronized {
-      processors.foreach(_.awaitShutdown())
+      processors.foreach(_.close())
     }
   }
 
   /**
    * Accept loop that checks for new connection attempts
    */
-  def run(): Unit = {
+  override def run(): Unit = {
     serverChannel.register(nioSelector, SelectionKey.OP_ACCEPT)
-    startupComplete()
     try {
-      while (isRunning) {
+      while (shouldRun.get()) {
         try {
           acceptNewConnections()
           closeThrottledConnections()
@@ -750,9 +657,8 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
       debug("Closing server socket, selector, and any throttled sockets.")
       CoreUtils.swallow(serverChannel.close(), this, Level.ERROR)
       CoreUtils.swallow(nioSelector.close(), this, Level.ERROR)
-      throttledSockets.foreach(throttledSocket => closeSocket(throttledSocket.socket))
+      throttledSockets.foreach(throttledSocket => closeSocket(throttledSocket.socket, this))
       throttledSockets.clear()
-      shutdownComplete()
     }
   }
 
@@ -788,7 +694,7 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
     if (ready > 0) {
       val keys = nioSelector.selectedKeys()
       val iter = keys.iterator()
-      while (iter.hasNext && isRunning) {
+      while (iter.hasNext && shouldRun.get()) {
         try {
           val key = iter.next
           iter.remove()
@@ -833,7 +739,7 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
     } catch {
       case e: TooManyConnectionsException =>
         info(s"Rejected connection from ${e.ip}, address already has the configured maximum of ${e.count} connections.")
-        close(endPoint.listenerName, socketChannel)
+        connectionQuotas.closeChannel(this, endPoint.listenerName, socketChannel)
         None
       case e: ConnectionThrottledException =>
         val ip = socketChannel.socket.getInetAddress
@@ -843,7 +749,7 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
         None
       case e: IOException =>
         error(s"Encountered an error while configuring the connection, closing it.", e)
-        close(endPoint.listenerName, socketChannel)
+        connectionQuotas.closeChannel(this, endPoint.listenerName, socketChannel)
         None
     }
   }
@@ -864,7 +770,7 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
     while (throttledSockets.headOption.exists(_.endThrottleTimeMs < timeMs)) {
       val closingSocket = throttledSockets.dequeue()
       debug(s"Closing socket from ip ${closingSocket.socket.getRemoteAddress}")
-      closeSocket(closingSocket.socket)
+      closeSocket(closingSocket.socket, this)
     }
   }
 
@@ -882,10 +788,9 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
   /**
    * Wakeup the thread for selection.
    */
-  @Override
   def wakeup(): Unit = nioSelector.wakeup()
 
-  def addProcessors(toCreate: Int): Unit = {
+  def addProcessors(toCreate: Int): Unit = synchronized {
     val listenerName = endPoint.listenerName
     val securityProtocol = endPoint.securityProtocol
     val listenerProcessors = new ArrayBuffer[Processor]()
@@ -894,14 +799,16 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
       val processor = newProcessor(socketServer.nextProcessorId(), listenerName, securityProtocol)
       listenerProcessors += processor
       requestChannel.addProcessor(processor)
-    }
 
+      if (started) {
+        processor.start()
+      }
+    }
     processors ++= listenerProcessors
-    if (processorsStarted.get)
-      startProcessors(listenerProcessors)
   }
 
   def newProcessor(id: Int, listenerName: ListenerName, securityProtocol: SecurityProtocol): Processor = {
+    val name = s"${threadPrefix()}-kafka-network-thread-$nodeId-${endPoint.listenerName}-${endPoint.securityProtocol}-${id}"
     new Processor(id,
                   time,
                   config.socketRequestMaxBytes,
@@ -918,9 +825,9 @@ private[kafka] abstract class Acceptor(val socketServer: SocketServer,
                   logContext,
                   Processor.ConnectionQueueSize,
                   isPrivilegedListener,
-                  apiVersionManager)
+                  apiVersionManager,
+                  name)
   }
-
 }
 
 private[kafka] object Processor {
@@ -940,23 +847,29 @@ private[kafka] object Processor {
  *                             forwarding requests; if the control plane is not defined, the processor
  *                             relying on the inter broker listener would be acting as the privileged listener.
  */
-private[kafka] class Processor(val id: Int,
-                               time: Time,
-                               maxRequestSize: Int,
-                               requestChannel: RequestChannel,
-                               connectionQuotas: ConnectionQuotas,
-                               connectionsMaxIdleMs: Long,
-                               failedAuthenticationDelayMs: Int,
-                               listenerName: ListenerName,
-                               securityProtocol: SecurityProtocol,
-                               config: KafkaConfig,
-                               metrics: Metrics,
-                               credentialProvider: CredentialProvider,
-                               memoryPool: MemoryPool,
-                               logContext: LogContext,
-                               connectionQueueSize: Int,
-                               isPrivilegedListener: Boolean,
-                               apiVersionManager: ApiVersionManager) extends AbstractServerThread(connectionQuotas) with KafkaMetricsGroup {
+private[kafka] class Processor(
+  val id: Int,
+  time: Time,
+  maxRequestSize: Int,
+  requestChannel: RequestChannel,
+  connectionQuotas: ConnectionQuotas,
+  connectionsMaxIdleMs: Long,
+  failedAuthenticationDelayMs: Int,
+  listenerName: ListenerName,
+  securityProtocol: SecurityProtocol,
+  config: KafkaConfig,
+  metrics: Metrics,
+  credentialProvider: CredentialProvider,
+  memoryPool: MemoryPool,
+  logContext: LogContext,
+  connectionQueueSize: Int,
+  isPrivilegedListener: Boolean,
+  apiVersionManager: ApiVersionManager,
+  threadName: String
+) extends Runnable with KafkaMetricsGroup {
+  val shouldRun = new AtomicBoolean(true)
+
+  val thread = KafkaThread.nonDaemon(threadName, this)
 
   private object ConnectionId {
     def fromString(s: String): Option[ConnectionId] = s.split("-") match {
@@ -1036,9 +949,8 @@ private[kafka] class Processor(val id: Int,
   private var nextConnectionIndex = 0
 
   override def run(): Unit = {
-    startupComplete()
     try {
-      while (isRunning) {
+      while (shouldRun.get()) {
         try {
           // setup any new connections that have been queued up
           configureNewConnections()
@@ -1062,7 +974,6 @@ private[kafka] class Processor(val id: Int,
     } finally {
       debug(s"Closing selector - processor $id")
       CoreUtils.swallow(closeAll(), this, Level.ERROR)
-      shutdownComplete()
     }
   }
 
@@ -1325,7 +1236,7 @@ private[kafka] class Processor(val id: Int,
         case e: Throwable =>
           val remoteAddress = channel.socket.getRemoteSocketAddress
           // need to close the channel here to avoid a socket leak.
-          close(listenerName, channel)
+          connectionQuotas.closeChannel(this, listenerName, channel)
           processException(s"Processor $id closed connection from $remoteAddress", e)
       }
     }
@@ -1392,15 +1303,27 @@ private[kafka] class Processor(val id: Int,
   private[network] def channel(connectionId: String): Option[KafkaChannel] =
     Option(selector.channel(connectionId))
 
+  def start(): Unit = thread.start()
+
   /**
    * Wakeup the thread for selection.
    */
-  override def wakeup(): Unit = selector.wakeup()
+  def wakeup(): Unit = selector.wakeup()
 
-  override def initiateShutdown(): Unit = {
-    super.initiateShutdown()
-    removeMetric("IdlePercent", Map("networkProcessor" -> id.toString))
-    metrics.removeMetric(expiredConnectionsKilledCountMetricName)
+  def beginShutdown(): Unit = {
+    if (shouldRun.getAndSet(false)) {
+      wakeup()
+    }
+  }
+
+  def close(): Unit = {
+    try {
+      beginShutdown()
+      thread.join()
+    } finally {
+      removeMetric("IdlePercent", Map("networkProcessor" -> id.toString))
+      metrics.removeMetric(expiredConnectionsKilledCountMetricName)
+    }
   }
 }
 
@@ -1864,6 +1787,18 @@ class ConnectionQuotas(config: KafkaConfig, time: Time, metrics: Metrics) extend
       sensor
     }
   }
+
+  /**
+   * Close `channel` and decrement the connection count.
+   */
+  def closeChannel(log: Logging, listenerName: ListenerName, channel: SocketChannel): Unit = {
+    if (channel != null) {
+      log.debug(s"Closing connection from ${channel.socket.getRemoteSocketAddress}")
+      dec(listenerName, channel.socket.getInetAddress)
+      closeSocket(channel, log)
+    }
+  }
+
 }
 
 class TooManyConnectionsException(val ip: InetAddress, val count: Int) extends KafkaException(s"Too many connections from $ip (maximum = $count)")
diff --git a/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala b/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala
index 1b0aef3fed792..83b8bee444795 100644
--- a/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala
+++ b/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala
@@ -257,7 +257,11 @@ final class KafkaMetadataLog private (
   }
 
   override def storeSnapshot(snapshotId: OffsetAndEpoch): Optional[RawSnapshotWriter] = {
-    if (snapshots.contains(snapshotId)) {
+    val containsSnapshotId = snapshots synchronized {
+      snapshots.contains(snapshotId)
+    }
+
+    if (containsSnapshotId) {
       Optional.empty()
     } else {
       Optional.of(FileRawSnapshotWriter.create(log.dir.toPath, snapshotId, Optional.of(this)))
@@ -368,7 +372,7 @@ final class KafkaMetadataLog private (
       val firstBatch = batchIterator.next()
       val records = firstBatch.streamingIterator(new BufferSupplier.GrowableBufferSupplier())
       if (firstBatch.isControlBatch) {
-        val header = ControlRecordUtils.deserializedSnapshotHeaderRecord(records.next());
+        val header = ControlRecordUtils.deserializedSnapshotHeaderRecord(records.next())
         Some(header.lastContainedLogTimestamp())
       } else {
         warn("Did not find control record at beginning of snapshot")
@@ -405,7 +409,7 @@ final class KafkaMetadataLog private (
    *
    * For the given predicate, we are testing if the snapshot identified by the first argument should be deleted.
    */
-  private def cleanSnapshots(predicate: (OffsetAndEpoch) => Boolean): Boolean = {
+  private def cleanSnapshots(predicate: OffsetAndEpoch => Boolean): Boolean = {
     if (snapshots.size < 2)
       return false
 
diff --git a/core/src/main/scala/kafka/raft/KafkaNetworkChannel.scala b/core/src/main/scala/kafka/raft/KafkaNetworkChannel.scala
index d99039132d8b4..c44d57102c5b0 100644
--- a/core/src/main/scala/kafka/raft/KafkaNetworkChannel.scala
+++ b/core/src/main/scala/kafka/raft/KafkaNetworkChannel.scala
@@ -45,7 +45,7 @@ object KafkaNetworkChannel {
         // Since we already have the request, we go through a simplified builder
         new AbstractRequest.Builder[FetchRequest](ApiKeys.FETCH) {
           override def build(version: Short): FetchRequest = new FetchRequest(fetchRequest, version)
-          override def toString(): String = fetchRequest.toString
+          override def toString: String = fetchRequest.toString
         }
       case fetchSnapshotRequest: FetchSnapshotRequestData =>
         new FetchSnapshotRequest.Builder(fetchSnapshotRequest)
diff --git a/core/src/main/scala/kafka/raft/RaftManager.scala b/core/src/main/scala/kafka/raft/RaftManager.scala
index 4c29250073681..a44d9d8fe014b 100644
--- a/core/src/main/scala/kafka/raft/RaftManager.scala
+++ b/core/src/main/scala/kafka/raft/RaftManager.scala
@@ -24,7 +24,6 @@ import java.util.concurrent.CompletableFuture
 import kafka.log.UnifiedLog
 import kafka.raft.KafkaRaftManager.RaftIoThread
 import kafka.server.{KafkaConfig, MetaProperties}
-import kafka.server.KafkaRaftServer.ControllerRole
 import kafka.utils.timer.SystemTimer
 import kafka.utils.{KafkaScheduler, Logging, ShutdownableThread}
 import org.apache.kafka.clients.{ApiVersions, ManualMetadataUpdater, NetworkClient}
@@ -111,6 +110,7 @@ class KafkaRaftManager[T](
   val controllerQuorumVotersFuture: CompletableFuture[util.Map[Integer, AddressSpec]]
 ) extends RaftManager[T] with Logging {
 
+  val apiVersions = new ApiVersions()
   private val raftConfig = new RaftConfig(config)
   private val threadNamePrefix = threadNamePrefixOpt.getOrElse("kafka-raft")
   private val logContext = new LogContext(s"[RaftManager nodeId=${config.nodeId}] ")
@@ -180,12 +180,7 @@ class KafkaRaftManager[T](
     val expirationTimer = new SystemTimer("raft-expiration-executor")
     val expirationService = new TimingWheelExpirationService(expirationTimer)
     val quorumStateStore = new FileBasedStateStore(new File(dataDir, "quorum-state"))
-
-    val nodeId = if (config.processRoles.contains(ControllerRole)) {
-      OptionalInt.of(config.nodeId)
-    } else {
-      OptionalInt.empty()
-    }
+    val nodeId = OptionalInt.of(config.nodeId)
 
     val client = new KafkaRaftClient(
       recordSerde,
@@ -274,7 +269,7 @@ class KafkaRaftManager[T](
       config.connectionSetupTimeoutMaxMs,
       time,
       discoverBrokerVersions,
-      new ApiVersions,
+      apiVersions,
       logContext
     )
   }
diff --git a/core/src/main/scala/kafka/security/authorizer/AclAuthorizer.scala b/core/src/main/scala/kafka/security/authorizer/AclAuthorizer.scala
index 88648fd3178c8..1de9a27402cb3 100644
--- a/core/src/main/scala/kafka/security/authorizer/AclAuthorizer.scala
+++ b/core/src/main/scala/kafka/security/authorizer/AclAuthorizer.scala
@@ -20,7 +20,6 @@ import java.{lang, util}
 import java.util.concurrent.{CompletableFuture, CompletionStage}
 
 import com.typesafe.scalalogging.Logger
-import kafka.api.KAFKA_2_0_IV1
 import kafka.security.authorizer.AclEntry.ResourceSeparator
 import kafka.server.{KafkaConfig, KafkaServer}
 import kafka.utils._
@@ -37,6 +36,7 @@ import org.apache.kafka.common.security.auth.KafkaPrincipal
 import org.apache.kafka.common.utils.{SecurityUtils, Time}
 import org.apache.kafka.server.authorizer.AclDeleteResult.AclBindingDeleteResult
 import org.apache.kafka.server.authorizer._
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_0_IV1
 import org.apache.zookeeper.client.ZKClientConfig
 
 import scala.annotation.nowarn
@@ -121,6 +121,8 @@ object AclAuthorizer {
   private def validateAclBinding(aclBinding: AclBinding): Unit = {
     if (aclBinding.isUnknown)
       throw new IllegalArgumentException("ACL binding contains unknown elements")
+    if (aclBinding.pattern().name().contains("/"))
+      throw new IllegalArgumentException(s"ACL binding contains invalid resource name: ${aclBinding.pattern().name()}")
   }
 }
 
@@ -182,7 +184,7 @@ class AclAuthorizer extends Authorizer with Logging {
       metricGroup = "kafka.security", metricType = "AclAuthorizer", createChrootIfNecessary = true)
     zkClient.createAclPaths()
 
-    extendedAclSupport = kafkaConfig.interBrokerProtocolVersion >= KAFKA_2_0_IV1
+    extendedAclSupport = kafkaConfig.interBrokerProtocolVersion.isAtLeast(IBP_2_0_IV1)
 
     // Start change listeners first and then populate the cache so that there is no timing window
     // between loading cache and processing change notifications.
@@ -192,11 +194,12 @@ class AclAuthorizer extends Authorizer with Logging {
 
   override def start(serverInfo: AuthorizerServerInfo): util.Map[Endpoint, _ <: CompletionStage[Void]] = {
     serverInfo.endpoints.asScala.map { endpoint =>
-      endpoint -> CompletableFuture.completedFuture[Void](null) }.toMap.asJava
+      endpoint -> CompletableFuture.completedFuture[Void](null)
+    }.toMap.asJava
   }
 
   override def authorize(requestContext: AuthorizableRequestContext, actions: util.List[Action]): util.List[AuthorizationResult] = {
-    actions.asScala.map { action => authorizeAction(requestContext, action) }.asJava
+    actions.asScala.map(action => authorizeAction(requestContext, action)).asJava
   }
 
   override def createAcls(requestContext: AuthorizableRequestContext,
@@ -206,7 +209,7 @@ class AclAuthorizer extends Authorizer with Logging {
       try {
         if (!extendedAclSupport && aclBinding.pattern.patternType == PatternType.PREFIXED) {
           throw new UnsupportedVersionException(s"Adding ACLs on prefixed resource patterns requires " +
-            s"${KafkaConfig.InterBrokerProtocolVersionProp} of $KAFKA_2_0_IV1 or greater")
+            s"${KafkaConfig.InterBrokerProtocolVersionProp} of $IBP_2_0_IV1 or greater")
         }
         validateAclBinding(aclBinding)
         true
@@ -440,7 +443,6 @@ class AclAuthorizer extends Authorizer with Logging {
     false
   }
 
-
   private def authorizeAction(requestContext: AuthorizableRequestContext, action: Action): AuthorizationResult = {
     val resource = action.resourcePattern
     if (resource.patternType != PatternType.LITERAL) {
@@ -547,7 +549,7 @@ class AclAuthorizer extends Authorizer with Logging {
 
   private def loadCache(): Unit = {
     lock synchronized  {
-      ZkAclStore.stores.foreach(store => {
+      ZkAclStore.stores.foreach { store =>
         val resourceTypes = zkClient.getResourceTypes(store.patternType)
         for (rType <- resourceTypes) {
           val resourceType = Try(SecurityUtils.resourceType(rType))
@@ -562,7 +564,7 @@ class AclAuthorizer extends Authorizer with Logging {
             case Failure(_) => warn(s"Ignoring unknown ResourceType: $rType")
           }
         }
-      })
+      }
     }
   }
 
@@ -691,14 +693,14 @@ class AclAuthorizer extends Authorizer with Logging {
     val acesToAdd = newAces.diff(currentAces)
     val acesToRemove = currentAces.diff(newAces)
 
-    acesToAdd.foreach(ace => {
+    acesToAdd.foreach { ace =>
       val resourceTypeKey = ResourceTypeKey(ace, resource.resourceType(), resource.patternType())
       resourceCache.get(resourceTypeKey) match {
         case Some(resources) => resourceCache += (resourceTypeKey -> (resources + resource.name()))
         case None => resourceCache += (resourceTypeKey -> immutable.HashSet(resource.name()))
       }
-    })
-    acesToRemove.foreach(ace => {
+    }
+    acesToRemove.foreach { ace =>
       val resourceTypeKey = ResourceTypeKey(ace, resource.resourceType(), resource.patternType())
       resourceCache.get(resourceTypeKey) match {
         case Some(resources) =>
@@ -710,7 +712,7 @@ class AclAuthorizer extends Authorizer with Logging {
           }
         case None =>
       }
-    })
+    }
 
     if (versionedAcls.acls.nonEmpty) {
       aclCache = aclCache.updated(resource, versionedAcls)
diff --git a/core/src/main/scala/kafka/security/authorizer/AclEntry.scala b/core/src/main/scala/kafka/security/authorizer/AclEntry.scala
index 2014916e7e4ba..9e2d49fc883c0 100644
--- a/core/src/main/scala/kafka/security/authorizer/AclEntry.scala
+++ b/core/src/main/scala/kafka/security/authorizer/AclEntry.scala
@@ -19,7 +19,7 @@ package kafka.security.authorizer
 
 import kafka.utils.Json
 import org.apache.kafka.common.acl.{AccessControlEntry, AclOperation, AclPermissionType}
-import org.apache.kafka.common.acl.AclOperation.{READ, WRITE, CREATE, DESCRIBE, DELETE, ALTER, DESCRIBE_CONFIGS, ALTER_CONFIGS, CLUSTER_ACTION, IDEMPOTENT_WRITE}
+import org.apache.kafka.common.acl.AclOperation.{READ, WRITE, CREATE, DESCRIBE, DELETE, ALTER, DESCRIBE_CONFIGS, ALTER_CONFIGS, CLUSTER_ACTION, IDEMPOTENT_WRITE, CREATE_TOKENS, DESCRIBE_TOKENS}
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.resource.{ResourcePattern, ResourceType}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
@@ -103,6 +103,7 @@ object AclEntry {
       case ResourceType.CLUSTER => Set(CREATE, CLUSTER_ACTION, DESCRIBE_CONFIGS, ALTER_CONFIGS, IDEMPOTENT_WRITE, ALTER, DESCRIBE)
       case ResourceType.TRANSACTIONAL_ID => Set(DESCRIBE, WRITE)
       case ResourceType.DELEGATION_TOKEN => Set(DESCRIBE)
+      case ResourceType.USER => Set(CREATE_TOKENS, DESCRIBE_TOKENS)
       case _ => throw new IllegalArgumentException("Not a concrete resource type")
     }
   }
diff --git a/core/src/main/scala/kafka/server/AbstractFetcherManager.scala b/core/src/main/scala/kafka/server/AbstractFetcherManager.scala
index 778053548e658..ddc45693f8728 100755
--- a/core/src/main/scala/kafka/server/AbstractFetcherManager.scala
+++ b/core/src/main/scala/kafka/server/AbstractFetcherManager.scala
@@ -62,19 +62,22 @@ abstract class AbstractFetcherManager[T <: AbstractFetcherThread](val name: Stri
 
   def resizeThreadPool(newSize: Int): Unit = {
     def migratePartitions(newSize: Int): Unit = {
+      val allRemovedPartitionsMap = mutable.Map[TopicPartition, InitialFetchState]()
       fetcherThreadMap.forKeyValue { (id, thread) =>
-        val partitionStates = removeFetcherForPartitions(thread.partitions)
+        val partitionStates = thread.removeAllPartitions()
         if (id.fetcherId >= newSize)
           thread.shutdown()
-        val fetchStates = partitionStates.map { case (topicPartition, currentFetchState) =>
-          val initialFetchState = InitialFetchState(currentFetchState.topicId, thread.sourceBroker,
-            currentLeaderEpoch = currentFetchState.currentLeaderEpoch,
-            initOffset = currentFetchState.fetchOffset)
-          topicPartition -> initialFetchState
+        partitionStates.forKeyValue { (topicPartition, currentFetchState) =>
+            val initialFetchState = InitialFetchState(currentFetchState.topicId, thread.leader.brokerEndPoint(),
+              currentLeaderEpoch = currentFetchState.currentLeaderEpoch,
+              initOffset = currentFetchState.fetchOffset)
+            allRemovedPartitionsMap += topicPartition -> initialFetchState
         }
-        addFetcherForPartitions(fetchStates)
       }
+      // failed partitions are removed when adding partitions to fetcher
+      addFetcherForPartitions(allRemovedPartitionsMap)
     }
+
     lock synchronized {
       val currentSize = numFetchersPerBroker
       info(s"Resizing fetcher thread pool size from $currentSize to $newSize")
@@ -136,7 +139,7 @@ abstract class AbstractFetcherManager[T <: AbstractFetcherThread](val name: Stri
       for ((brokerAndFetcherId, initialFetchOffsets) <- partitionsPerFetcher) {
         val brokerIdAndFetcherId = BrokerIdAndFetcherId(brokerAndFetcherId.broker.id, brokerAndFetcherId.fetcherId)
         val fetcherThread = fetcherThreadMap.get(brokerIdAndFetcherId) match {
-          case Some(currentFetcherThread) if currentFetcherThread.sourceBroker == brokerAndFetcherId.broker =>
+          case Some(currentFetcherThread) if currentFetcherThread.leader.brokerEndPoint() == brokerAndFetcherId.broker =>
             // reuse the fetcher thread
             currentFetcherThread
           case Some(f) =>
@@ -145,7 +148,7 @@ abstract class AbstractFetcherManager[T <: AbstractFetcherThread](val name: Stri
           case None =>
             addAndStartFetcherThread(brokerAndFetcherId, brokerIdAndFetcherId)
         }
-
+        // failed partitions are removed when added partitions to thread
         addPartitionsToFetcherThread(fetcherThread, initialFetchOffsets)
       }
     }
@@ -160,7 +163,7 @@ abstract class AbstractFetcherManager[T <: AbstractFetcherThread](val name: Stri
   protected def addPartitionsToFetcherThread(fetcherThread: T,
                                              initialOffsetAndEpochs: collection.Map[TopicPartition, InitialFetchState]): Unit = {
     fetcherThread.addPartitions(initialOffsetAndEpochs)
-    info(s"Added fetcher to broker ${fetcherThread.sourceBroker.id} for partitions $initialOffsetAndEpochs")
+    info(s"Added fetcher to broker ${fetcherThread.leader.brokerEndPoint().id} for partitions $initialOffsetAndEpochs")
   }
 
   /**
@@ -251,6 +254,10 @@ class FailedPartitions {
   def contains(topicPartition: TopicPartition): Boolean = synchronized {
     failedPartitionsSet.contains(topicPartition)
   }
+
+  def partitions(): Set[TopicPartition] = synchronized {
+    failedPartitionsSet.toSet
+  }
 }
 
 case class BrokerAndFetcherId(broker: BrokerEndPoint, fetcherId: Int)
diff --git a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala
index 492cec425e342..2ae3f45023bb7 100755
--- a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala
+++ b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala
@@ -17,7 +17,6 @@
 
 package kafka.server
 
-import kafka.cluster.BrokerEndPoint
 import kafka.common.ClientIdAndBroker
 import kafka.log.LogAppendInfo
 import kafka.metrics.KafkaMetricsGroup
@@ -51,7 +50,7 @@ import scala.math._
  */
 abstract class AbstractFetcherThread(name: String,
                                      clientId: String,
-                                     val sourceBroker: BrokerEndPoint,
+                                     val leader: LeaderEndPoint,
                                      failedPartitions: FailedPartitions,
                                      fetchBackOffMs: Int = 0,
                                      isInterruptible: Boolean = true,
@@ -65,7 +64,7 @@ abstract class AbstractFetcherThread(name: String,
   protected val partitionMapLock = new ReentrantLock
   private val partitionMapCond = partitionMapLock.newCondition()
 
-  private val metricId = ClientIdAndBroker(clientId, sourceBroker.host, sourceBroker.port)
+  private val metricId = ClientIdAndBroker(clientId, leader.brokerEndPoint().host, leader.brokerEndPoint().port)
   val fetcherStats = new FetcherStats(metricId)
   val fetcherLagStats = new FetcherLagStats(metricId)
 
@@ -80,8 +79,6 @@ abstract class AbstractFetcherThread(name: String,
 
   protected def truncateFullyAndStartAt(topicPartition: TopicPartition, offset: Long): Unit
 
-  protected def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]]
-
   protected def latestEpoch(topicPartition: TopicPartition): Option[Int]
 
   protected def logStartOffset(topicPartition: TopicPartition): Long
@@ -90,18 +87,8 @@ abstract class AbstractFetcherThread(name: String,
 
   protected def endOffsetForEpoch(topicPartition: TopicPartition, epoch: Int): Option[OffsetAndEpoch]
 
-  protected def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset]
-
-  protected def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData]
-
-  protected def fetchEarliestOffsetFromLeader(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long
-
-  protected def fetchLatestOffsetFromLeader(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long
-
   protected val isOffsetForLeaderEpochSupported: Boolean
 
-  protected val isTruncationOnFetchSupported: Boolean
-
   override def shutdown(): Unit = {
     initiateShutdown()
     inLock(partitionMapLock) {
@@ -121,7 +108,7 @@ abstract class AbstractFetcherThread(name: String,
 
   private def maybeFetch(): Unit = {
     val fetchRequestOpt = inLock(partitionMapLock) {
-      val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = buildFetch(partitionStates.partitionStateMap.asScala)
+      val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = leader.buildFetch(partitionStates.partitionStateMap.asScala)
 
       handlePartitionsWithErrors(partitionsWithError, "maybeFetch")
 
@@ -209,7 +196,7 @@ abstract class AbstractFetcherThread(name: String,
     *   occur during truncation.
     */
   private def truncateToEpochEndOffsets(latestEpochsForPartitions: Map[TopicPartition, EpochData]): Unit = {
-    val endOffsets = fetchEpochEndOffsets(latestEpochsForPartitions)
+    val endOffsets = leader.fetchEpochEndOffsets(latestEpochsForPartitions)
     //Ensure we hold a lock during truncation.
     inLock(partitionMapLock) {
       //Check no leadership and no leader epoch changes happened whilst we were unlocked, fetching epochs
@@ -319,7 +306,7 @@ abstract class AbstractFetcherThread(name: String,
 
     try {
       trace(s"Sending fetch request $fetchRequest")
-      responseData = fetchFromLeader(fetchRequest)
+      responseData = leader.fetch(fetchRequest)
     } catch {
       case t: Throwable =>
         if (isRunning) {
@@ -364,7 +351,7 @@ abstract class AbstractFetcherThread(name: String,
                         fetcherStats.byteRate.mark(validBytes)
                       }
                     }
-                    if (isTruncationOnFetchSupported) {
+                    if (leader.isTruncationOnFetchSupported) {
                       FetchResponse.divergingEpoch(partitionData).ifPresent { divergingEpoch =>
                         divergingEndOffsets += topicPartition -> new EpochEndOffset()
                           .setPartition(topicPartition.partition)
@@ -482,7 +469,7 @@ abstract class AbstractFetcherThread(name: String,
       currentState
     } else if (initialFetchState.initOffset < 0) {
       fetchOffsetAndTruncate(tp, initialFetchState.topicId, initialFetchState.currentLeaderEpoch)
-    } else if (isTruncationOnFetchSupported) {
+    } else if (leader.isTruncationOnFetchSupported) {
       // With old message format, `latestEpoch` will be empty and we use Truncating state
       // to truncate to high watermark.
       val lastFetchedEpoch = latestEpoch(tp)
@@ -537,7 +524,7 @@ abstract class AbstractFetcherThread(name: String,
         val maybeTruncationComplete = fetchOffsets.get(topicPartition) match {
           case Some(offsetTruncationState) =>
             val lastFetchedEpoch = latestEpoch(topicPartition)
-            val state = if (isTruncationOnFetchSupported || offsetTruncationState.truncationCompleted)
+            val state = if (leader.isTruncationOnFetchSupported || offsetTruncationState.truncationCompleted)
               Fetching
             else
               Truncating
@@ -557,11 +544,11 @@ abstract class AbstractFetcherThread(name: String,
    * For each topic partition, the offset to truncate to is calculated based on leader's returned
    * epoch and offset:
    *  -- If the leader replied with undefined epoch offset, we must use the high watermark. This can
-   *  happen if 1) the leader is still using message format older than KAFKA_0_11_0; 2) the follower
+   *  happen if 1) the leader is still using message format older than IBP_0_11_0; 2) the follower
    *  requested leader epoch < the first leader epoch known to the leader.
    *  -- If the leader replied with the valid offset but undefined leader epoch, we truncate to
    *  leader's offset if it is lower than follower's Log End Offset. This may happen if the
-   *  leader is on the inter-broker protocol version < KAFKA_2_0_IV0
+   *  leader is on the inter-broker protocol version < IBP_2_0_IV0
    *  -- If the leader replied with leader epoch not known to the follower, we truncate to the
    *  end offset of the largest epoch that is smaller than the epoch the leader replied with, and
    *  send OffsetsForLeaderEpochRequest with that leader epoch. In a more rare case, where the
@@ -584,7 +571,7 @@ abstract class AbstractFetcherThread(name: String,
            s"The initial fetch offset ${partitionStates.stateValue(tp).fetchOffset} will be used for truncation.")
       OffsetTruncationState(partitionStates.stateValue(tp).fetchOffset, truncationCompleted = true)
     } else if (leaderEpochOffset.leaderEpoch == UNDEFINED_EPOCH) {
-      // either leader or follower or both use inter-broker protocol version < KAFKA_2_0_IV0
+      // either leader or follower or both use inter-broker protocol version < IBP_2_0_IV0
       // (version 0 of OffsetForLeaderEpoch request/response)
       warn(s"Leader or replica is on protocol version where leader epoch is not considered in the OffsetsForLeaderEpoch response. " +
            s"The leader's offset ${leaderEpochOffset.endOffset} will be used for truncation in $tp.")
@@ -669,7 +656,7 @@ abstract class AbstractFetcherThread(name: String,
      *
      * There is a potential for a mismatch between the logs of the two replicas here. We don't fix this mismatch as of now.
      */
-    val leaderEndOffset = fetchLatestOffsetFromLeader(topicPartition, currentLeaderEpoch)
+    val leaderEndOffset = leader.fetchLatestOffset(topicPartition, currentLeaderEpoch)
     if (leaderEndOffset < replicaEndOffset) {
       warn(s"Reset fetch offset for partition $topicPartition from $replicaEndOffset to current " +
         s"leader's latest offset $leaderEndOffset")
@@ -700,7 +687,7 @@ abstract class AbstractFetcherThread(name: String,
        * Putting the two cases together, the follower should fetch from the higher one of its replica log end offset
        * and the current leader's log start offset.
        */
-      val leaderStartOffset = fetchEarliestOffsetFromLeader(topicPartition, currentLeaderEpoch)
+      val leaderStartOffset = leader.fetchEarliestOffset(topicPartition, currentLeaderEpoch)
       warn(s"Reset fetch offset for partition $topicPartition from $replicaEndOffset to current " +
         s"leader's start offset $leaderStartOffset")
       val offsetToFetch = Math.max(leaderStartOffset, replicaEndOffset)
@@ -743,6 +730,18 @@ abstract class AbstractFetcherThread(name: String,
     } finally partitionMapLock.unlock()
   }
 
+  def removeAllPartitions(): Map[TopicPartition, PartitionFetchState] = {
+    partitionMapLock.lockInterruptibly()
+    try {
+      val allPartitionState = partitionStates.partitionStateMap.asScala.toMap
+      allPartitionState.keys.foreach { tp =>
+        partitionStates.remove(tp)
+        fetcherLagStats.unregister(tp)
+      }
+      allPartitionState
+    } finally partitionMapLock.unlock()
+  }
+
   def partitionCount: Int = {
     partitionMapLock.lockInterruptibly()
     try partitionStates.size
diff --git a/core/src/main/scala/kafka/server/AclApis.scala b/core/src/main/scala/kafka/server/AclApis.scala
index 97b685bc0aae2..485cafeca2038 100644
--- a/core/src/main/scala/kafka/server/AclApis.scala
+++ b/core/src/main/scala/kafka/server/AclApis.scala
@@ -24,14 +24,16 @@ import org.apache.kafka.common.acl.AclOperation._
 import org.apache.kafka.common.acl.AclBinding
 import org.apache.kafka.common.errors._
 import org.apache.kafka.common.message.CreateAclsResponseData.AclCreationResult
+import org.apache.kafka.common.message.DeleteAclsResponseData.DeleteAclsFilterResult
 import org.apache.kafka.common.message._
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.resource.Resource.CLUSTER_NAME
 import org.apache.kafka.common.resource.ResourceType
 import org.apache.kafka.server.authorizer._
-import java.util
 
+import java.util
+import java.util.concurrent.CompletableFuture
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable
 import scala.compat.java8.OptionConverters._
@@ -53,7 +55,7 @@ class AclApis(authHelper: AuthHelper,
 
   def close(): Unit = alterAclsPurgatory.shutdown()
 
-  def handleDescribeAcls(request: RequestChannel.Request): Unit = {
+  def handleDescribeAcls(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, DESCRIBE)
     val describeAclsRequest = request.body[DescribeAclsRequest]
     authorizer match {
@@ -74,9 +76,10 @@ class AclApis(authHelper: AuthHelper,
             .setResources(DescribeAclsResponse.aclsResources(returnedAcls)),
           describeAclsRequest.version))
     }
+    CompletableFuture.completedFuture[Unit](())
   }
 
-  def handleCreateAcls(request: RequestChannel.Request): Unit = {
+  def handleCreateAcls(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, ALTER)
     val createAclsRequest = request.body[CreateAclsRequest]
 
@@ -84,6 +87,7 @@ class AclApis(authHelper: AuthHelper,
       case None => requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
         createAclsRequest.getErrorResponse(requestThrottleMs,
           new SecurityDisabledException("No Authorizer is configured.")))
+        CompletableFuture.completedFuture[Unit](())
       case Some(auth) =>
         val allBindings = createAclsRequest.aclCreations.asScala.map(CreateAclsRequest.aclBinding)
         val errorResults = mutable.Map[AclBinding, AclCreateResult]()
@@ -103,6 +107,7 @@ class AclApis(authHelper: AuthHelper,
             validBindings += acl
         }
 
+        val future = new CompletableFuture[util.List[AclCreationResult]]()
         val createResults = auth.createAcls(request.context, validBindings.asJava).asScala.map(_.toCompletableFuture)
 
         def sendResponseCallback(): Unit = {
@@ -117,17 +122,20 @@ class AclApis(authHelper: AuthHelper,
             }
             creationResult
           }
+          future.complete(aclCreationResults.asJava)
+        }
+        alterAclsPurgatory.tryCompleteElseWatch(config.connectionsMaxIdleMs, createResults, sendResponseCallback)
+
+        future.thenApply[Unit] { aclCreationResults =>
           requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
             new CreateAclsResponse(new CreateAclsResponseData()
               .setThrottleTimeMs(requestThrottleMs)
-              .setResults(aclCreationResults.asJava)))
+              .setResults(aclCreationResults)))
         }
-
-        alterAclsPurgatory.tryCompleteElseWatch(config.connectionsMaxIdleMs, createResults, sendResponseCallback)
     }
   }
 
-  def handleDeleteAcls(request: RequestChannel.Request): Unit = {
+  def handleDeleteAcls(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, ALTER)
     val deleteAclsRequest = request.body[DeleteAclsRequest]
     authorizer match {
@@ -135,13 +143,20 @@ class AclApis(authHelper: AuthHelper,
         requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
           deleteAclsRequest.getErrorResponse(requestThrottleMs,
             new SecurityDisabledException("No Authorizer is configured.")))
+        CompletableFuture.completedFuture[Unit](())
       case Some(auth) =>
 
+        val future = new CompletableFuture[util.List[DeleteAclsFilterResult]]()
         val deleteResults = auth.deleteAcls(request.context, deleteAclsRequest.filters)
           .asScala.map(_.toCompletableFuture).toList
 
         def sendResponseCallback(): Unit = {
           val filterResults = deleteResults.map(_.get).map(DeleteAclsResponse.filterResult).asJava
+          future.complete(filterResults)
+        }
+
+        alterAclsPurgatory.tryCompleteElseWatch(config.connectionsMaxIdleMs, deleteResults, sendResponseCallback)
+        future.thenApply[Unit] { filterResults =>
           requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
             new DeleteAclsResponse(
               new DeleteAclsResponseData()
@@ -149,7 +164,6 @@ class AclApis(authHelper: AuthHelper,
                 .setFilterResults(filterResults),
               deleteAclsRequest.version))
         }
-        alterAclsPurgatory.tryCompleteElseWatch(config.connectionsMaxIdleMs, deleteResults, sendResponseCallback)
     }
   }
-}
+ }
diff --git a/core/src/main/scala/kafka/server/AlterIsrManager.scala b/core/src/main/scala/kafka/server/AlterIsrManager.scala
deleted file mode 100644
index 64441ad7b9b0c..0000000000000
--- a/core/src/main/scala/kafka/server/AlterIsrManager.scala
+++ /dev/null
@@ -1,292 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package kafka.server
-
-import java.util
-import java.util.concurrent.atomic.AtomicBoolean
-import java.util.concurrent.{CompletableFuture, ConcurrentHashMap, TimeUnit}
-
-import kafka.api.LeaderAndIsr
-import kafka.metrics.KafkaMetricsGroup
-import kafka.utils.{KafkaScheduler, Logging, Scheduler}
-import kafka.zk.KafkaZkClient
-import org.apache.kafka.clients.ClientResponse
-import org.apache.kafka.common.TopicPartition
-import org.apache.kafka.common.errors.OperationNotAttemptedException
-import org.apache.kafka.common.message.AlterIsrRequestData
-import org.apache.kafka.common.metrics.Metrics
-import org.apache.kafka.common.protocol.Errors
-import org.apache.kafka.common.requests.{AlterIsrRequest, AlterIsrResponse}
-import org.apache.kafka.common.utils.Time
-
-import scala.collection.mutable
-import scala.collection.mutable.ListBuffer
-import scala.jdk.CollectionConverters._
-
-/**
- * Handles updating the ISR by sending AlterIsr requests to the controller (as of 2.7) or by updating ZK directly
- * (prior to 2.7). Updating the ISR is an asynchronous operation, so partitions will learn about the result of their
- * request through a callback.
- *
- * Note that ISR state changes can still be initiated by the controller and sent to the partitions via LeaderAndIsr
- * requests.
- */
-trait AlterIsrManager {
-  def start(): Unit = {}
-
-  def shutdown(): Unit = {}
-
-  def submit(
-    topicPartition: TopicPartition,
-    leaderAndIsr: LeaderAndIsr,
-    controllerEpoch: Int
-  ): CompletableFuture[LeaderAndIsr]
-}
-
-case class AlterIsrItem(topicPartition: TopicPartition,
-                        leaderAndIsr: LeaderAndIsr,
-                        future: CompletableFuture[LeaderAndIsr],
-                        controllerEpoch: Int) // controllerEpoch needed for Zk impl
-
-object AlterIsrManager {
-
-  /**
-   * Factory to AlterIsr based implementation, used when IBP >= 2.7-IV2
-   */
-  def apply(
-    config: KafkaConfig,
-    metadataCache: MetadataCache,
-    scheduler: KafkaScheduler,
-    time: Time,
-    metrics: Metrics,
-    threadNamePrefix: Option[String],
-    brokerEpochSupplier: () => Long,
-    brokerId: Int
-  ): AlterIsrManager = {
-    val nodeProvider = MetadataCacheControllerNodeProvider(config, metadataCache)
-
-    val channelManager = BrokerToControllerChannelManager(
-      controllerNodeProvider = nodeProvider,
-      time = time,
-      metrics = metrics,
-      config = config,
-      channelName = "alterIsr",
-      threadNamePrefix = threadNamePrefix,
-      retryTimeoutMs = Long.MaxValue
-    )
-    new DefaultAlterIsrManager(
-      controllerChannelManager = channelManager,
-      scheduler = scheduler,
-      time = time,
-      brokerId = brokerId,
-      brokerEpochSupplier = brokerEpochSupplier
-    )
-  }
-
-  /**
-   * Factory for ZK based implementation, used when IBP < 2.7-IV2
-   */
-  def apply(
-    scheduler: Scheduler,
-    time: Time,
-    zkClient: KafkaZkClient
-  ): AlterIsrManager = {
-    new ZkIsrManager(scheduler, time, zkClient)
-  }
-
-}
-
-class DefaultAlterIsrManager(
-  val controllerChannelManager: BrokerToControllerChannelManager,
-  val scheduler: Scheduler,
-  val time: Time,
-  val brokerId: Int,
-  val brokerEpochSupplier: () => Long
-) extends AlterIsrManager with Logging with KafkaMetricsGroup {
-
-  // Used to allow only one pending ISR update per partition (visible for testing)
-  private[server] val unsentIsrUpdates: util.Map[TopicPartition, AlterIsrItem] = new ConcurrentHashMap[TopicPartition, AlterIsrItem]()
-
-  // Used to allow only one in-flight request at a time
-  private val inflightRequest: AtomicBoolean = new AtomicBoolean(false)
-
-  override def start(): Unit = {
-    controllerChannelManager.start()
-  }
-
-  override def shutdown(): Unit = {
-    controllerChannelManager.shutdown()
-  }
-
-  override def submit(
-    topicPartition: TopicPartition,
-    leaderAndIsr: LeaderAndIsr,
-    controllerEpoch: Int
-  ): CompletableFuture[LeaderAndIsr] = {
-    val future = new CompletableFuture[LeaderAndIsr]()
-    val alterIsrItem = AlterIsrItem(topicPartition, leaderAndIsr, future, controllerEpoch)
-    val enqueued = unsentIsrUpdates.putIfAbsent(alterIsrItem.topicPartition, alterIsrItem) == null
-    if (enqueued) {
-      maybePropagateIsrChanges()
-    } else {
-      future.completeExceptionally(new OperationNotAttemptedException(
-        s"Failed to enqueue ISR change state $leaderAndIsr for partition $topicPartition"))
-    }
-    future
-  }
-
-  private[server] def maybePropagateIsrChanges(): Unit = {
-    // Send all pending items if there is not already a request in-flight.
-    if (!unsentIsrUpdates.isEmpty && inflightRequest.compareAndSet(false, true)) {
-      // Copy current unsent ISRs but don't remove from the map, they get cleared in the response handler
-      val inflightAlterIsrItems = new ListBuffer[AlterIsrItem]()
-      unsentIsrUpdates.values.forEach(item => inflightAlterIsrItems.append(item))
-      sendRequest(inflightAlterIsrItems.toSeq)
-    }
-  }
-
-  private[server] def clearInFlightRequest(): Unit = {
-    if (!inflightRequest.compareAndSet(true, false)) {
-      warn("Attempting to clear AlterIsr in-flight flag when no apparent request is in-flight")
-    }
-  }
-
-  private def sendRequest(inflightAlterIsrItems: Seq[AlterIsrItem]): Unit = {
-    val message = buildRequest(inflightAlterIsrItems)
-    debug(s"Sending AlterIsr to controller $message")
-
-    // We will not timeout AlterISR request, instead letting it retry indefinitely
-    // until a response is received, or a new LeaderAndIsr overwrites the existing isrState
-    // which causes the response for those partitions to be ignored.
-    controllerChannelManager.sendRequest(new AlterIsrRequest.Builder(message),
-      new ControllerRequestCompletionHandler {
-        override def onComplete(response: ClientResponse): Unit = {
-          debug(s"Received AlterIsr response $response")
-          val error = try {
-            if (response.authenticationException != null) {
-              // For now we treat authentication errors as retriable. We use the
-              // `NETWORK_EXCEPTION` error code for lack of a good alternative.
-              // Note that `BrokerToControllerChannelManager` will still log the
-              // authentication errors so that users have a chance to fix the problem.
-              Errors.NETWORK_EXCEPTION
-            } else if (response.versionMismatch != null) {
-              Errors.UNSUPPORTED_VERSION
-            } else {
-              val body = response.responseBody().asInstanceOf[AlterIsrResponse]
-              handleAlterIsrResponse(body, message.brokerEpoch, inflightAlterIsrItems)
-            }
-          } finally {
-            // clear the flag so future requests can proceed
-            clearInFlightRequest()
-          }
-
-          // check if we need to send another request right away
-          error match {
-              case Errors.NONE =>
-                // In the normal case, check for pending updates to send immediately
-                maybePropagateIsrChanges()
-              case _ =>
-                // If we received a top-level error from the controller, retry the request in the near future
-                scheduler.schedule("send-alter-isr", () => maybePropagateIsrChanges(), 50, -1, TimeUnit.MILLISECONDS)
-            }
-        }
-
-        override def onTimeout(): Unit = {
-          throw new IllegalStateException("Encountered unexpected timeout when sending AlterIsr to the controller")
-        }
-      })
-  }
-
-  private def buildRequest(inflightAlterIsrItems: Seq[AlterIsrItem]): AlterIsrRequestData = {
-    val message = new AlterIsrRequestData()
-      .setBrokerId(brokerId)
-      .setBrokerEpoch(brokerEpochSupplier.apply())
-
-    inflightAlterIsrItems.groupBy(_.topicPartition.topic).foreach { case (topic, items) =>
-      val topicData = new AlterIsrRequestData.TopicData().setName(topic)
-      message.topics.add(topicData)
-      items.foreach { item =>
-        topicData.partitions.add(new AlterIsrRequestData.PartitionData()
-          .setPartitionIndex(item.topicPartition.partition)
-          .setLeaderEpoch(item.leaderAndIsr.leaderEpoch)
-          .setNewIsr(item.leaderAndIsr.isr.map(Integer.valueOf).asJava)
-          .setCurrentIsrVersion(item.leaderAndIsr.zkVersion)
-        )
-      }
-    }
-    message
-  }
-
-  def handleAlterIsrResponse(alterIsrResponse: AlterIsrResponse,
-                             sentBrokerEpoch: Long,
-                             inflightAlterIsrItems: Seq[AlterIsrItem]): Errors = {
-    val data = alterIsrResponse.data
-
-    Errors.forCode(data.errorCode) match {
-      case Errors.STALE_BROKER_EPOCH =>
-        warn(s"Broker had a stale broker epoch ($sentBrokerEpoch), retrying.")
-
-      case Errors.CLUSTER_AUTHORIZATION_FAILED =>
-        error(s"Broker is not authorized to send AlterIsr to controller",
-          Errors.CLUSTER_AUTHORIZATION_FAILED.exception("Broker is not authorized to send AlterIsr to controller"))
-
-      case Errors.NONE =>
-        // Collect partition-level responses to pass to the callbacks
-        val partitionResponses = new mutable.HashMap[TopicPartition, Either[Errors, LeaderAndIsr]]()
-        data.topics.forEach { topic =>
-          topic.partitions.forEach { partition =>
-            val tp = new TopicPartition(topic.name, partition.partitionIndex)
-            val error = Errors.forCode(partition.errorCode)
-            debug(s"Controller successfully handled AlterIsr request for $tp: $partition")
-            if (error == Errors.NONE) {
-              val newLeaderAndIsr = new LeaderAndIsr(partition.leaderId, partition.leaderEpoch,
-                partition.isr.asScala.toList.map(_.toInt), partition.currentIsrVersion)
-              partitionResponses(tp) = Right(newLeaderAndIsr)
-            } else {
-              partitionResponses(tp) = Left(error)
-            }
-          }
-        }
-
-        // Iterate across the items we sent rather than what we received to ensure we run the callback even if a
-        // partition was somehow erroneously excluded from the response. Note that these callbacks are run from
-        // the leaderIsrUpdateLock write lock in Partition#sendAlterIsrRequest
-        inflightAlterIsrItems.foreach { inflightAlterIsr =>
-          partitionResponses.get(inflightAlterIsr.topicPartition) match {
-            case Some(leaderAndIsrOrError) =>
-              try {
-                leaderAndIsrOrError match {
-                  case Left(error) => inflightAlterIsr.future.completeExceptionally(error.exception)
-                  case Right(leaderAndIsr) => inflightAlterIsr.future.complete(leaderAndIsr)
-                }
-              } finally {
-                // Regardless of callback outcome, we need to clear from the unsent updates map to unblock further updates
-                unsentIsrUpdates.remove(inflightAlterIsr.topicPartition)
-              }
-            case None =>
-              // Don't remove this partition from the update map so it will get re-sent
-              warn(s"Partition ${inflightAlterIsr.topicPartition} was sent but not included in the response")
-          }
-        }
-
-      case e =>
-        warn(s"Controller returned an unexpected top-level error when handling AlterIsr request: $e")
-    }
-
-    Errors.forCode(data.errorCode)
-  }
-}
diff --git a/core/src/main/scala/kafka/server/AlterPartitionManager.scala b/core/src/main/scala/kafka/server/AlterPartitionManager.scala
new file mode 100644
index 0000000000000..574df470a36e6
--- /dev/null
+++ b/core/src/main/scala/kafka/server/AlterPartitionManager.scala
@@ -0,0 +1,380 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.server
+
+import java.util
+import java.util.concurrent.atomic.AtomicBoolean
+import java.util.concurrent.{CompletableFuture, ConcurrentHashMap, TimeUnit}
+import kafka.api.LeaderAndIsr
+import kafka.metrics.KafkaMetricsGroup
+import kafka.utils.{KafkaScheduler, Logging, Scheduler}
+import kafka.zk.KafkaZkClient
+import org.apache.kafka.clients.ClientResponse
+import org.apache.kafka.common.TopicIdPartition
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.Uuid
+import org.apache.kafka.common.errors.OperationNotAttemptedException
+import org.apache.kafka.common.message.AlterPartitionRequestData
+import org.apache.kafka.common.metrics.Metrics
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.requests.RequestHeader
+import org.apache.kafka.common.requests.{AlterPartitionRequest, AlterPartitionResponse}
+import org.apache.kafka.common.utils.Time
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
+
+import scala.collection.mutable
+import scala.collection.mutable.ListBuffer
+import scala.compat.java8.OptionConverters._
+import scala.jdk.CollectionConverters._
+
+/**
+ * Handles updating the ISR by sending AlterPartition requests to the controller (as of 2.7) or by updating ZK directly
+ * (prior to 2.7). Updating the ISR is an asynchronous operation, so partitions will learn about the result of their
+ * request through a callback.
+ *
+ * Note that ISR state changes can still be initiated by the controller and sent to the partitions via LeaderAndIsr
+ * requests.
+ */
+trait AlterPartitionManager {
+  def start(): Unit = {}
+
+  def shutdown(): Unit = {}
+
+  def submit(
+    topicIdPartition: TopicIdPartition,
+    leaderAndIsr: LeaderAndIsr,
+    controllerEpoch: Int
+  ): CompletableFuture[LeaderAndIsr]
+}
+
+case class AlterPartitionItem(
+  topicIdPartition: TopicIdPartition,
+  leaderAndIsr: LeaderAndIsr,
+  future: CompletableFuture[LeaderAndIsr],
+  controllerEpoch: Int // controllerEpoch needed for `ZkAlterPartitionManager`
+)
+
+object AlterPartitionManager {
+
+  /**
+   * Factory to AlterPartition based implementation, used when IBP >= 2.7-IV2
+   */
+  def apply(
+    config: KafkaConfig,
+    metadataCache: MetadataCache,
+    scheduler: KafkaScheduler,
+    controllerNodeProvider: ControllerNodeProvider,
+    time: Time,
+    metrics: Metrics,
+    threadNamePrefix: Option[String],
+    brokerEpochSupplier: () => Long,
+  ): AlterPartitionManager = {
+    val channelManager = BrokerToControllerChannelManager(
+      controllerNodeProvider,
+      time = time,
+      metrics = metrics,
+      config = config,
+      channelName = "alterPartition",
+      threadNamePrefix = threadNamePrefix,
+      retryTimeoutMs = Long.MaxValue
+    )
+    new DefaultAlterPartitionManager(
+      controllerChannelManager = channelManager,
+      scheduler = scheduler,
+      time = time,
+      brokerId = config.brokerId,
+      brokerEpochSupplier = brokerEpochSupplier,
+      metadataVersionSupplier = () => metadataCache.metadataVersion()
+    )
+  }
+
+  /**
+   * Factory for ZK based implementation, used when IBP < 2.7-IV2
+   */
+  def apply(
+    scheduler: Scheduler,
+    time: Time,
+    zkClient: KafkaZkClient
+  ): AlterPartitionManager = {
+    new ZkAlterPartitionManager(scheduler, time, zkClient)
+  }
+}
+
+class DefaultAlterPartitionManager(
+  val controllerChannelManager: BrokerToControllerChannelManager,
+  val scheduler: Scheduler,
+  val time: Time,
+  val brokerId: Int,
+  val brokerEpochSupplier: () => Long,
+  val metadataVersionSupplier: () => MetadataVersion
+) extends AlterPartitionManager with Logging with KafkaMetricsGroup {
+
+  // Used to allow only one pending ISR update per partition (visible for testing).
+  // Note that we key items by TopicPartition despite using TopicIdPartition while
+  // submitting changes. We do this to ensure that topics with the same name but
+  // with a different topic id or no topic id collide here. There are two cases to
+  // consider:
+  // 1) When the cluster is upgraded from IBP < 2.8 to IBP >= 2.8, the ZK controller
+  //    assigns topic ids to the partitions. So partitions will start sending updates
+  //    with a topic id while they might still have updates without topic ids in this
+  //    Map. This would break the contract of only allowing one pending ISR update per
+  //    partition.
+  // 2) When a topic is deleted and re-created, we cannot have two entries in this Map
+  //    especially if we cannot use an AlterPartition request version which supports
+  //    topic ids in the end because the two updates with the same name would be merged
+  //    together.
+  private[server] val unsentIsrUpdates: util.Map[TopicPartition, AlterPartitionItem] = new ConcurrentHashMap[TopicPartition, AlterPartitionItem]()
+
+  // Used to allow only one in-flight request at a time
+  private val inflightRequest: AtomicBoolean = new AtomicBoolean(false)
+
+  override def start(): Unit = {
+    controllerChannelManager.start()
+  }
+
+  override def shutdown(): Unit = {
+    controllerChannelManager.shutdown()
+  }
+
+  override def submit(
+    topicIdPartition: TopicIdPartition,
+    leaderAndIsr: LeaderAndIsr,
+    controllerEpoch: Int
+  ): CompletableFuture[LeaderAndIsr] = {
+    val future = new CompletableFuture[LeaderAndIsr]()
+    val alterPartitionItem = AlterPartitionItem(topicIdPartition, leaderAndIsr, future, controllerEpoch)
+    val enqueued = unsentIsrUpdates.putIfAbsent(alterPartitionItem.topicIdPartition.topicPartition, alterPartitionItem) == null
+    if (enqueued) {
+      maybePropagateIsrChanges()
+    } else {
+      future.completeExceptionally(new OperationNotAttemptedException(
+        s"Failed to enqueue ISR change state $leaderAndIsr for partition $topicIdPartition"))
+    }
+    future
+  }
+
+  private[server] def maybePropagateIsrChanges(): Unit = {
+    // Send all pending items if there is not already a request in-flight.
+    if (!unsentIsrUpdates.isEmpty && inflightRequest.compareAndSet(false, true)) {
+      // Copy current unsent ISRs but don't remove from the map, they get cleared in the response handler
+      val inflightAlterPartitionItems = new ListBuffer[AlterPartitionItem]()
+      unsentIsrUpdates.values.forEach(item => inflightAlterPartitionItems.append(item))
+      sendRequest(inflightAlterPartitionItems.toSeq)
+    }
+  }
+
+  private[server] def clearInFlightRequest(): Unit = {
+    if (!inflightRequest.compareAndSet(true, false)) {
+      warn("Attempting to clear AlterPartition in-flight flag when no apparent request is in-flight")
+    }
+  }
+
+  private def sendRequest(inflightAlterPartitionItems: Seq[AlterPartitionItem]): Unit = {
+    val brokerEpoch = brokerEpochSupplier()
+    val (request, topicNamesByIds) = buildRequest(inflightAlterPartitionItems, brokerEpoch)
+    debug(s"Sending AlterPartition to controller $request")
+
+    // We will not timeout AlterPartition request, instead letting it retry indefinitely
+    // until a response is received, or a new LeaderAndIsr overwrites the existing isrState
+    // which causes the response for those partitions to be ignored.
+    controllerChannelManager.sendRequest(request,
+      new ControllerRequestCompletionHandler {
+        override def onComplete(response: ClientResponse): Unit = {
+          debug(s"Received AlterPartition response $response")
+          val error = try {
+            if (response.authenticationException != null) {
+              // For now we treat authentication errors as retriable. We use the
+              // `NETWORK_EXCEPTION` error code for lack of a good alternative.
+              // Note that `BrokerToControllerChannelManager` will still log the
+              // authentication errors so that users have a chance to fix the problem.
+              Errors.NETWORK_EXCEPTION
+            } else if (response.versionMismatch != null) {
+              Errors.UNSUPPORTED_VERSION
+            } else {
+              handleAlterPartitionResponse(
+                response.requestHeader,
+                response.responseBody.asInstanceOf[AlterPartitionResponse],
+                brokerEpoch,
+                inflightAlterPartitionItems,
+                topicNamesByIds
+              )
+            }
+          } finally {
+            // clear the flag so future requests can proceed
+            clearInFlightRequest()
+          }
+
+          // check if we need to send another request right away
+          error match {
+              case Errors.NONE =>
+                // In the normal case, check for pending updates to send immediately
+                maybePropagateIsrChanges()
+              case _ =>
+                // If we received a top-level error from the controller, retry the request in the near future
+                scheduler.schedule("send-alter-partition", () => maybePropagateIsrChanges(), 50, -1, TimeUnit.MILLISECONDS)
+            }
+        }
+
+        override def onTimeout(): Unit = {
+          throw new IllegalStateException("Encountered unexpected timeout when sending AlterPartition to the controller")
+        }
+      })
+  }
+
+  /**
+   * Builds an AlterPartition request.
+   *
+   * While building the request, we don't know which version of the AlterPartition API is
+   * supported by the controller. The final decision is taken when the AlterPartitionRequest
+   * is built in the network client based on the advertised api versions of the controller.
+   *
+   * We could use version 2 or above if all the pending changes have an topic id defined;
+   * otherwise we must use version 1 or below.
+   *
+   * @return A tuple containing the AlterPartitionRequest.Builder and a mapping from
+   *         topic id to topic name. This mapping is used in the response handling.
+   */
+  private def buildRequest(
+    inflightAlterPartitionItems: Seq[AlterPartitionItem],
+    brokerEpoch: Long
+  ): (AlterPartitionRequest.Builder, mutable.Map[Uuid, String]) = {
+    val metadataVersion = metadataVersionSupplier()
+    // We build this mapping in order to map topic id back to their name when we
+    // receive the response. We cannot rely on the metadata cache for this because
+    // the metadata cache is updated after the partition state so it might not know
+    // yet about a topic id already used here.
+    val topicNamesByIds = mutable.HashMap[Uuid, String]()
+    // We can use topic ids only if all the pending changed have one defined and
+    // we use IBP 2.8 or above.
+    var canUseTopicIds = metadataVersion.isTopicIdsSupported
+
+    val message = new AlterPartitionRequestData()
+      .setBrokerId(brokerId)
+      .setBrokerEpoch(brokerEpoch)
+
+    inflightAlterPartitionItems.groupBy(_.topicIdPartition.topic).foreach { case (topicName, items) =>
+      val topicId = items.head.topicIdPartition.topicId
+      canUseTopicIds &= topicId != Uuid.ZERO_UUID
+      topicNamesByIds(topicId) = topicName
+
+      // Both the topic name and the topic id are set here because at this stage
+      // we don't know which version of the request will be used.
+      val topicData = new AlterPartitionRequestData.TopicData()
+        .setTopicName(topicName)
+        .setTopicId(topicId)
+      message.topics.add(topicData)
+
+      items.foreach { item =>
+        val partitionData = new AlterPartitionRequestData.PartitionData()
+          .setPartitionIndex(item.topicIdPartition.partition)
+          .setLeaderEpoch(item.leaderAndIsr.leaderEpoch)
+          .setNewIsr(item.leaderAndIsr.isr.map(Integer.valueOf).asJava)
+          .setPartitionEpoch(item.leaderAndIsr.partitionEpoch)
+
+        if (metadataVersion.isLeaderRecoverySupported) {
+          partitionData.setLeaderRecoveryState(item.leaderAndIsr.leaderRecoveryState.value)
+        }
+
+        topicData.partitions.add(partitionData)
+      }
+    }
+
+    // If we cannot use topic ids, the builder will ensure that no version higher than 1 is used.
+    (new AlterPartitionRequest.Builder(message, canUseTopicIds), topicNamesByIds)
+  }
+
+  def handleAlterPartitionResponse(
+    requestHeader: RequestHeader,
+    alterPartitionResp: AlterPartitionResponse,
+    sentBrokerEpoch: Long,
+    inflightAlterPartitionItems: Seq[AlterPartitionItem],
+    topicNamesByIds: mutable.Map[Uuid, String]
+  ): Errors = {
+    val data = alterPartitionResp.data
+
+    Errors.forCode(data.errorCode) match {
+      case Errors.STALE_BROKER_EPOCH =>
+        warn(s"Broker had a stale broker epoch ($sentBrokerEpoch), retrying.")
+
+      case Errors.CLUSTER_AUTHORIZATION_FAILED =>
+        error(s"Broker is not authorized to send AlterPartition to controller",
+          Errors.CLUSTER_AUTHORIZATION_FAILED.exception("Broker is not authorized to send AlterPartition to controller"))
+
+      case Errors.NONE =>
+        // Collect partition-level responses to pass to the callbacks
+        val partitionResponses = new mutable.HashMap[TopicPartition, Either[Errors, LeaderAndIsr]]()
+        data.topics.forEach { topic =>
+          // Topic IDs are used since version 2 of the AlterPartition API.
+          val topicName = if (requestHeader.apiVersion > 1) topicNamesByIds.get(topic.topicId).orNull else topic.topicName
+          if (topicName == null || topicName.isEmpty) {
+            error(s"Received an unexpected topic $topic in the alter partition response, ignoring it.")
+          } else {
+            topic.partitions.forEach { partition =>
+              val tp = new TopicPartition(topicName, partition.partitionIndex)
+              val apiError = Errors.forCode(partition.errorCode)
+              debug(s"Controller successfully handled AlterPartition request for $tp: $partition")
+              if (apiError == Errors.NONE) {
+                LeaderRecoveryState.optionalOf(partition.leaderRecoveryState).asScala match {
+                  case Some(leaderRecoveryState) =>
+                    partitionResponses(tp) = Right(
+                      LeaderAndIsr(
+                        partition.leaderId,
+                        partition.leaderEpoch,
+                        partition.isr.asScala.toList.map(_.toInt),
+                        leaderRecoveryState,
+                        partition.partitionEpoch
+                      )
+                    )
+
+                  case None =>
+                    error(s"Controller returned an invalid leader recovery state (${partition.leaderRecoveryState}) for $tp: $partition")
+                    partitionResponses(tp) = Left(Errors.UNKNOWN_SERVER_ERROR)
+                }
+              } else {
+                partitionResponses(tp) = Left(apiError)
+              }
+            }
+          }
+        }
+
+        // Iterate across the items we sent rather than what we received to ensure we run the callback even if a
+        // partition was somehow erroneously excluded from the response. Note that these callbacks are run from
+        // the leaderIsrUpdateLock write lock in Partition#sendAlterPartitionRequest
+        inflightAlterPartitionItems.foreach { inflightAlterPartition =>
+          partitionResponses.get(inflightAlterPartition.topicIdPartition.topicPartition) match {
+            case Some(leaderAndIsrOrError) =>
+              // Regardless of callback outcome, we need to clear from the unsent updates map to unblock further
+              // updates. We clear it now to allow the callback to submit a new update if needed.
+              unsentIsrUpdates.remove(inflightAlterPartition.topicIdPartition.topicPartition)
+              leaderAndIsrOrError match {
+                case Left(error) => inflightAlterPartition.future.completeExceptionally(error.exception)
+                case Right(leaderAndIsr) => inflightAlterPartition.future.complete(leaderAndIsr)
+              }
+            case None =>
+              // Don't remove this partition from the update map so it will get re-sent
+              warn(s"Partition ${inflightAlterPartition.topicIdPartition} was sent but not included in the response")
+          }
+        }
+
+      case e =>
+        warn(s"Controller returned an unexpected top-level error when handling AlterPartition request: $e")
+    }
+
+    Errors.forCode(data.errorCode)
+  }
+}
diff --git a/core/src/main/scala/kafka/server/ApiVersionManager.scala b/core/src/main/scala/kafka/server/ApiVersionManager.scala
index e3d62c6acc1af..6d329673a8891 100644
--- a/core/src/main/scala/kafka/server/ApiVersionManager.scala
+++ b/core/src/main/scala/kafka/server/ApiVersionManager.scala
@@ -16,7 +16,6 @@
  */
 package kafka.server
 
-import kafka.api.ApiVersion
 import kafka.network
 import kafka.network.RequestChannel
 import org.apache.kafka.common.message.ApiMessageType.ListenerType
@@ -38,15 +37,14 @@ object ApiVersionManager {
     listenerType: ListenerType,
     config: KafkaConfig,
     forwardingManager: Option[ForwardingManager],
-    features: BrokerFeatures,
-    featureCache: FinalizedFeatureCache
+    supportedFeatures: BrokerFeatures,
+    metadataCache: MetadataCache
   ): ApiVersionManager = {
     new DefaultApiVersionManager(
       listenerType,
-      config.interBrokerProtocolVersion,
       forwardingManager,
-      features,
-      featureCache
+      supportedFeatures,
+      metadataCache
     )
   }
 }
@@ -69,33 +67,24 @@ class SimpleApiVersionManager(
 
 class DefaultApiVersionManager(
   val listenerType: ListenerType,
-  interBrokerProtocolVersion: ApiVersion,
   forwardingManager: Option[ForwardingManager],
   features: BrokerFeatures,
-  featureCache: FinalizedFeatureCache
+  metadataCache: MetadataCache
 ) extends ApiVersionManager {
 
   override def apiVersionResponse(throttleTimeMs: Int): ApiVersionsResponse = {
     val supportedFeatures = features.supportedFeatures
-    val finalizedFeaturesOpt = featureCache.get
+    val finalizedFeatures = metadataCache.features()
     val controllerApiVersions = forwardingManager.flatMap(_.controllerApiVersions)
 
-    finalizedFeaturesOpt match {
-      case Some(finalizedFeatures) => ApiVersion.apiVersionsResponse(
+    ApiVersionsResponse.createApiVersionsResponse(
         throttleTimeMs,
-        interBrokerProtocolVersion.recordVersion,
+        metadataCache.metadataVersion().highestSupportedRecordVersion,
         supportedFeatures,
-        finalizedFeatures.features,
+        finalizedFeatures.features.map(kv => (kv._1, kv._2.asInstanceOf[java.lang.Short])).asJava,
         finalizedFeatures.epoch,
-        controllerApiVersions,
+        controllerApiVersions.orNull,
         listenerType)
-      case None => ApiVersion.apiVersionsResponse(
-        throttleTimeMs,
-        interBrokerProtocolVersion.recordVersion,
-        supportedFeatures,
-        controllerApiVersions,
-        listenerType)
-    }
   }
 
   override def enabledApis: collection.Set[ApiKeys] = {
diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherBlockingSend.scala b/core/src/main/scala/kafka/server/BrokerBlockingSender.scala
similarity index 87%
rename from core/src/main/scala/kafka/server/ReplicaFetcherBlockingSend.scala
rename to core/src/main/scala/kafka/server/BrokerBlockingSender.scala
index fd69b5a8a215f..7d9fb0512a5b7 100644
--- a/core/src/main/scala/kafka/server/ReplicaFetcherBlockingSend.scala
+++ b/core/src/main/scala/kafka/server/BrokerBlockingSender.scala
@@ -33,6 +33,8 @@ import scala.jdk.CollectionConverters._
 
 trait BlockingSend {
 
+  def brokerEndPoint(): BrokerEndPoint
+
   def sendRequest(requestBuilder: AbstractRequest.Builder[_ <: AbstractRequest]): ClientResponse
 
   def initiateClose(): Unit
@@ -40,13 +42,13 @@ trait BlockingSend {
   def close(): Unit
 }
 
-class ReplicaFetcherBlockingSend(sourceBroker: BrokerEndPoint,
-                                 brokerConfig: KafkaConfig,
-                                 metrics: Metrics,
-                                 time: Time,
-                                 fetcherId: Int,
-                                 clientId: String,
-                                 logContext: LogContext) extends BlockingSend {
+class BrokerBlockingSender(sourceBroker: BrokerEndPoint,
+                           brokerConfig: KafkaConfig,
+                           metrics: Metrics,
+                           time: Time,
+                           fetcherId: Int,
+                           clientId: String,
+                           logContext: LogContext) extends BlockingSend {
 
   private val sourceNode = new Node(sourceBroker.id, sourceBroker.host, sourceBroker.port)
   private val socketTimeout: Int = brokerConfig.replicaSocketTimeoutMs
@@ -99,6 +101,8 @@ class ReplicaFetcherBlockingSend(sourceBroker: BrokerEndPoint,
     (networkClient, reconfigurableChannelBuilder)
   }
 
+  override def brokerEndPoint(): BrokerEndPoint = sourceBroker
+
   override def sendRequest(requestBuilder: Builder[_ <: AbstractRequest]): ClientResponse = {
     try {
       if (!NetworkClientUtils.awaitReady(networkClient, sourceNode, time, socketTimeout))
@@ -124,4 +128,8 @@ class ReplicaFetcherBlockingSend(sourceBroker: BrokerEndPoint,
   def close(): Unit = {
     networkClient.close()
   }
+
+  override def toString: String = {
+    s"BrokerBlockingSender(sourceBroker=$sourceBroker, fetcherId=$fetcherId)"
+  }
 }
diff --git a/core/src/main/scala/kafka/server/BrokerFeatures.scala b/core/src/main/scala/kafka/server/BrokerFeatures.scala
index dd84f9e73e70f..70ef7c71cbba3 100644
--- a/core/src/main/scala/kafka/server/BrokerFeatures.scala
+++ b/core/src/main/scala/kafka/server/BrokerFeatures.scala
@@ -18,9 +18,10 @@
 package kafka.server
 
 import kafka.utils.Logging
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
-import org.apache.kafka.common.feature.Features._
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
+import org.apache.kafka.server.common.MetadataVersion
 
+import java.util
 import scala.jdk.CollectionConverters._
 
 /**
@@ -32,19 +33,19 @@ import scala.jdk.CollectionConverters._
 class BrokerFeatures private (@volatile var supportedFeatures: Features[SupportedVersionRange]) {
   // For testing only.
   def setSupportedFeatures(newFeatures: Features[SupportedVersionRange]): Unit = {
-    supportedFeatures = newFeatures
+    val combined = new util.HashMap[String, SupportedVersionRange](supportedFeatures.features())
+    combined.putAll(newFeatures.features())
+    supportedFeatures = Features.supportedFeatures(combined)
   }
 
   /**
-   * Returns the default finalized features that a new Kafka cluster with IBP config >= KAFKA_2_7_IV0
+   * Returns the default finalized features that a new Kafka cluster with IBP config >= IBP_2_7_IV0
    * needs to be bootstrapped with.
    */
-  def defaultFinalizedFeatures: Features[FinalizedVersionRange] = {
-    Features.finalizedFeatures(
-      supportedFeatures.features.asScala.map {
-        case(name, versionRange) => (
-          name, new FinalizedVersionRange(versionRange.min, versionRange.max))
-      }.asJava)
+  def defaultFinalizedFeatures: Map[String, Short] = {
+    supportedFeatures.features.asScala.map {
+      case(name, versionRange) => (name, versionRange.max)
+    }.toMap
   }
 
   /**
@@ -62,7 +63,7 @@ class BrokerFeatures private (@volatile var supportedFeatures: Features[Supporte
    * @return            The subset of input features which are incompatible. If the returned object
    *                    is empty, it means there were no feature incompatibilities found.
    */
-  def incompatibleFeatures(finalized: Features[FinalizedVersionRange]): Features[FinalizedVersionRange] = {
+  def incompatibleFeatures(finalized: Map[String, Short]): Map[String, Short] = {
     BrokerFeatures.incompatibleFeatures(supportedFeatures, finalized, logIncompatibilities = true)
   }
 }
@@ -70,9 +71,13 @@ class BrokerFeatures private (@volatile var supportedFeatures: Features[Supporte
 object BrokerFeatures extends Logging {
 
   def createDefault(): BrokerFeatures = {
-    // The arguments are currently empty, but, in the future as we define features we should
-    // populate the required values here.
-    new BrokerFeatures(emptySupportedFeatures)
+    new BrokerFeatures(Features.supportedFeatures(
+      java.util.Collections.singletonMap(MetadataVersion.FEATURE_NAME,
+        new SupportedVersionRange(MetadataVersion.MINIMUM_KRAFT_VERSION.featureLevel(), MetadataVersion.latest().featureLevel()))))
+  }
+
+  def createEmpty(): BrokerFeatures = {
+    new BrokerFeatures(Features.emptySupportedFeatures())
   }
 
   /**
@@ -86,19 +91,19 @@ object BrokerFeatures extends Logging {
    *                            - False otherwise.
    */
   def hasIncompatibleFeatures(supportedFeatures: Features[SupportedVersionRange],
-                              finalizedFeatures: Features[FinalizedVersionRange]): Boolean = {
-    !incompatibleFeatures(supportedFeatures, finalizedFeatures, logIncompatibilities = false).empty
+                              finalizedFeatures: Map[String, Short]): Boolean = {
+    incompatibleFeatures(supportedFeatures, finalizedFeatures, logIncompatibilities = false).nonEmpty
   }
 
   private def incompatibleFeatures(supportedFeatures: Features[SupportedVersionRange],
-                                   finalizedFeatures: Features[FinalizedVersionRange],
-                                   logIncompatibilities: Boolean): Features[FinalizedVersionRange] = {
-    val incompatibleFeaturesInfo = finalizedFeatures.features.asScala.map {
+                                   finalizedFeatures: Map[String, Short],
+                                   logIncompatibilities: Boolean): Map[String, Short] = {
+    val incompatibleFeaturesInfo = finalizedFeatures.map {
       case (feature, versionLevels) =>
         val supportedVersions = supportedFeatures.get(feature)
         if (supportedVersions == null) {
           (feature, versionLevels, "{feature=%s, reason='Unsupported feature'}".format(feature))
-        } else if (versionLevels.isIncompatibleWith(supportedVersions)) {
+        } else if (supportedVersions.isIncompatibleWith(versionLevels)) {
           (feature, versionLevels, "{feature=%s, reason='%s is incompatible with %s'}".format(
             feature, versionLevels, supportedVersions))
         } else {
@@ -110,7 +115,6 @@ object BrokerFeatures extends Logging {
       warn("Feature incompatibilities seen: " +
            incompatibleFeaturesInfo.map { case(_, _, errorReason) => errorReason }.mkString(", "))
     }
-    Features.finalizedFeatures(
-      incompatibleFeaturesInfo.map { case(feature, versionLevels, _) => (feature, versionLevels) }.toMap.asJava)
+    incompatibleFeaturesInfo.map { case(feature, versionLevels, _) => (feature, versionLevels) }.toMap
   }
 }
diff --git a/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala b/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala
index 394c353e45c1a..39dff71ad11ee 100644
--- a/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala
+++ b/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala
@@ -97,6 +97,11 @@ class BrokerLifecycleManager(val config: KafkaConfig,
    */
   val initialCatchUpFuture = new CompletableFuture[Void]()
 
+  /**
+   * A future which is completed when the broker is unfenced for the first time.
+   */
+  val initialUnfenceFuture = new CompletableFuture[Void]()
+
   /**
    * A future which is completed when controlled shutdown is done.
    */
@@ -189,8 +194,9 @@ class BrokerLifecycleManager(val config: KafkaConfig,
       channelManager, clusterId, advertisedListeners, supportedFeatures))
   }
 
-  def setReadyToUnfence(): Unit = {
+  def setReadyToUnfence(): CompletableFuture[Void] = {
     eventQueue.append(new SetReadyToUnfenceEvent())
+    initialUnfenceFuture
   }
 
   def brokerEpoch: Long = _brokerEpoch
@@ -264,7 +270,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
         new DeadlineFunction(time.nanoseconds() + initialTimeoutNs),
         new RegistrationTimeoutEvent())
       sendBrokerRegistration()
-      info(s"Incarnation ${incarnationId} of broker ${nodeId} in cluster ${clusterId} " +
+      info(s"Incarnation $incarnationId of broker $nodeId in cluster $clusterId " +
         "is now STARTING.")
     }
   }
@@ -285,7 +291,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
         setListeners(_advertisedListeners).
         setRack(rack.orNull)
     if (isDebugEnabled) {
-      debug(s"Sending broker registration ${data}")
+      debug(s"Sending broker registration $data")
     }
     _channelManager.sendRequest(new BrokerRegistrationRequest.Builder(data),
       new BrokerRegistrationResponseHandler())
@@ -294,18 +300,18 @@ class BrokerLifecycleManager(val config: KafkaConfig,
   private class BrokerRegistrationResponseHandler extends ControllerRequestCompletionHandler {
     override def onComplete(response: ClientResponse): Unit = {
       if (response.authenticationException() != null) {
-        error(s"Unable to register broker ${nodeId} because of an authentication exception.",
-          response.authenticationException());
+        error(s"Unable to register broker $nodeId because of an authentication exception.",
+          response.authenticationException())
         scheduleNextCommunicationAfterFailure()
       } else if (response.versionMismatch() != null) {
-        error(s"Unable to register broker ${nodeId} because of an API version problem.",
-          response.versionMismatch());
+        error(s"Unable to register broker $nodeId because of an API version problem.",
+          response.versionMismatch())
         scheduleNextCommunicationAfterFailure()
       } else if (response.responseBody() == null) {
-        warn(s"Unable to register broker ${nodeId}.")
+        warn(s"Unable to register broker $nodeId.")
         scheduleNextCommunicationAfterFailure()
       } else if (!response.responseBody().isInstanceOf[BrokerRegistrationResponse]) {
-        error(s"Unable to register broker ${nodeId} because the controller returned an " +
+        error(s"Unable to register broker $nodeId because the controller returned an " +
           "invalid response type.")
         scheduleNextCommunicationAfterFailure()
       } else {
@@ -316,11 +322,11 @@ class BrokerLifecycleManager(val config: KafkaConfig,
           _brokerEpoch = message.data().brokerEpoch()
           registered = true
           initialRegistrationSucceeded = true
-          info(s"Successfully registered broker ${nodeId} with broker epoch ${_brokerEpoch}")
+          info(s"Successfully registered broker $nodeId with broker epoch ${_brokerEpoch}")
           scheduleNextCommunicationImmediately() // Immediately send a heartbeat
         } else {
-          info(s"Unable to register broker ${nodeId} because the controller returned " +
-            s"error ${errorCode}")
+          info(s"Unable to register broker $nodeId because the controller returned " +
+            s"error $errorCode")
           scheduleNextCommunicationAfterFailure()
         }
       }
@@ -341,7 +347,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
       setWantFence(!readyToUnfence).
       setWantShutDown(_state == BrokerState.PENDING_CONTROLLED_SHUTDOWN)
     if (isTraceEnabled) {
-      trace(s"Sending broker heartbeat ${data}")
+      trace(s"Sending broker heartbeat $data")
     }
     _channelManager.sendRequest(new BrokerHeartbeatRequest.Builder(data),
       new BrokerHeartbeatResponseHandler())
@@ -350,18 +356,18 @@ class BrokerLifecycleManager(val config: KafkaConfig,
   private class BrokerHeartbeatResponseHandler extends ControllerRequestCompletionHandler {
     override def onComplete(response: ClientResponse): Unit = {
       if (response.authenticationException() != null) {
-        error(s"Unable to send broker heartbeat for ${nodeId} because of an " +
-          "authentication exception.", response.authenticationException());
+        error(s"Unable to send broker heartbeat for $nodeId because of an " +
+          "authentication exception.", response.authenticationException())
         scheduleNextCommunicationAfterFailure()
       } else if (response.versionMismatch() != null) {
-        error(s"Unable to send broker heartbeat for ${nodeId} because of an API " +
-          "version problem.", response.versionMismatch());
+        error(s"Unable to send broker heartbeat for $nodeId because of an API " +
+          "version problem.", response.versionMismatch())
         scheduleNextCommunicationAfterFailure()
       } else if (response.responseBody() == null) {
-        warn(s"Unable to send broker heartbeat for ${nodeId}. Retrying.")
+        warn(s"Unable to send broker heartbeat for $nodeId. Retrying.")
         scheduleNextCommunicationAfterFailure()
       } else if (!response.responseBody().isInstanceOf[BrokerHeartbeatResponse]) {
-        error(s"Unable to send broker heartbeat for ${nodeId} because the controller " +
+        error(s"Unable to send broker heartbeat for $nodeId because the controller " +
           "returned an invalid response type.")
         scheduleNextCommunicationAfterFailure()
       } else {
@@ -371,7 +377,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
           failedAttempts = 0
           _state match {
             case BrokerState.STARTING =>
-              if (message.data().isCaughtUp()) {
+              if (message.data().isCaughtUp) {
                 info(s"The broker has caught up. Transitioning from STARTING to RECOVERY.")
                 _state = BrokerState.RECOVERY
                 initialCatchUpFuture.complete(null)
@@ -382,8 +388,9 @@ class BrokerLifecycleManager(val config: KafkaConfig,
               // there is no recovery work to be done, we start up a bit quicker.
               scheduleNextCommunication(NANOSECONDS.convert(10, MILLISECONDS))
             case BrokerState.RECOVERY =>
-              if (!message.data().isFenced()) {
+              if (!message.data().isFenced) {
                 info(s"The broker has been unfenced. Transitioning from RECOVERY to RUNNING.")
+                initialUnfenceFuture.complete(null)
                 _state = BrokerState.RUNNING
               } else {
                 info(s"The broker is in RECOVERY.")
@@ -417,7 +424,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
               scheduleNextCommunicationAfterSuccess()
           }
         } else {
-          warn(s"Broker ${nodeId} sent a heartbeat request but received error ${errorCode}.")
+          warn(s"Broker $nodeId sent a heartbeat request but received error $errorCode.")
           scheduleNextCommunicationAfterFailure()
         }
       }
@@ -476,6 +483,7 @@ class BrokerLifecycleManager(val config: KafkaConfig,
       _state = BrokerState.SHUTTING_DOWN
       controlledShutdownFuture.complete(null)
       initialCatchUpFuture.cancel(false)
+      initialUnfenceFuture.cancel(false)
       if (_channelManager != null) {
         _channelManager.shutdown()
         _channelManager = null
diff --git a/core/src/main/scala/kafka/server/BrokerMetadataCheckpoint.scala b/core/src/main/scala/kafka/server/BrokerMetadataCheckpoint.scala
index 0a9bfbda535f3..67d27cb052931 100755
--- a/core/src/main/scala/kafka/server/BrokerMetadataCheckpoint.scala
+++ b/core/src/main/scala/kafka/server/BrokerMetadataCheckpoint.scala
@@ -21,9 +21,10 @@ import java.io._
 import java.nio.file.{Files, NoSuchFileException}
 import java.util.Properties
 
-import kafka.common.{InconsistentBrokerMetadataException, KafkaException}
+import kafka.common.InconsistentBrokerMetadataException
 import kafka.server.RawMetaProperties._
 import kafka.utils._
+import org.apache.kafka.common.KafkaException
 import org.apache.kafka.common.utils.Utils
 
 import scala.collection.mutable
diff --git a/core/src/main/scala/kafka/server/BrokerServer.scala b/core/src/main/scala/kafka/server/BrokerServer.scala
index 602ee95268b76..1008decadb11e 100644
--- a/core/src/main/scala/kafka/server/BrokerServer.scala
+++ b/core/src/main/scala/kafka/server/BrokerServer.scala
@@ -22,17 +22,19 @@ import java.util
 import java.util.concurrent.atomic.AtomicBoolean
 import java.util.concurrent.locks.ReentrantLock
 import java.util.concurrent.{CompletableFuture, ExecutionException, TimeUnit, TimeoutException}
+
 import kafka.cluster.Broker.ServerInfo
 import kafka.coordinator.group.GroupCoordinator
 import kafka.coordinator.transaction.{ProducerIdManager, TransactionCoordinator}
 import kafka.log.LogManager
-import kafka.metrics.KafkaYammerMetrics
 import kafka.network.{DataPlaneAcceptor, SocketServer}
 import kafka.raft.RaftManager
 import kafka.security.CredentialProvider
 import kafka.server.KafkaRaftServer.ControllerRole
+import kafka.server.metadata.BrokerServerMetrics
 import kafka.server.metadata.{BrokerMetadataListener, BrokerMetadataPublisher, BrokerMetadataSnapshotter, ClientQuotaMetadataManager, KRaftMetadataCache, SnapshotWriterBuilder}
 import kafka.utils.{CoreUtils, KafkaScheduler}
+import org.apache.kafka.common.feature.SupportedVersionRange
 import org.apache.kafka.common.message.ApiMessageType.ListenerType
 import org.apache.kafka.common.message.BrokerRegistrationRequestData.{Listener, ListenerCollection}
 import org.apache.kafka.common.metrics.Metrics
@@ -42,11 +44,14 @@ import org.apache.kafka.common.security.scram.internals.ScramMechanism
 import org.apache.kafka.common.security.token.delegation.internals.DelegationTokenCache
 import org.apache.kafka.common.utils.{AppInfoParser, LogContext, Time, Utils}
 import org.apache.kafka.common.{ClusterResource, Endpoint}
+import org.apache.kafka.metadata.authorizer.ClusterMetadataAuthorizer
 import org.apache.kafka.metadata.{BrokerState, VersionRange}
 import org.apache.kafka.raft.RaftConfig.AddressSpec
 import org.apache.kafka.raft.{RaftClient, RaftConfig}
 import org.apache.kafka.server.authorizer.Authorizer
 import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.server.fault.FaultHandler
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.snapshot.SnapshotWriter
 
 import scala.collection.{Map, Seq}
@@ -58,13 +63,8 @@ class BrokerSnapshotWriterBuilder(raftClient: RaftClient[ApiMessageAndVersion])
     extends SnapshotWriterBuilder {
   override def build(committedOffset: Long,
                      committedEpoch: Int,
-                     lastContainedLogTime: Long): SnapshotWriter[ApiMessageAndVersion] = {
-    raftClient.createSnapshot(committedOffset, committedEpoch, lastContainedLogTime).
-        asScala.getOrElse(
-      throw new RuntimeException("A snapshot already exists with " +
-        s"committedOffset=${committedOffset}, committedEpoch=${committedEpoch}, " +
-        s"lastContainedLogTime=${lastContainedLogTime}")
-    )
+                     lastContainedLogTime: Long): Option[SnapshotWriter[ApiMessageAndVersion]] = {
+    raftClient.createSnapshot(committedOffset, committedEpoch, lastContainedLogTime).asScala
   }
 }
 
@@ -77,13 +77,17 @@ class BrokerServer(
   val raftManager: RaftManager[ApiMessageAndVersion],
   val time: Time,
   val metrics: Metrics,
+  val brokerMetrics: BrokerServerMetrics,
   val threadNamePrefix: Option[String],
   val initialOfflineDirs: Seq[String],
   val controllerQuorumVotersFuture: CompletableFuture[util.Map[Integer, AddressSpec]],
-  val supportedFeatures: util.Map[String, VersionRange]
+  val fatalFaultHandler: FaultHandler,
+  val metadataLoadingFaultHandler: FaultHandler,
+  val metadataPublishingFaultHandler: FaultHandler
 ) extends KafkaBroker {
 
-  override def brokerState: BrokerState = lifecycleManager.state
+  override def brokerState: BrokerState = Option(lifecycleManager).
+    flatMap(m => Some(m.state)).getOrElse(BrokerState.NOT_RUNNING)
 
   import kafka.server.Server._
 
@@ -91,7 +95,7 @@ class BrokerServer(
 
   this.logIdent = logContext.logPrefix
 
-  @volatile private var lifecycleManager: BrokerLifecycleManager = null
+  @volatile var lifecycleManager: BrokerLifecycleManager = null
 
   private val isShuttingDown = new AtomicBoolean(false)
 
@@ -126,7 +130,7 @@ class BrokerServer(
 
   var forwardingManager: ForwardingManager = null
 
-  var alterIsrManager: AlterIsrManager = null
+  var alterPartitionManager: AlterPartitionManager = null
 
   var autoTopicCreationManager: AutoTopicCreationManager = null
 
@@ -140,10 +144,6 @@ class BrokerServer(
 
   @volatile var brokerTopicStats: BrokerTopicStats = null
 
-  val brokerFeatures: BrokerFeatures = BrokerFeatures.createDefault()
-
-  val featureCache: FinalizedFeatureCache = new FinalizedFeatureCache(brokerFeatures)
-
   val clusterId: String = metaProps.clusterId
 
   var metadataSnapshotter: Option[BrokerMetadataSnapshotter] = None
@@ -152,7 +152,9 @@ class BrokerServer(
 
   var metadataPublisher: BrokerMetadataPublisher = null
 
-  def kafkaYammerMetrics: kafka.metrics.KafkaYammerMetrics = KafkaYammerMetrics.INSTANCE
+  val brokerFeatures: BrokerFeatures = BrokerFeatures.createDefault()
+
+  def kafkaYammerMetrics: KafkaYammerMetrics = KafkaYammerMetrics.INSTANCE
 
   private def maybeChangeStatus(from: ProcessStatus, to: ProcessStatus): Boolean = {
     lock.lock()
@@ -182,7 +184,9 @@ class BrokerServer(
 
       config.dynamicConfig.initialize(zkClientOpt = None)
 
-      lifecycleManager = new BrokerLifecycleManager(config, time, threadNamePrefix)
+      lifecycleManager = new BrokerLifecycleManager(config,
+        time,
+        threadNamePrefix)
 
       /* start scheduler */
       kafkaScheduler = new KafkaScheduler(config.backgroundThreads)
@@ -222,39 +226,33 @@ class BrokerServer(
       clientToControllerChannelManager.start()
       forwardingManager = new ForwardingManagerImpl(clientToControllerChannelManager)
 
+
       val apiVersionManager = ApiVersionManager(
         ListenerType.BROKER,
         config,
         Some(forwardingManager),
         brokerFeatures,
-        featureCache
+        metadataCache
       )
 
       // Create and start the socket server acceptor threads so that the bound port is known.
       // Delay starting processors until the end of the initialization sequence to ensure
       // that credentials have been loaded before processing authentications.
       socketServer = new SocketServer(config, metrics, time, credentialProvider, apiVersionManager)
-      socketServer.startup(startProcessingRequests = false)
 
       clientQuotaMetadataManager = new ClientQuotaMetadataManager(quotaManagers, socketServer.connectionQuotas)
 
-      val alterIsrChannelManager = BrokerToControllerChannelManager(
-        controllerNodeProvider,
-        time,
-        metrics,
+      alterPartitionManager = AlterPartitionManager(
         config,
-        channelName = "alterIsr",
-        threadNamePrefix,
-        retryTimeoutMs = Long.MaxValue
-      )
-      alterIsrManager = new DefaultAlterIsrManager(
-        controllerChannelManager = alterIsrChannelManager,
+        metadataCache,
         scheduler = kafkaScheduler,
+        controllerNodeProvider,
         time = time,
-        brokerId = config.nodeId,
+        metrics,
+        threadNamePrefix,
         brokerEpochSupplier = () => lifecycleManager.brokerEpoch
       )
-      alterIsrManager.start()
+      alterPartitionManager.start()
 
       this._replicaManager = new ReplicaManager(
         config = config,
@@ -265,7 +263,7 @@ class BrokerServer(
         quotaManagers = quotaManagers,
         metadataCache = metadataCache,
         logDirFailureChannel = logDirFailureChannel,
-        alterIsrManager = alterIsrManager,
+        alterPartitionManager = alterPartitionManager,
         brokerTopicStats = brokerTopicStats,
         isShuttingDown = isShuttingDown,
         zkClient = None,
@@ -316,11 +314,14 @@ class BrokerServer(
         ))
       }
 
-      metadataListener = new BrokerMetadataListener(config.nodeId,
-                                                    time,
-                                                    threadNamePrefix,
-                                                    config.metadataSnapshotMaxNewRecordBytes,
-                                                    metadataSnapshotter)
+      metadataListener = new BrokerMetadataListener(
+        config.nodeId,
+        time,
+        threadNamePrefix,
+        config.metadataSnapshotMaxNewRecordBytes,
+        metadataSnapshotter,
+        brokerMetrics,
+        metadataLoadingFaultHandler)
 
       val networkListeners = new ListenerCollection()
       config.effectiveAdvertisedListeners.foreach { ep =>
@@ -330,10 +331,28 @@ class BrokerServer(
           setPort(if (ep.port == 0) socketServer.boundPort(ep.listenerName) else ep.port).
           setSecurityProtocol(ep.securityProtocol.id))
       }
-      lifecycleManager.start(() => metadataListener.highestMetadataOffset,
-        BrokerToControllerChannelManager(controllerNodeProvider, time, metrics, config,
-          "heartbeat", threadNamePrefix, config.brokerSessionTimeoutMs.toLong),
-        metaProps.clusterId, networkListeners, supportedFeatures)
+
+      val featuresRemapped = brokerFeatures.supportedFeatures.features().asScala.map {
+        case (k: String, v: SupportedVersionRange) =>
+          k -> VersionRange.of(v.min, v.max)
+      }.asJava
+
+      val brokerLifecycleChannelManager = BrokerToControllerChannelManager(
+        controllerNodeProvider,
+        time,
+        metrics,
+        config,
+        "heartbeat",
+        threadNamePrefix,
+        config.brokerSessionTimeoutMs.toLong
+      )
+      lifecycleManager.start(
+        () => metadataListener.highestMetadataOffset,
+        brokerLifecycleChannelManager,
+        metaProps.clusterId,
+        networkListeners,
+        featuresRemapped
+      )
 
       // Register a listener with the Raft layer to receive metadata event notifications
       raftManager.register(metadataListener)
@@ -355,10 +374,13 @@ class BrokerServer(
           endpoints.asScala.map(ep => ep.listenerName().orElse("(none)")).mkString(", "))
       }
       val authorizerInfo = ServerInfo(new ClusterResource(clusterId),
-        config.nodeId, endpoints, interBrokerListener)
+        config.nodeId,
+        endpoints,
+        interBrokerListener,
+        config.earlyStartListeners.map(_.value()).asJava)
 
-      /* Get the authorizer and initialize it if one is specified.*/
-      authorizer = config.authorizer
+      // Create and initialize an authorizer if one is configured.
+      authorizer = config.createNewAuthorizer()
       authorizer.foreach(_.configure(config.originals))
       val authorizerFutures: Map[Endpoint, CompletableFuture[Void]] = authorizer match {
         case Some(authZ) =>
@@ -414,25 +436,45 @@ class BrokerServer(
         groupCoordinator,
         transactionCoordinator,
         clientQuotaMetadataManager,
-        featureCache,
         dynamicConfigHandlers.toMap,
-        authorizer)
+        authorizer,
+        fatalFaultHandler,
+        metadataPublishingFaultHandler)
 
       // Tell the metadata listener to start publishing its output, and wait for the first
       // publish operation to complete. This first operation will initialize logManager,
       // replicaManager, groupCoordinator, and txnCoordinator. The log manager may perform
       // a potentially lengthy recovery-from-unclean-shutdown operation here, if required.
-      metadataListener.startPublishing(metadataPublisher).get()
+      try {
+        metadataListener.startPublishing(metadataPublisher).get()
+      } catch {
+        case t: Throwable => throw new RuntimeException("Received a fatal error while " +
+          "waiting for the broker to catch up with the current cluster metadata.", t)
+      }
 
       // Log static broker configurations.
       new KafkaConfig(config.originals(), true)
 
-      // Enable inbound TCP connections.
-      socketServer.startProcessingRequests(authorizerFutures)
+      // Enable inbound TCP connections. Each endpoint will be started only once its matching
+      // authorizer future is completed.
+      socketServer.enableRequestProcessing(authorizerFutures)
+
+      // If we are using a ClusterMetadataAuthorizer which stores its ACLs in the metadata log,
+      // notify it that the loading process is complete.
+      authorizer match {
+        case Some(clusterMetadataAuthorizer: ClusterMetadataAuthorizer) =>
+          clusterMetadataAuthorizer.completeInitialLoad()
+        case _ => // nothing to do
+      }
 
       // We're now ready to unfence the broker. This also allows this broker to transition
       // from RECOVERY state to RUNNING state, once the controller unfences the broker.
-      lifecycleManager.setReadyToUnfence()
+      try {
+        lifecycleManager.setReadyToUnfence().get()
+      } catch {
+        case t: Throwable => throw new RuntimeException("Received a fatal error while " +
+          "waiting for the broker to be unfenced.", t)
+      }
 
       maybeChangeStatus(STARTING, STARTED)
     } catch {
@@ -450,11 +492,9 @@ class BrokerServer(
       info("shutting down")
 
       if (config.controlledShutdownEnable) {
-        // Shut down the broker metadata listener, so that we don't get added to any
-        // more ISRs.
-        if (metadataListener !=  null) {
-          metadataListener.beginShutdown()
-        }
+        if (replicaManager != null)
+          replicaManager.beginControlledShutdown()
+
         lifecycleManager.beginControlledShutdown()
         try {
           lifecycleManager.controlledShutdownFuture.get(5L, TimeUnit.MINUTES)
@@ -465,6 +505,10 @@ class BrokerServer(
             error("Got unexpected exception waiting for controlled shutdown future", e)
         }
       }
+
+      if (metadataListener != null)
+        metadataListener.beginShutdown()
+
       lifecycleManager.beginShutdown()
 
       // Stop socket server to stop accepting any more connections and requests.
@@ -479,7 +523,7 @@ class BrokerServer(
       if (controlPlaneRequestProcessor != null)
         CoreUtils.swallow(controlPlaneRequestProcessor.close(), this)
       CoreUtils.swallow(authorizer.foreach(_.close()), this)
-      if (metadataListener !=  null) {
+      if (metadataListener != null) {
         CoreUtils.swallow(metadataListener.close(), this)
       }
       metadataSnapshotter.foreach(snapshotter => CoreUtils.swallow(snapshotter.close(), this))
@@ -508,8 +552,8 @@ class BrokerServer(
       if (replicaManager != null)
         CoreUtils.swallow(replicaManager.shutdown(), this)
 
-      if (alterIsrManager != null)
-        CoreUtils.swallow(alterIsrManager.shutdown(), this)
+      if (alterPartitionManager != null)
+        CoreUtils.swallow(alterPartitionManager.shutdown(), this)
 
       if (clientToControllerChannelManager != null)
         CoreUtils.swallow(clientToControllerChannelManager.shutdown(), this)
diff --git a/core/src/main/scala/kafka/server/BrokerToControllerChannelManager.scala b/core/src/main/scala/kafka/server/BrokerToControllerChannelManager.scala
index b671c700ac665..a4879798342dc 100644
--- a/core/src/main/scala/kafka/server/BrokerToControllerChannelManager.scala
+++ b/core/src/main/scala/kafka/server/BrokerToControllerChannelManager.scala
@@ -19,12 +19,11 @@ package kafka.server
 
 import java.util.concurrent.LinkedBlockingDeque
 import java.util.concurrent.atomic.AtomicReference
-
 import kafka.common.{InterBrokerSendThread, RequestAndCompletionHandler}
 import kafka.raft.RaftManager
 import kafka.utils.Logging
 import org.apache.kafka.clients._
-import org.apache.kafka.common.Node
+import org.apache.kafka.common.{Node, Reconfigurable}
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.network._
 import org.apache.kafka.common.protocol.Errors
@@ -78,9 +77,11 @@ class MetadataCacheControllerNodeProvider(
 }
 
 object RaftControllerNodeProvider {
-  def apply(raftManager: RaftManager[ApiMessageAndVersion],
-            config: KafkaConfig,
-            controllerQuorumVoterNodes: Seq[Node]): RaftControllerNodeProvider = {
+  def apply(
+    raftManager: RaftManager[ApiMessageAndVersion],
+    config: KafkaConfig,
+    controllerQuorumVoterNodes: Seq[Node]
+  ): RaftControllerNodeProvider = {
     val controllerListenerName = new ListenerName(config.controllerListenerNames.head)
     val controllerSecurityProtocol = config.effectiveListenerSecurityProtocolMap.getOrElse(controllerListenerName, SecurityProtocol.forName(controllerListenerName.value()))
     val controllerSaslMechanism = config.saslMechanismControllerProtocol
@@ -98,12 +99,13 @@ object RaftControllerNodeProvider {
  * Finds the controller node by checking the metadata log manager.
  * This provider is used when we are using a Raft-based metadata quorum.
  */
-class RaftControllerNodeProvider(val raftManager: RaftManager[ApiMessageAndVersion],
-                                 controllerQuorumVoterNodes: Seq[Node],
-                                 val listenerName: ListenerName,
-                                 val securityProtocol: SecurityProtocol,
-                                 val saslMechanism: String
-                                ) extends ControllerNodeProvider with Logging {
+class RaftControllerNodeProvider(
+  val raftManager: RaftManager[ApiMessageAndVersion],
+  controllerQuorumVoterNodes: Seq[Node],
+  val listenerName: ListenerName,
+  val securityProtocol: SecurityProtocol,
+  val saslMechanism: String
+) extends ControllerNodeProvider with Logging {
   val idToNode = controllerQuorumVoterNodes.map(node => node.id() -> node).toMap
 
   override def get(): Option[Node] = {
@@ -133,7 +135,6 @@ object BrokerToControllerChannelManager {
   }
 }
 
-
 trait BrokerToControllerChannelManager {
   def start(): Unit
   def shutdown(): Unit
@@ -144,7 +145,6 @@ trait BrokerToControllerChannelManager {
   ): Unit
 }
 
-
 /**
  * This class manages the connection between a broker and the controller. It runs a single
  * [[BrokerToControllerRequestThread]] which uses the broker's metadata cache as its own metadata to find
@@ -164,7 +164,6 @@ class BrokerToControllerChannelManagerImpl(
   private val logContext = new LogContext(s"[BrokerToControllerChannelManager broker=${config.brokerId} name=$channelName] ")
   private val manualMetadataUpdater = new ManualMetadataUpdater()
   private val apiVersions = new ApiVersions()
-  private val currentNodeApiVersions = NodeApiVersions.create()
   private val requestThread = newRequestThread
 
   def start(): Unit = {
@@ -188,6 +187,10 @@ class BrokerToControllerChannelManagerImpl(
         config.saslInterBrokerHandshakeRequestEnable,
         logContext
       )
+      channelBuilder match {
+        case reconfigurable: Reconfigurable => config.addReconfigurable(reconfigurable)
+        case _ =>
+      }
       val selector = new Selector(
         NetworkReceive.UNLIMITED,
         Selector.NO_IDLE_TIMEOUT_MS,
@@ -250,13 +253,11 @@ class BrokerToControllerChannelManagerImpl(
     ))
   }
 
-  def controllerApiVersions(): Option[NodeApiVersions] =
-    requestThread.activeControllerAddress().flatMap(
-      activeController => if (activeController.id() == config.brokerId)
-        Some(currentNodeApiVersions)
-      else
-        Option(apiVersions.get(activeController.idString()))
-  )
+  def controllerApiVersions(): Option[NodeApiVersions] = {
+    requestThread.activeControllerAddress().flatMap { activeController =>
+      Option(apiVersions.get(activeController.idString))
+    }
+  }
 }
 
 abstract class ControllerRequestCompletionHandler extends RequestCompletionHandler {
@@ -351,10 +352,10 @@ class BrokerToControllerRequestThread(
       requestQueue.putFirst(queueItem)
     } else if (response.responseBody().errorCounts().containsKey(Errors.NOT_CONTROLLER)) {
       // just close the controller connection and wait for metadata cache update in doWork
-      activeControllerAddress().foreach { controllerAddress => {
+      activeControllerAddress().foreach { controllerAddress =>
         networkClient.disconnect(controllerAddress.idString)
         updateControllerAddress(null)
-      }}
+      }
 
       requestQueue.putFirst(queueItem)
     } else {
diff --git a/core/src/main/scala/kafka/server/ClientQuotaManager.scala b/core/src/main/scala/kafka/server/ClientQuotaManager.scala
index 7334519986805..ee7c70bec93c3 100644
--- a/core/src/main/scala/kafka/server/ClientQuotaManager.scala
+++ b/core/src/main/scala/kafka/server/ClientQuotaManager.scala
@@ -593,7 +593,7 @@ class ClientQuotaManager(private val config: ClientQuotaManagerConfig,
       if (sanitizedUser != null && clientId != null) {
         val userEntity = Some(UserEntity(sanitizedUser))
         val clientIdEntity = Some(ClientIdEntity(clientId))
-        if (!sanitizedUser.isEmpty && !clientId.isEmpty) {
+        if (sanitizedUser.nonEmpty && clientId.nonEmpty) {
           // /config/users/<user>/clients/<client-id>
           quota = overriddenQuotas.get(KafkaQuotaEntity(userEntity, clientIdEntity))
           if (quota == null) {
@@ -608,14 +608,14 @@ class ClientQuotaManager(private val config: ClientQuotaManagerConfig,
             // /config/users/<default>/clients/<default>
             quota = overriddenQuotas.get(DefaultUserClientIdQuotaEntity)
           }
-        } else if (!sanitizedUser.isEmpty) {
+        } else if (sanitizedUser.nonEmpty) {
           // /config/users/<user>
           quota = overriddenQuotas.get(KafkaQuotaEntity(userEntity, None))
           if (quota == null) {
             // /config/users/<default>
             quota = overriddenQuotas.get(DefaultUserQuotaEntity)
           }
-        } else if (!clientId.isEmpty) {
+        } else if (clientId.nonEmpty) {
           // /config/clients/<client-id>
           quota = overriddenQuotas.get(KafkaQuotaEntity(None, clientIdEntity))
           if (quota == null) {
diff --git a/core/src/main/scala/kafka/server/ConfigAdminManager.scala b/core/src/main/scala/kafka/server/ConfigAdminManager.scala
index a7f5c6bdefeec..cc7a98179dd4e 100644
--- a/core/src/main/scala/kafka/server/ConfigAdminManager.scala
+++ b/core/src/main/scala/kafka/server/ConfigAdminManager.scala
@@ -494,17 +494,18 @@ object ConfigAdminManager {
         case OpType.DELETE => configProps.remove(alterConfigOp.configEntry.name)
         case OpType.APPEND => {
           if (!listType(alterConfigOp.configEntry.name, configKeys))
-            throw new InvalidRequestException(s"Config value append is not allowed for config key: ${alterConfigOp.configEntry.name}")
+            throw new InvalidConfigurationException(s"Config value append is not allowed for config key: ${alterConfigOp.configEntry.name}")
           val oldValueList = Option(configProps.getProperty(alterConfigOp.configEntry.name))
             .orElse(Option(ConfigDef.convertToString(configKeys(configPropName).defaultValue, ConfigDef.Type.LIST)))
             .getOrElse("")
             .split(",").toList
-          val newValueList = oldValueList ::: alterConfigOp.configEntry.value.split(",").toList
+          val appendingValueList = alterConfigOp.configEntry.value.split(",").toList.filter(value => !oldValueList.contains(value))
+          val newValueList = oldValueList ::: appendingValueList
           configProps.setProperty(alterConfigOp.configEntry.name, newValueList.mkString(","))
         }
         case OpType.SUBTRACT => {
           if (!listType(alterConfigOp.configEntry.name, configKeys))
-            throw new InvalidRequestException(s"Config value subtract is not allowed for config key: ${alterConfigOp.configEntry.name}")
+            throw new InvalidConfigurationException(s"Config value subtract is not allowed for config key: ${alterConfigOp.configEntry.name}")
           val oldValueList = Option(configProps.getProperty(alterConfigOp.configEntry.name))
             .orElse(Option(ConfigDef.convertToString(configKeys(configPropName).defaultValue, ConfigDef.Type.LIST)))
             .getOrElse("")
diff --git a/core/src/main/scala/kafka/server/ConfigHandler.scala b/core/src/main/scala/kafka/server/ConfigHandler.scala
index 2fe49ad17baf1..13be872878ede 100644
--- a/core/src/main/scala/kafka/server/ConfigHandler.scala
+++ b/core/src/main/scala/kafka/server/ConfigHandler.scala
@@ -64,7 +64,7 @@ class TopicConfigHandler(private val logManager: LogManager, kafkaConfig: KafkaC
     }
     logManager.updateTopicConfig(topic, props)
 
-    def updateThrottledList(prop: String, quotaManager: ReplicationQuotaManager) = {
+    def updateThrottledList(prop: String, quotaManager: ReplicationQuotaManager): Unit = {
       if (topicConfig.containsKey(prop) && topicConfig.getProperty(prop).nonEmpty) {
         val partitions = parseThrottledPartitions(topicConfig, kafkaConfig.brokerId, prop)
         quotaManager.markThrottled(topic, partitions)
@@ -105,7 +105,7 @@ class TopicConfigHandler(private val logManager: LogManager, kafkaConfig: KafkaC
         if (messageFormatVersion.shouldWarn)
           warn(messageFormatVersion.topicWarningMessage(topic))
         Some(LogConfig.MessageFormatVersionProp)
-      } else if (kafkaConfig.interBrokerProtocolVersion < messageFormatVersion.messageFormatVersion) {
+      } else if (kafkaConfig.interBrokerProtocolVersion.isLessThan(messageFormatVersion.messageFormatVersion)) {
         warn(s"Topic configuration ${LogConfig.MessageFormatVersionProp} is ignored for `$topic` because `$versionString` " +
           s"is higher than what is allowed by the inter-broker protocol version `${kafkaConfig.interBrokerProtocolVersionString}`")
         Some(LogConfig.MessageFormatVersionProp)
@@ -177,7 +177,7 @@ class UserConfigHandler(private val quotaManagers: QuotaManagers, val credential
     val sanitizedUser = entities(0)
     val sanitizedClientId = if (entities.length == 3) Some(entities(2)) else None
     updateQuotaConfig(Some(sanitizedUser), sanitizedClientId, config)
-    if (!sanitizedClientId.isDefined && sanitizedUser != ConfigEntityName.Default)
+    if (sanitizedClientId.isEmpty && sanitizedUser != ConfigEntityName.Default)
       credentialProvider.updateCredentials(Sanitizer.desanitize(sanitizedUser), config)
   }
 }
diff --git a/core/src/main/scala/kafka/server/ControllerApis.scala b/core/src/main/scala/kafka/server/ControllerApis.scala
index e6d302cdb2db7..efb6a36c3dbce 100644
--- a/core/src/main/scala/kafka/server/ControllerApis.scala
+++ b/core/src/main/scala/kafka/server/ControllerApis.scala
@@ -18,18 +18,16 @@
 package kafka.server
 
 import java.util
-import java.util.Collections
+import java.util.{Collections, OptionalLong}
 import java.util.Map.Entry
-import java.util.concurrent.TimeUnit.{MILLISECONDS, NANOSECONDS}
-import java.util.concurrent.{CompletableFuture, ExecutionException}
-
+import java.util.concurrent.{CompletableFuture, CompletionException}
 import kafka.network.RequestChannel
 import kafka.raft.RaftManager
 import kafka.server.QuotaFactory.QuotaManagers
 import kafka.utils.Logging
 import org.apache.kafka.clients.admin.AlterConfigOp
 import org.apache.kafka.common.Uuid.ZERO_UUID
-import org.apache.kafka.common.acl.AclOperation.{ALTER, ALTER_CONFIGS, CLUSTER_ACTION, CREATE, DELETE, DESCRIBE}
+import org.apache.kafka.common.acl.AclOperation.{ALTER, ALTER_CONFIGS, CLUSTER_ACTION, CREATE, DELETE, DESCRIBE, DESCRIBE_CONFIGS}
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.errors.{ApiException, ClusterAuthorizationException, InvalidRequestException, TopicDeletionDisabledException}
 import org.apache.kafka.common.internals.FatalExitError
@@ -47,8 +45,9 @@ import org.apache.kafka.common.resource.Resource.CLUSTER_NAME
 import org.apache.kafka.common.resource.ResourceType.{CLUSTER, TOPIC}
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{Node, Uuid}
-import org.apache.kafka.controller.Controller
-import org.apache.kafka.metadata.{BrokerHeartbeatReply, BrokerRegistrationReply, VersionRange}
+import org.apache.kafka.controller.ControllerRequestContext.requestTimeoutMsToDeadlineNs
+import org.apache.kafka.controller.{Controller, ControllerRequestContext}
+import org.apache.kafka.metadata.{BrokerHeartbeatReply, BrokerRegistrationReply}
 import org.apache.kafka.server.authorizer.Authorizer
 import org.apache.kafka.server.common.ApiMessageAndVersion
 
@@ -62,7 +61,6 @@ class ControllerApis(val requestChannel: RequestChannel,
                      val authorizer: Option[Authorizer],
                      val quotas: QuotaManagers,
                      val time: Time,
-                     val supportedFeatures: Map[String, VersionRange],
                      val controller: Controller,
                      val raftManager: RaftManager[ApiMessageAndVersion],
                      val config: KafkaConfig,
@@ -80,7 +78,7 @@ class ControllerApis(val requestChannel: RequestChannel,
 
   override def handle(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
     try {
-      request.header.apiKey match {
+      val handlerFuture: CompletableFuture[Unit] = request.header.apiKey match {
         case ApiKeys.FETCH => handleFetch(request)
         case ApiKeys.FETCH_SNAPSHOT => handleFetchSnapshot(request)
         case ApiKeys.CREATE_TOPICS => handleCreateTopics(request)
@@ -91,7 +89,7 @@ class ControllerApis(val requestChannel: RequestChannel,
         case ApiKeys.BEGIN_QUORUM_EPOCH => handleBeginQuorumEpoch(request)
         case ApiKeys.END_QUORUM_EPOCH => handleEndQuorumEpoch(request)
         case ApiKeys.DESCRIBE_QUORUM => handleDescribeQuorum(request)
-        case ApiKeys.ALTER_ISR => handleAlterIsrRequest(request)
+        case ApiKeys.ALTER_PARTITION => handleAlterPartitionRequest(request)
         case ApiKeys.BROKER_REGISTRATION => handleBrokerRegistration(request)
         case ApiKeys.BROKER_HEARTBEAT => handleBrokerHeartBeatRequest(request)
         case ApiKeys.UNREGISTER_BROKER => handleUnregisterBroker(request)
@@ -108,58 +106,84 @@ class ControllerApis(val requestChannel: RequestChannel,
         case ApiKeys.CREATE_ACLS => aclApis.handleCreateAcls(request)
         case ApiKeys.DELETE_ACLS => aclApis.handleDeleteAcls(request)
         case ApiKeys.ELECT_LEADERS => handleElectLeaders(request)
+        case ApiKeys.UPDATE_FEATURES => handleUpdateFeatures(request)
         case _ => throw new ApiException(s"Unsupported ApiKey ${request.context.header.apiKey}")
       }
+
+      // This catches exceptions in the future and subsequent completion stages returned by the request handlers.
+      handlerFuture.whenComplete { (_, exception) =>
+        if (exception != null) {
+          // CompletionException does not include the stack frames in its "cause" exception, so we need to
+          // log the original exception here
+          error(s"Unexpected error handling request ${request.requestDesc(true)} " +
+            s"with context ${request.context}", exception)
+
+          // For building the correct error request, we do need send the "cause" exception
+          val actualException = if (exception.isInstanceOf[CompletionException]) exception.getCause else exception
+          requestHelper.handleError(request, actualException)
+        }
+      }
     } catch {
       case e: FatalExitError => throw e
-      case e: Throwable => {
-        val t = if (e.isInstanceOf[ExecutionException]) e.getCause() else e
+      case t: Throwable => {
+        // This catches exceptions in the blocking parts of the request handlers
         error(s"Unexpected error handling request ${request.requestDesc(true)} " +
           s"with context ${request.context}", t)
         requestHelper.handleError(request, t)
       }
+    } finally {
+      // Only record local completion time if it is unset.
+      if (request.apiLocalCompleteTimeNanos < 0) {
+        request.apiLocalCompleteTimeNanos = time.nanoseconds
+      }
     }
   }
 
-  def handleEnvelopeRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
+  def handleEnvelopeRequest(request: RequestChannel.Request, requestLocal: RequestLocal): CompletableFuture[Unit] = {
     if (!authHelper.authorize(request.context, CLUSTER_ACTION, CLUSTER, CLUSTER_NAME)) {
       requestHelper.sendErrorResponseMaybeThrottle(request, new ClusterAuthorizationException(
         s"Principal ${request.context.principal} does not have required CLUSTER_ACTION for envelope"))
     } else {
       EnvelopeUtils.handleEnvelopeRequest(request, requestChannel.metrics, handle(_, requestLocal))
     }
+    CompletableFuture.completedFuture[Unit](())
   }
 
-  def handleSaslHandshakeRequest(request: RequestChannel.Request): Unit = {
+  def handleSaslHandshakeRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val responseData = new SaslHandshakeResponseData().setErrorCode(ILLEGAL_SASL_STATE.code)
     requestHelper.sendResponseMaybeThrottle(request, _ => new SaslHandshakeResponse(responseData))
+    CompletableFuture.completedFuture[Unit](())
   }
 
-  def handleSaslAuthenticateRequest(request: RequestChannel.Request): Unit = {
+  def handleSaslAuthenticateRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val responseData = new SaslAuthenticateResponseData()
       .setErrorCode(ILLEGAL_SASL_STATE.code)
       .setErrorMessage("SaslAuthenticate request received after successful authentication")
     requestHelper.sendResponseMaybeThrottle(request, _ => new SaslAuthenticateResponse(responseData))
+    CompletableFuture.completedFuture[Unit](())
   }
 
-  def handleFetch(request: RequestChannel.Request): Unit = {
+  def handleFetch(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     handleRaftRequest(request, response => new FetchResponse(response.asInstanceOf[FetchResponseData]))
   }
 
-  def handleFetchSnapshot(request: RequestChannel.Request): Unit = {
+  def handleFetchSnapshot(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     handleRaftRequest(request, response => new FetchSnapshotResponse(response.asInstanceOf[FetchSnapshotResponseData]))
   }
 
-  def handleDeleteTopics(request: RequestChannel.Request): Unit = {
+  def handleDeleteTopics(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val deleteTopicsRequest = request.body[DeleteTopicsRequest]
-    val future = deleteTopics(deleteTopicsRequest.data,
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, deleteTopicsRequest.data.timeoutMs))
+    val future = deleteTopics(context,
+      deleteTopicsRequest.data,
       request.context.apiVersion,
       authHelper.authorize(request.context, DELETE, CLUSTER, CLUSTER_NAME, logIfDenied = false),
       names => authHelper.filterByAuthorized(request.context, DESCRIBE, TOPIC, names)(n => n),
       names => authHelper.filterByAuthorized(request.context, DELETE, TOPIC, names)(n => n))
-    future.whenComplete { (results, exception) =>
+    future.handle[Unit] { (results, exception) =>
       requestHelper.sendResponseMaybeThrottle(request, throttleTimeMs => {
         if (exception != null) {
           deleteTopicsRequest.getErrorResponse(throttleTimeMs, exception)
@@ -173,12 +197,14 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def deleteTopics(request: DeleteTopicsRequestData,
-                   apiVersion: Int,
-                   hasClusterAuth: Boolean,
-                   getDescribableTopics: Iterable[String] => Set[String],
-                   getDeletableTopics: Iterable[String] => Set[String])
-                   : CompletableFuture[util.List[DeletableTopicResult]] = {
+  def deleteTopics(
+    context: ControllerRequestContext,
+    request: DeleteTopicsRequestData,
+    apiVersion: Int,
+    hasClusterAuth: Boolean,
+    getDescribableTopics: Iterable[String] => Set[String],
+    getDeletableTopics: Iterable[String] => Set[String]
+  ): CompletableFuture[util.List[DeletableTopicResult]] = {
     // Check if topic deletion is enabled at all.
     if (!config.deleteTopicEnable) {
       if (apiVersion < 3) {
@@ -187,7 +213,6 @@ class ControllerApis(val requestChannel: RequestChannel,
         throw new TopicDeletionDisabledException()
       }
     }
-    val deadlineNs = time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs, MILLISECONDS);
     // The first step is to load up the names and IDs that have been provided by the
     // request.  This is a bit messy because we support multiple ways of referring to
     // topics (both by name and by id) and because we need to check for duplicates or
@@ -240,7 +265,7 @@ class ControllerApis(val requestChannel: RequestChannel,
     val toAuthenticate = new util.HashSet[String]
     toAuthenticate.addAll(providedNames)
     val idToName = new util.HashMap[Uuid, String]
-    controller.findTopicNames(deadlineNs, providedIds).thenCompose { topicNames =>
+    controller.findTopicNames(context, providedIds).thenCompose { topicNames =>
       topicNames.forEach { (id, nameOrError) =>
         if (nameOrError.isError) {
           appendResponse(null, id, nameOrError.error())
@@ -275,7 +300,7 @@ class ControllerApis(val requestChannel: RequestChannel,
       }
       // For each topic that was provided by name, check if authentication failed.
       // If so, create an error response for it. Otherwise, add it to the idToName map.
-      controller.findTopicIds(deadlineNs, providedNames).thenCompose { topicIds =>
+      controller.findTopicIds(context, providedNames).thenCompose { topicIds =>
         topicIds.forEach { (name, idOrError) =>
           if (!describeable.contains(name)) {
             appendResponse(name, ZERO_UUID, new ApiError(TOPIC_AUTHORIZATION_FAILED))
@@ -299,7 +324,7 @@ class ControllerApis(val requestChannel: RequestChannel,
         }
         // Finally, the idToName map contains all the topics that we are authorized to delete.
         // Perform the deletion and create responses for each one.
-        controller.deleteTopics(deadlineNs, idToName.keySet).thenApply { idToError =>
+        controller.deleteTopics(context, idToName.keySet).thenApply { idToError =>
           idToError.forEach { (id, error) =>
             appendResponse(idToName.get(id), id, error)
           }
@@ -312,12 +337,17 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleCreateTopics(request: RequestChannel.Request): Unit = {
+  def handleCreateTopics(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val createTopicsRequest = request.body[CreateTopicsRequest]
-    val future = createTopics(createTopicsRequest.data(),
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, createTopicsRequest.data.timeoutMs))
+    val future = createTopics(context,
+        createTopicsRequest.data,
         authHelper.authorize(request.context, CREATE, CLUSTER, CLUSTER_NAME, logIfDenied = false),
-        names => authHelper.filterByAuthorized(request.context, CREATE, TOPIC, names)(identity))
-    future.whenComplete { (result, exception) =>
+        names => authHelper.filterByAuthorized(request.context, CREATE, TOPIC, names)(identity),
+        names => authHelper.filterByAuthorized(request.context, DESCRIBE_CONFIGS, TOPIC,
+            names, logIfDenied = false)(identity))
+    future.handle[Unit] { (result, exception) =>
       requestHelper.sendResponseMaybeThrottle(request, throttleTimeMs => {
         if (exception != null) {
           createTopicsRequest.getErrorResponse(throttleTimeMs, exception)
@@ -329,10 +359,13 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def createTopics(request: CreateTopicsRequestData,
-                   hasClusterAuth: Boolean,
-                   getCreatableTopics: Iterable[String] => Set[String])
-                   : CompletableFuture[CreateTopicsResponseData] = {
+  def createTopics(
+    context: ControllerRequestContext,
+    request: CreateTopicsRequestData,
+    hasClusterAuth: Boolean,
+    getCreatableTopics: Iterable[String] => Set[String],
+    getDescribableTopics: Iterable[String] => Set[String]
+  ): CompletableFuture[CreateTopicsResponseData] = {
     val topicNames = new util.HashSet[String]()
     val duplicateTopicNames = new util.HashSet[String]()
     request.topics().forEach { topicData =>
@@ -348,6 +381,7 @@ class ControllerApis(val requestChannel: RequestChannel,
     } else {
       getCreatableTopics.apply(topicNames.asScala)
     }
+    val describableTopicNames = getDescribableTopics.apply(topicNames.asScala).asJava
     val effectiveRequest = request.duplicate()
     val iterator = effectiveRequest.topics().iterator()
     while (iterator.hasNext) {
@@ -357,7 +391,7 @@ class ControllerApis(val requestChannel: RequestChannel,
         iterator.remove()
       }
     }
-    controller.createTopics(effectiveRequest).thenApply { response =>
+    controller.createTopics(context, effectiveRequest, describableTopicNames).thenApply { response =>
       duplicateTopicNames.forEach { name =>
         response.topics().add(new CreatableTopicResult().
           setName(name).
@@ -375,7 +409,7 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleApiVersionsRequest(request: RequestChannel.Request): Unit = {
+  def handleApiVersionsRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
     // Note that broker returns its full list of supported ApiKeys and versions regardless of current
     // authentication state (e.g., before SASL authentication on an SASL listener, do note that no
     // Kafka protocol requests may take place on an SSL listener before the SSL handshake is finished).
@@ -393,6 +427,7 @@ class ControllerApis(val requestChannel: RequestChannel,
       }
     }
     requestHelper.sendResponseMaybeThrottle(request, createResponseCallback)
+    CompletableFuture.completedFuture[Unit](())
   }
 
   def authorizeAlterResource(requestContext: RequestContext,
@@ -414,9 +449,10 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleLegacyAlterConfigs(request: RequestChannel.Request): Unit = {
+  def handleLegacyAlterConfigs(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val response = new AlterConfigsResponseData()
     val alterConfigsRequest = request.body[AlterConfigsRequest]
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal, OptionalLong.empty())
     val duplicateResources = new util.HashSet[ConfigResource]
     val configChanges = new util.HashMap[ConfigResource, util.Map[String, String]]()
     alterConfigsRequest.data.resources.forEach { resource =>
@@ -455,8 +491,8 @@ class ControllerApis(val requestChannel: RequestChannel,
         iterator.remove()
       }
     }
-    controller.legacyAlterConfigs(configChanges, alterConfigsRequest.data.validateOnly)
-      .whenComplete { (controllerResults, exception) =>
+    controller.legacyAlterConfigs(context, configChanges, alterConfigsRequest.data.validateOnly)
+      .handle[Unit] { (controllerResults, exception) =>
         if (exception != null) {
           requestHelper.handleError(request, exception)
         } else {
@@ -472,32 +508,33 @@ class ControllerApis(val requestChannel: RequestChannel,
       }
   }
 
-  def handleVote(request: RequestChannel.Request): Unit = {
+  def handleVote(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     handleRaftRequest(request, response => new VoteResponse(response.asInstanceOf[VoteResponseData]))
   }
 
-  def handleBeginQuorumEpoch(request: RequestChannel.Request): Unit = {
+  def handleBeginQuorumEpoch(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     handleRaftRequest(request, response => new BeginQuorumEpochResponse(response.asInstanceOf[BeginQuorumEpochResponseData]))
   }
 
-  def handleEndQuorumEpoch(request: RequestChannel.Request): Unit = {
+  def handleEndQuorumEpoch(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     handleRaftRequest(request, response => new EndQuorumEpochResponse(response.asInstanceOf[EndQuorumEpochResponseData]))
   }
 
-  def handleDescribeQuorum(request: RequestChannel.Request): Unit = {
+  def handleDescribeQuorum(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, DESCRIBE)
     handleRaftRequest(request, response => new DescribeQuorumResponse(response.asInstanceOf[DescribeQuorumResponseData]))
   }
 
-  def handleElectLeaders(request: RequestChannel.Request): Unit = {
+  def handleElectLeaders(request: RequestChannel.Request): CompletableFuture[Unit] = {
     authHelper.authorizeClusterOperation(request, ALTER)
-
     val electLeadersRequest = request.body[ElectLeadersRequest]
-    val future = controller.electLeaders(electLeadersRequest.data)
-    future.whenComplete { (responseData, exception) =>
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, electLeadersRequest.data.timeoutMs))
+    val future = controller.electLeaders(context, electLeadersRequest.data)
+    future.handle[Unit] { (responseData, exception) =>
       if (exception != null) {
         requestHelper.sendResponseMaybeThrottle(request, throttleMs => {
           electLeadersRequest.getErrorResponse(throttleMs, exception)
@@ -510,25 +547,28 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleAlterIsrRequest(request: RequestChannel.Request): Unit = {
-    val alterIsrRequest = request.body[AlterIsrRequest]
+  def handleAlterPartitionRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
+    val alterPartitionRequest = request.body[AlterPartitionRequest]
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
-    val future = controller.alterIsr(alterIsrRequest.data)
-    future.whenComplete { (result, exception) =>
+    val future = controller.alterPartition(context, alterPartitionRequest.data)
+    future.handle[Unit] { (result, exception) =>
       val response = if (exception != null) {
-        alterIsrRequest.getErrorResponse(exception)
+        alterPartitionRequest.getErrorResponse(exception)
       } else {
-        new AlterIsrResponse(result)
+        new AlterPartitionResponse(result)
       }
       requestHelper.sendResponseExemptThrottle(request, response)
     }
   }
 
-  def handleBrokerHeartBeatRequest(request: RequestChannel.Request): Unit = {
+  def handleBrokerHeartBeatRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val heartbeatRequest = request.body[BrokerHeartbeatRequest]
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
-
-    controller.processBrokerHeartbeat(heartbeatRequest.data).handle[Unit] { (reply, e) =>
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, config.brokerHeartbeatIntervalMs))
+    controller.processBrokerHeartbeat(context, heartbeatRequest.data).handle[Unit] { (reply, e) =>
       def createResponseCallback(requestThrottleMs: Int,
                                  reply: BrokerHeartbeatReply,
                                  e: Throwable): BrokerHeartbeatResponse = {
@@ -550,11 +590,13 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleUnregisterBroker(request: RequestChannel.Request): Unit = {
+  def handleUnregisterBroker(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val decommissionRequest = request.body[UnregisterBrokerRequest]
     authHelper.authorizeClusterOperation(request, ALTER)
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
 
-    controller.unregisterBroker(decommissionRequest.data().brokerId()).handle[Unit] { (_, e) =>
+    controller.unregisterBroker(context, decommissionRequest.data.brokerId).handle[Unit] { (_, e) =>
       def createResponseCallback(requestThrottleMs: Int,
                                  e: Throwable): UnregisterBrokerResponse = {
         if (e != null) {
@@ -571,11 +613,13 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleBrokerRegistration(request: RequestChannel.Request): Unit = {
+  def handleBrokerRegistration(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val registrationRequest = request.body[BrokerRegistrationRequest]
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
 
-    controller.registerBroker(registrationRequest.data).handle[Unit] { (reply, e) =>
+    controller.registerBroker(context, registrationRequest.data).handle[Unit] { (reply, e) =>
       def createResponseCallback(requestThrottleMs: Int,
                                  reply: BrokerRegistrationReply,
                                  e: Throwable): BrokerRegistrationResponse = {
@@ -596,11 +640,10 @@ class ControllerApis(val requestChannel: RequestChannel,
   }
 
   private def handleRaftRequest(request: RequestChannel.Request,
-                                buildResponse: ApiMessage => AbstractResponse): Unit = {
+                                buildResponse: ApiMessage => AbstractResponse): CompletableFuture[Unit] = {
     val requestBody = request.body[AbstractRequest]
     val future = raftManager.handleRequest(request.header, requestBody.data, time.milliseconds())
-
-    future.whenComplete { (responseData, exception) =>
+    future.handle[Unit] { (responseData, exception) =>
       val response = if (exception != null) {
         requestBody.getErrorResponse(exception)
       } else {
@@ -610,11 +653,13 @@ class ControllerApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleAlterClientQuotas(request: RequestChannel.Request): Unit = {
+  def handleAlterClientQuotas(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val quotaRequest = request.body[AlterClientQuotasRequest]
     authHelper.authorizeClusterOperation(request, ALTER_CONFIGS)
-    controller.alterClientQuotas(quotaRequest.entries, quotaRequest.validateOnly)
-      .whenComplete { (results, exception) =>
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
+    controller.alterClientQuotas(context, quotaRequest.entries, quotaRequest.validateOnly)
+      .handle[Unit] { (results, exception) =>
         if (exception != null) {
           requestHelper.handleError(request, exception)
         } else {
@@ -624,9 +669,11 @@ class ControllerApis(val requestChannel: RequestChannel,
       }
   }
 
-  def handleIncrementalAlterConfigs(request: RequestChannel.Request): Unit = {
+  def handleIncrementalAlterConfigs(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val response = new IncrementalAlterConfigsResponseData()
     val alterConfigsRequest = request.body[IncrementalAlterConfigsRequest]
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
     val duplicateResources = new util.HashSet[ConfigResource]
     val configChanges = new util.HashMap[ConfigResource,
       util.Map[String, Entry[AlterConfigOp.OpType, String]]]()
@@ -669,8 +716,8 @@ class ControllerApis(val requestChannel: RequestChannel,
         iterator.remove()
       }
     }
-    controller.incrementalAlterConfigs(configChanges, alterConfigsRequest.data.validateOnly)
-      .whenComplete { (controllerResults, exception) =>
+    controller.incrementalAlterConfigs(context, configChanges, alterConfigsRequest.data.validateOnly)
+      .handle[Unit] { (controllerResults, exception) =>
         if (exception != null) {
           requestHelper.handleError(request, exception)
         } else {
@@ -686,17 +733,17 @@ class ControllerApis(val requestChannel: RequestChannel,
       }
   }
 
-  def handleCreatePartitions(request: RequestChannel.Request): Unit = {
+  def handleCreatePartitions(request: RequestChannel.Request): CompletableFuture[Unit] = {
     def filterAlterAuthorizedTopics(topics: Iterable[String]): Set[String] = {
       authHelper.filterByAuthorized(request.context, ALTER, TOPIC, topics)(n => n)
     }
-
-    val future = createPartitions(
-      request.body[CreatePartitionsRequest].data,
-      filterAlterAuthorizedTopics
-    )
-
-    future.whenComplete { (responses, exception) =>
+    val createPartitionsRequest = request.body[CreatePartitionsRequest]
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, createPartitionsRequest.data.timeoutMs))
+    val future = createPartitions(context,
+      createPartitionsRequest.data(),
+      filterAlterAuthorizedTopics)
+    future.handle[Unit] { (responses, exception) =>
       if (exception != null) {
         requestHelper.handleError(request, exception)
       } else {
@@ -711,10 +758,10 @@ class ControllerApis(val requestChannel: RequestChannel,
   }
 
   def createPartitions(
+    context: ControllerRequestContext,
     request: CreatePartitionsRequestData,
     getAlterAuthorizedTopics: Iterable[String] => Set[String]
   ): CompletableFuture[util.List[CreatePartitionsTopicResult]] = {
-    val deadlineNs = time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs, MILLISECONDS);
     val responses = new util.ArrayList[CreatePartitionsTopicResult]()
     val duplicateTopicNames = new util.HashSet[String]()
     val topicNames = new util.HashSet[String]()
@@ -742,33 +789,43 @@ class ControllerApis(val requestChannel: RequestChannel,
           setErrorCode(TOPIC_AUTHORIZATION_FAILED.code))
       }
     }
-    controller.createPartitions(deadlineNs, topics).thenApply { results =>
+    controller.createPartitions(context, topics, request.validateOnly).thenApply { results =>
       results.forEach(response => responses.add(response))
       responses
     }
   }
 
-  def handleAlterPartitionReassignments(request: RequestChannel.Request): Unit = {
+  def handleAlterPartitionReassignments(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val alterRequest = request.body[AlterPartitionReassignmentsRequest]
     authHelper.authorizeClusterOperation(request, ALTER)
-    val response = controller.alterPartitionReassignments(alterRequest.data()).get()
-    requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-      new AlterPartitionReassignmentsResponse(response.setThrottleTimeMs(requestThrottleMs)))
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      requestTimeoutMsToDeadlineNs(time, alterRequest.data.timeoutMs))
+    controller.alterPartitionReassignments(context, alterRequest.data)
+      .thenApply[Unit] { response =>
+        requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
+          new AlterPartitionReassignmentsResponse(response.setThrottleTimeMs(requestThrottleMs)))
+      }
   }
 
-  def handleListPartitionReassignments(request: RequestChannel.Request): Unit = {
+  def handleListPartitionReassignments(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val listRequest = request.body[ListPartitionReassignmentsRequest]
     authHelper.authorizeClusterOperation(request, DESCRIBE)
-    val response = controller.listPartitionReassignments(listRequest.data()).get()
-    requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-      new ListPartitionReassignmentsResponse(response.setThrottleTimeMs(requestThrottleMs)))
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
+    controller.listPartitionReassignments(context, listRequest.data)
+      .thenApply[Unit] { response =>
+        requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
+          new ListPartitionReassignmentsResponse(response.setThrottleTimeMs(requestThrottleMs)))
+      }
   }
 
-  def handleAllocateProducerIdsRequest(request: RequestChannel.Request): Unit = {
+  def handleAllocateProducerIdsRequest(request: RequestChannel.Request): CompletableFuture[Unit] = {
     val allocatedProducerIdsRequest = request.body[AllocateProducerIdsRequest]
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
-    controller.allocateProducerIds(allocatedProducerIdsRequest.data)
-      .whenComplete((results, exception) => {
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+        OptionalLong.empty())
+    controller.allocateProducerIds(context, allocatedProducerIdsRequest.data)
+      .handle[Unit] { (results, exception) =>
         if (exception != null) {
           requestHelper.handleError(request, exception)
         } else {
@@ -777,6 +834,22 @@ class ControllerApis(val requestChannel: RequestChannel,
             new AllocateProducerIdsResponse(results)
           })
         }
-      })
+      }
+  }
+
+  def handleUpdateFeatures(request: RequestChannel.Request): CompletableFuture[Unit] = {
+    val updateFeaturesRequest = request.body[UpdateFeaturesRequest]
+    authHelper.authorizeClusterOperation(request, ALTER)
+    val context = new ControllerRequestContext(request.context.header.data, request.context.principal,
+      OptionalLong.empty())
+    controller.updateFeatures(context, updateFeaturesRequest.data)
+      .handle[Unit] { (response, exception) =>
+        if (exception != null) {
+          requestHelper.handleError(request, exception)
+        } else {
+          requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
+            new UpdateFeaturesResponse(response.setThrottleTimeMs(requestThrottleMs)))
+        }
+      }
   }
 }
diff --git a/core/src/main/scala/kafka/server/ControllerConfigurationValidator.scala b/core/src/main/scala/kafka/server/ControllerConfigurationValidator.scala
index 5cc075ef044a6..a5dc78d388788 100644
--- a/core/src/main/scala/kafka/server/ControllerConfigurationValidator.scala
+++ b/core/src/main/scala/kafka/server/ControllerConfigurationValidator.scala
@@ -24,7 +24,7 @@ import kafka.log.LogConfig
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.config.ConfigResource.Type.{BROKER, TOPIC}
 import org.apache.kafka.controller.ConfigurationValidator
-import org.apache.kafka.common.errors.InvalidRequestException
+import org.apache.kafka.common.errors.{InvalidConfigurationException, InvalidRequestException}
 import org.apache.kafka.common.internals.Topic
 
 import scala.collection.mutable
@@ -44,13 +44,56 @@ import scala.collection.mutable
  * as the others. It is not persisted to the metadata log (or to ZK, when we're in that mode).
  */
 class ControllerConfigurationValidator extends ConfigurationValidator {
-  override def validate(resource: ConfigResource, config: util.Map[String, String]): Unit = {
+  private def validateTopicName(
+    name: String
+  ): Unit = {
+    if (name.isEmpty()) {
+      throw new InvalidRequestException("Default topic resources are not allowed.")
+    }
+    Topic.validate(name)
+  }
+
+  private def validateBrokerName(
+    name: String
+  ): Unit = {
+    if (!name.isEmpty()) {
+      val brokerId = try {
+        Integer.valueOf(name)
+      } catch {
+        case _: NumberFormatException =>
+          throw new InvalidRequestException("Unable to parse broker name as a base 10 number.")
+      }
+      if (brokerId < 0) {
+        throw new InvalidRequestException("Invalid negative broker ID.")
+      }
+    }
+  }
+
+  private def throwExceptionForUnknownResourceType(
+    resource: ConfigResource
+  ): Unit = {
+    // Note: we should never handle BROKER_LOGGER resources here, since changes to
+    // those resources are not persisted in the metadata.
+    throw new InvalidRequestException(s"Unknown resource type ${resource.`type`}")
+  }
+
+  override def validate(
+    resource: ConfigResource
+  ): Unit = {
+    resource.`type`() match {
+      case TOPIC => validateTopicName(resource.name())
+      case BROKER => validateBrokerName(resource.name())
+      case _ => throwExceptionForUnknownResourceType(resource)
+    }
+  }
+
+  override def validate(
+    resource: ConfigResource,
+    config: util.Map[String, String]
+  ): Unit = {
     resource.`type`() match {
       case TOPIC =>
-        if (resource.name().isEmpty()) {
-          throw new InvalidRequestException("Default topic resources are not allowed.")
-        }
-        Topic.validate(resource.name())
+        validateTopicName(resource.name())
         val properties = new Properties()
         val nullTopicConfigs = new mutable.ArrayBuffer[String]()
         config.entrySet().forEach(e => {
@@ -61,26 +104,12 @@ class ControllerConfigurationValidator extends ConfigurationValidator {
           }
         })
         if (nullTopicConfigs.nonEmpty) {
-          throw new InvalidRequestException("Null value not supported for topic configs : " +
+          throw new InvalidConfigurationException("Null value not supported for topic configs: " +
             nullTopicConfigs.mkString(","))
         }
         LogConfig.validate(properties)
-      case BROKER =>
-        if (resource.name().nonEmpty) {
-          val brokerId = try {
-            Integer.valueOf(resource.name())
-          } catch {
-            case _: NumberFormatException =>
-              throw new InvalidRequestException("Unable to parse broker name as a base 10 number.")
-          }
-          if (brokerId < 0) {
-            throw new InvalidRequestException("Invalid negative broker ID.")
-          }
-        }
-      case _ =>
-        // Note: we should never handle BROKER_LOGGER resources here, since changes to
-        // those resources are not persisted in the metadata.
-        throw new InvalidRequestException(s"Unknown resource type ${resource.`type`}")
+      case BROKER => validateBrokerName(resource.name())
+      case _ => throwExceptionForUnknownResourceType(resource)
     }
   }
 }
\ No newline at end of file
diff --git a/core/src/main/scala/kafka/server/ControllerServer.scala b/core/src/main/scala/kafka/server/ControllerServer.scala
index e26b25d630486..19a6e307d62be 100644
--- a/core/src/main/scala/kafka/server/ControllerServer.scala
+++ b/core/src/main/scala/kafka/server/ControllerServer.scala
@@ -18,31 +18,35 @@
 package kafka.server
 
 import java.util
+import java.util.OptionalLong
 import java.util.concurrent.locks.ReentrantLock
 import java.util.concurrent.{CompletableFuture, TimeUnit}
-
 import kafka.cluster.Broker.ServerInfo
-import kafka.metrics.{KafkaMetricsGroup, KafkaYammerMetrics, LinuxIoMetricsCollector}
+import kafka.metrics.{KafkaMetricsGroup, LinuxIoMetricsCollector}
 import kafka.network.{DataPlaneAcceptor, SocketServer}
 import kafka.raft.RaftManager
 import kafka.security.CredentialProvider
 import kafka.server.KafkaConfig.{AlterConfigPolicyClassNameProp, CreateTopicPolicyClassNameProp}
+import kafka.server.KafkaRaftServer.BrokerRole
 import kafka.server.QuotaFactory.QuotaManagers
 import kafka.utils.{CoreUtils, Logging}
+import org.apache.kafka.clients.ApiVersions
 import org.apache.kafka.common.message.ApiMessageType.ListenerType
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.security.scram.internals.ScramMechanism
 import org.apache.kafka.common.security.token.delegation.internals.DelegationTokenCache
 import org.apache.kafka.common.utils.{LogContext, Time}
 import org.apache.kafka.common.{ClusterResource, Endpoint}
-import org.apache.kafka.controller.{Controller, QuorumController, QuorumControllerMetrics}
-import org.apache.kafka.metadata.{KafkaConfigSchema, VersionRange}
+import org.apache.kafka.controller.{BootstrapMetadata, Controller, ControllerMetrics, QuorumController, QuorumFeatures}
+import org.apache.kafka.metadata.KafkaConfigSchema
 import org.apache.kafka.raft.RaftConfig
 import org.apache.kafka.raft.RaftConfig.AddressSpec
 import org.apache.kafka.server.authorizer.Authorizer
 import org.apache.kafka.server.common.ApiMessageAndVersion
 import org.apache.kafka.common.config.ConfigException
 import org.apache.kafka.metadata.authorizer.ClusterMetadataAuthorizer
+import org.apache.kafka.server.fault.FaultHandler
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.server.policy.{AlterConfigPolicy, CreateTopicPolicy}
 
 import scala.jdk.CollectionConverters._
@@ -57,12 +61,19 @@ class ControllerServer(
   val raftManager: RaftManager[ApiMessageAndVersion],
   val time: Time,
   val metrics: Metrics,
+  val controllerMetrics: ControllerMetrics,
   val threadNamePrefix: Option[String],
   val controllerQuorumVotersFuture: CompletableFuture[util.Map[Integer, AddressSpec]],
   val configSchema: KafkaConfigSchema,
+  val raftApiVersions: ApiVersions,
+  val bootstrapMetadata: BootstrapMetadata,
+  val metadataFaultHandler: FaultHandler,
+  val fatalFaultHandler: FaultHandler,
 ) extends Logging with KafkaMetricsGroup {
   import kafka.server.Server._
 
+  config.dynamicConfig.initialize(zkClientOpt = None)
+
   val lock = new ReentrantLock()
   val awaitShutdownCond = lock.newCondition()
   var status: ProcessStatus = SHUTDOWN
@@ -76,7 +87,6 @@ class ControllerServer(
   var createTopicPolicy: Option[CreateTopicPolicy] = None
   var alterConfigPolicy: Option[AlterConfigPolicy] = None
   var controller: Controller = null
-  val supportedFeatures: Map[String, VersionRange] = Map()
   var quotaManagers: QuotaManagers = null
   var controllerApis: ControllerApis = null
   var controllerApisHandlerPool: KafkaRequestHandlerPool = null
@@ -93,6 +103,13 @@ class ControllerServer(
     true
   }
 
+  private def doRemoteKraftSetup(): Unit = {
+    // Explicitly configure metric reporters on this remote controller.
+    // We do not yet support dynamic reconfiguration on remote controllers in general;
+    // remove this once that is implemented.
+    new DynamicMetricReporterState(config.nodeId, config, metrics, clusterId)
+  }
+
   def clusterId: String = metaProperties.clusterId
 
   def startup(): Unit = {
@@ -101,7 +118,6 @@ class ControllerServer(
       info("Starting controller")
 
       maybeChangeStatus(STARTING, STARTED)
-      // TODO: initialize the log dir(s)
       this.logIdent = new LogContext(s"[ControllerServer id=${config.nodeId}] ").logPrefix()
 
       newGauge("ClusterId", () => clusterId)
@@ -114,7 +130,7 @@ class ControllerServer(
       }
 
       val javaListeners = config.controllerListeners.map(_.toJava).asJava
-      authorizer = config.authorizer
+      authorizer = config.createNewAuthorizer()
       authorizer.foreach(_.configure(config.originals))
 
       val authorizerFutures: Map[Endpoint, CompletableFuture[Void]] = authorizer match {
@@ -123,7 +139,11 @@ class ControllerServer(
           // AuthorizerServerInfo, such as the assumption that there is an inter-broker
           // listener, or that ID is named brokerId.
           val controllerAuthorizerInfo = ServerInfo(
-            new ClusterResource(clusterId), config.nodeId, javaListeners, javaListeners.get(0))
+            new ClusterResource(clusterId),
+            config.nodeId,
+            javaListeners,
+            javaListeners.get(0),
+            config.earlyStartListeners.map(_.value()).asJava)
           authZ.start(controllerAuthorizerInfo).asScala.map { case (ep, cs) =>
             ep -> cs.toCompletableFuture
           }.toMap
@@ -142,13 +162,12 @@ class ControllerServer(
         time,
         credentialProvider,
         apiVersionManager)
-      socketServer.startup(startProcessingRequests = false, controlPlaneListener = None, config.controllerListeners)
 
       if (config.controllerListeners.nonEmpty) {
         socketServerFirstBoundPortFuture.complete(socketServer.boundPort(
           config.controllerListeners.head.listenerName))
       } else {
-        throw new ConfigException("No controller.listener.names defined for controller");
+        throw new ConfigException("No controller.listener.names defined for controller")
       }
 
       val threadNamePrefixAsString = threadNamePrefix.getOrElse("")
@@ -158,38 +177,61 @@ class ControllerServer(
       alterConfigPolicy = Option(config.
         getConfiguredInstance(AlterConfigPolicyClassNameProp, classOf[AlterConfigPolicy]))
 
-      val controllerBuilder = new QuorumController.Builder(config.nodeId, metaProperties.clusterId).
-        setTime(time).
-        setThreadNamePrefix(threadNamePrefixAsString).
-        setConfigSchema(configSchema).
-        setRaftClient(raftManager.client).
-        setDefaultReplicationFactor(config.defaultReplicationFactor.toShort).
-        setDefaultNumPartitions(config.numPartitions.intValue()).
-        setSessionTimeoutNs(TimeUnit.NANOSECONDS.convert(config.brokerSessionTimeoutMs.longValue(),
-          TimeUnit.MILLISECONDS)).
-        setSnapshotMaxNewRecordBytes(config.metadataSnapshotMaxNewRecordBytes).
-        setMetrics(new QuorumControllerMetrics(KafkaYammerMetrics.defaultRegistry())).
-        setCreateTopicPolicy(createTopicPolicy.asJava).
-        setAlterConfigPolicy(alterConfigPolicy.asJava).
-        setConfigurationValidator(new ControllerConfigurationValidator())
+      val controllerNodes = RaftConfig.voterConnectionsToNodes(controllerQuorumVotersFuture.get())
+      val quorumFeatures = QuorumFeatures.create(config.nodeId, raftApiVersions, QuorumFeatures.defaultFeatureMap(), controllerNodes)
+
+      val controllerBuilder = {
+        val leaderImbalanceCheckIntervalNs = if (config.autoLeaderRebalanceEnable) {
+          OptionalLong.of(TimeUnit.NANOSECONDS.convert(config.leaderImbalanceCheckIntervalSeconds, TimeUnit.SECONDS))
+        } else {
+          OptionalLong.empty()
+        }
+
+        val maxIdleIntervalNs = config.metadataMaxIdleIntervalNs.fold(OptionalLong.empty)(OptionalLong.of)
+
+        new QuorumController.Builder(config.nodeId, metaProperties.clusterId).
+          setTime(time).
+          setThreadNamePrefix(threadNamePrefixAsString).
+          setConfigSchema(configSchema).
+          setRaftClient(raftManager.client).
+          setQuorumFeatures(quorumFeatures).
+          setDefaultReplicationFactor(config.defaultReplicationFactor.toShort).
+          setDefaultNumPartitions(config.numPartitions.intValue()).
+          setSessionTimeoutNs(TimeUnit.NANOSECONDS.convert(config.brokerSessionTimeoutMs.longValue(),
+            TimeUnit.MILLISECONDS)).
+          setSnapshotMaxNewRecordBytes(config.metadataSnapshotMaxNewRecordBytes).
+          setLeaderImbalanceCheckIntervalNs(leaderImbalanceCheckIntervalNs).
+          setMaxIdleIntervalNs(maxIdleIntervalNs).
+          setMetrics(controllerMetrics).
+          setCreateTopicPolicy(createTopicPolicy.asJava).
+          setAlterConfigPolicy(alterConfigPolicy.asJava).
+          setConfigurationValidator(new ControllerConfigurationValidator()).
+          setStaticConfig(config.originals).
+          setBootstrapMetadata(bootstrapMetadata).
+          setMetadataFaultHandler(metadataFaultHandler).
+          setFatalFaultHandler(fatalFaultHandler)
+      }
       authorizer match {
         case Some(a: ClusterMetadataAuthorizer) => controllerBuilder.setAuthorizer(a)
         case _ => // nothing to do
       }
       controller = controllerBuilder.build()
 
+      // Perform any setup that is done only when this node is a controller-only node.
+      if (!config.processRoles.contains(BrokerRole)) {
+        doRemoteKraftSetup()
+      }
+
       quotaManagers = QuotaFactory.instantiate(config, metrics, time, threadNamePrefix.getOrElse(""))
-      val controllerNodes = RaftConfig.voterConnectionsToNodes(controllerQuorumVotersFuture.get()).asScala
       controllerApis = new ControllerApis(socketServer.dataPlaneRequestChannel,
         authorizer,
         quotaManagers,
         time,
-        supportedFeatures,
         controller,
         raftManager,
         config,
         metaProperties,
-        controllerNodes.toSeq,
+        controllerNodes.asScala.toSeq,
         apiVersionManager)
       controllerApisHandlerPool = new KafkaRequestHandlerPool(config.nodeId,
         socketServer.dataPlaneRequestChannel,
@@ -198,7 +240,17 @@ class ControllerServer(
         config.numIoThreads,
         s"${DataPlaneAcceptor.MetricPrefix}RequestHandlerAvgIdlePercent",
         DataPlaneAcceptor.ThreadPrefix)
-      socketServer.startProcessingRequests(authorizerFutures)
+
+      /**
+       * Enable the controller endpoint(s). If we are using an authorizer which stores
+       * ACLs in the metadata log, such as StandardAuthorizer, we will be able to start
+       * accepting requests from principals included super.users right after this point,
+       * but we will not be able to process requests from non-superusers until the
+       * QuorumController declares that we have caught up to the high water mark of the
+       * metadata log. See @link{QuorumController#maybeCompleteAuthorizerInitialLoad}
+       * and KIP-801 for details.
+       */
+      socketServer.enableRequestProcessing(authorizerFutures)
     } catch {
       case e: Throwable =>
         maybeChangeStatus(STARTING, STARTED)
@@ -226,6 +278,7 @@ class ControllerServer(
         CoreUtils.swallow(quotaManagers.shutdown(), this)
       if (controller != null)
         controller.close()
+      CoreUtils.swallow(authorizer.foreach(_.close()), this)
       createTopicPolicy.foreach(policy => CoreUtils.swallow(policy.close(), this))
       alterConfigPolicy.foreach(policy => CoreUtils.swallow(policy.close(), this))
       socketServerFirstBoundPortFuture.completeExceptionally(new RuntimeException("shutting down"))
diff --git a/core/src/main/scala/kafka/server/DelayedFetch.scala b/core/src/main/scala/kafka/server/DelayedFetch.scala
index 8d38ef8b6d3d0..55a15682b64e0 100644
--- a/core/src/main/scala/kafka/server/DelayedFetch.scala
+++ b/core/src/main/scala/kafka/server/DelayedFetch.scala
@@ -23,7 +23,6 @@ import kafka.metrics.KafkaMetricsGroup
 import org.apache.kafka.common.TopicIdPartition
 import org.apache.kafka.common.errors._
 import org.apache.kafka.common.protocol.Errors
-import org.apache.kafka.common.replica.ClientMetadata
 import org.apache.kafka.common.requests.FetchRequest.PartitionData
 import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.{UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET}
 
@@ -38,36 +37,23 @@ case class FetchPartitionStatus(startOffsetMetadata: LogOffsetMetadata, fetchInf
   }
 }
 
-/**
- * The fetch metadata maintained by the delayed fetch operation
- */
-case class FetchMetadata(fetchMinBytes: Int,
-                         fetchMaxBytes: Int,
-                         hardMaxBytesLimit: Boolean,
-                         fetchOnlyLeader: Boolean,
-                         fetchIsolation: FetchIsolation,
-                         isFromFollower: Boolean,
-                         replicaId: Int,
-                         fetchPartitionStatus: Seq[(TopicIdPartition, FetchPartitionStatus)]) {
-
-  override def toString = "FetchMetadata(minBytes=" + fetchMinBytes + ", " +
-    "maxBytes=" + fetchMaxBytes + ", " +
-    "onlyLeader=" + fetchOnlyLeader + ", " +
-    "fetchIsolation=" + fetchIsolation + ", " +
-    "replicaId=" + replicaId + ", " +
-    "partitionStatus=" + fetchPartitionStatus + ")"
-}
 /**
  * A delayed fetch operation that can be created by the replica manager and watched
  * in the fetch operation purgatory
  */
-class DelayedFetch(delayMs: Long,
-                   fetchMetadata: FetchMetadata,
-                   replicaManager: ReplicaManager,
-                   quota: ReplicaQuota,
-                   clientMetadata: Option[ClientMetadata],
-                   responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit)
-  extends DelayedOperation(delayMs) {
+class DelayedFetch(
+  params: FetchParams,
+  fetchPartitionStatus: Seq[(TopicIdPartition, FetchPartitionStatus)],
+  replicaManager: ReplicaManager,
+  quota: ReplicaQuota,
+  responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit
+) extends DelayedOperation(params.maxWaitMs) {
+
+  override def toString: String = {
+    s"DelayedFetch(params=$params" +
+      s", numPartitions=${fetchPartitionStatus.size}" +
+      ")"
+  }
 
   /**
    * The operation can be completed if:
@@ -84,16 +70,16 @@ class DelayedFetch(delayMs: Long,
    */
   override def tryComplete(): Boolean = {
     var accumulatedSize = 0
-    fetchMetadata.fetchPartitionStatus.foreach {
+    fetchPartitionStatus.foreach {
       case (topicIdPartition, fetchStatus) =>
         val fetchOffset = fetchStatus.startOffsetMetadata
         val fetchLeaderEpoch = fetchStatus.fetchInfo.currentLeaderEpoch
         try {
           if (fetchOffset != LogOffsetMetadata.UnknownOffsetMetadata) {
             val partition = replicaManager.getPartitionOrException(topicIdPartition.topicPartition)
-            val offsetSnapshot = partition.fetchOffsetSnapshot(fetchLeaderEpoch, fetchMetadata.fetchOnlyLeader)
+            val offsetSnapshot = partition.fetchOffsetSnapshot(fetchLeaderEpoch, params.fetchOnlyLeader)
 
-            val endOffset = fetchMetadata.fetchIsolation match {
+            val endOffset = params.isolation match {
               case FetchLogEnd => offsetSnapshot.logEndOffset
               case FetchHighWatermark => offsetSnapshot.highWatermark
               case FetchTxnCommitted => offsetSnapshot.lastStableOffset
@@ -105,19 +91,19 @@ class DelayedFetch(delayMs: Long,
             if (endOffset.messageOffset != fetchOffset.messageOffset) {
               if (endOffset.onOlderSegment(fetchOffset)) {
                 // Case F, this can happen when the new fetch operation is on a truncated leader
-                debug(s"Satisfying fetch $fetchMetadata since it is fetching later segments of partition $topicIdPartition.")
+                debug(s"Satisfying fetch $this since it is fetching later segments of partition $topicIdPartition.")
                 return forceComplete()
               } else if (fetchOffset.onOlderSegment(endOffset)) {
                 // Case F, this can happen when the fetch operation is falling behind the current segment
                 // or the partition has just rolled a new segment
-                debug(s"Satisfying fetch $fetchMetadata immediately since it is fetching older segments.")
+                debug(s"Satisfying fetch $this immediately since it is fetching older segments.")
                 // We will not force complete the fetch request if a replica should be throttled.
-                if (!fetchMetadata.isFromFollower || !replicaManager.shouldLeaderThrottle(quota, partition, fetchMetadata.replicaId))
+                if (!params.isFromFollower || !replicaManager.shouldLeaderThrottle(quota, partition, params.replicaId))
                   return forceComplete()
               } else if (fetchOffset.messageOffset < endOffset.messageOffset) {
                 // we take the partition fetch size as upper bound when accumulating the bytes (skip if a throttled partition)
                 val bytesAvailable = math.min(endOffset.positionDiff(fetchOffset), fetchStatus.fetchInfo.maxBytes)
-                if (!fetchMetadata.isFromFollower || !replicaManager.shouldLeaderThrottle(quota, partition, fetchMetadata.replicaId))
+                if (!params.isFromFollower || !replicaManager.shouldLeaderThrottle(quota, partition, params.replicaId))
                   accumulatedSize += bytesAvailable
               }
             }
@@ -131,7 +117,7 @@ class DelayedFetch(delayMs: Long,
                 debug(s"Could not obtain last offset for leader epoch for partition $topicIdPartition, epochEndOffset=$epochEndOffset.")
                 return forceComplete()
               } else if (epochEndOffset.leaderEpoch < fetchEpoch || epochEndOffset.endOffset < fetchStatus.fetchInfo.fetchOffset) {
-                debug(s"Satisfying fetch $fetchMetadata since it has diverging epoch requiring truncation for partition " +
+                debug(s"Satisfying fetch $this since it has diverging epoch requiring truncation for partition " +
                   s"$topicIdPartition epochEndOffset=$epochEndOffset fetchEpoch=$fetchEpoch fetchOffset=${fetchStatus.fetchInfo.fetchOffset}.")
                 return forceComplete()
               }
@@ -139,30 +125,30 @@ class DelayedFetch(delayMs: Long,
           }
         } catch {
           case _: NotLeaderOrFollowerException =>  // Case A or Case B
-            debug(s"Broker is no longer the leader or follower of $topicIdPartition, satisfy $fetchMetadata immediately")
+            debug(s"Broker is no longer the leader or follower of $topicIdPartition, satisfy $this immediately")
             return forceComplete()
           case _: UnknownTopicOrPartitionException => // Case C
-            debug(s"Broker no longer knows of partition $topicIdPartition, satisfy $fetchMetadata immediately")
+            debug(s"Broker no longer knows of partition $topicIdPartition, satisfy $this immediately")
             return forceComplete()
           case _: KafkaStorageException => // Case D
-            debug(s"Partition $topicIdPartition is in an offline log directory, satisfy $fetchMetadata immediately")
+            debug(s"Partition $topicIdPartition is in an offline log directory, satisfy $this immediately")
             return forceComplete()
           case _: FencedLeaderEpochException => // Case E
             debug(s"Broker is the leader of partition $topicIdPartition, but the requested epoch " +
-              s"$fetchLeaderEpoch is fenced by the latest leader epoch, satisfy $fetchMetadata immediately")
+              s"$fetchLeaderEpoch is fenced by the latest leader epoch, satisfy $this immediately")
             return forceComplete()
         }
     }
 
     // Case G
-    if (accumulatedSize >= fetchMetadata.fetchMinBytes)
+    if (accumulatedSize >= params.minBytes)
        forceComplete()
     else
       false
   }
 
   override def onExpiration(): Unit = {
-    if (fetchMetadata.isFromFollower)
+    if (params.isFromFollower)
       DelayedFetchMetrics.followerExpiredRequestMeter.mark()
     else
       DelayedFetchMetrics.consumerExpiredRequestMeter.mark()
@@ -172,19 +158,20 @@ class DelayedFetch(delayMs: Long,
    * Upon completion, read whatever data is available and pass to the complete callback
    */
   override def onComplete(): Unit = {
+    val fetchInfos = fetchPartitionStatus.map { case (tp, status) =>
+      tp -> status.fetchInfo
+    }
+
     val logReadResults = replicaManager.readFromLocalLog(
-      replicaId = fetchMetadata.replicaId,
-      fetchOnlyFromLeader = fetchMetadata.fetchOnlyLeader,
-      fetchIsolation = fetchMetadata.fetchIsolation,
-      fetchMaxBytes = fetchMetadata.fetchMaxBytes,
-      hardMaxBytesLimit = fetchMetadata.hardMaxBytesLimit,
-      readPartitionInfo = fetchMetadata.fetchPartitionStatus.map { case (tp, status) => tp -> status.fetchInfo },
-      clientMetadata = clientMetadata,
-      quota = quota)
+      params,
+      fetchInfos,
+      quota,
+      readFromPurgatory = true
+    )
 
     val fetchPartitionData = logReadResults.map { case (tp, result) =>
-      val isReassignmentFetch = fetchMetadata.isFromFollower &&
-        replicaManager.isAddingReplica(tp.topicPartition, fetchMetadata.replicaId)
+      val isReassignmentFetch = params.isFromFollower &&
+        replicaManager.isAddingReplica(tp.topicPartition, params.replicaId)
 
       tp -> result.toFetchPartitionData(isReassignmentFetch)
     }
diff --git a/core/src/main/scala/kafka/server/DelegationTokenManager.scala b/core/src/main/scala/kafka/server/DelegationTokenManager.scala
index 536a296383aac..235e3a12ad075 100644
--- a/core/src/main/scala/kafka/server/DelegationTokenManager.scala
+++ b/core/src/main/scala/kafka/server/DelegationTokenManager.scala
@@ -42,13 +42,14 @@ import scala.collection.mutable
 object DelegationTokenManager {
   val DefaultHmacAlgorithm = "HmacSHA512"
   val OwnerKey ="owner"
+  val TokenRequesterKey = "tokenRequester"
   val RenewersKey = "renewers"
   val IssueTimestampKey = "issueTimestamp"
   val MaxTimestampKey = "maxTimestamp"
   val ExpiryTimestampKey = "expiryTimestamp"
   val TokenIdKey = "tokenId"
   val VersionKey = "version"
-  val CurrentVersion = 1
+  val CurrentVersion = 3
   val ErrorTimestamp = -1
 
   /**
@@ -103,6 +104,7 @@ object DelegationTokenManager {
     val tokenInfoMap = mutable.Map[String, Any]()
     tokenInfoMap(VersionKey) = CurrentVersion
     tokenInfoMap(OwnerKey) = Sanitizer.sanitize(tokenInfo.ownerAsString)
+    tokenInfoMap(TokenRequesterKey) = Sanitizer.sanitize(tokenInfo.tokenRequester.toString)
     tokenInfoMap(RenewersKey) = tokenInfo.renewersAsString.asScala.map(e => Sanitizer.sanitize(e)).asJava
     tokenInfoMap(IssueTimestampKey) = tokenInfo.issueTimestamp
     tokenInfoMap(MaxTimestampKey) = tokenInfo.maxTimestamp
@@ -118,16 +120,20 @@ object DelegationTokenManager {
     Json.parseBytes(bytes) match {
       case Some(js) =>
         val mainJs = js.asJsonObject
-        require(mainJs(VersionKey).to[Int] == CurrentVersion)
+        val version = mainJs(VersionKey).to[Int]
+        require(version > 0 && version <= CurrentVersion)
         val owner = SecurityUtils.parseKafkaPrincipal(Sanitizer.desanitize(mainJs(OwnerKey).to[String]))
+        var tokenRequester = owner
+        if (version >= 3)
+          tokenRequester = SecurityUtils.parseKafkaPrincipal(Sanitizer.desanitize(mainJs(TokenRequesterKey).to[String]))
         val renewerStr = mainJs(RenewersKey).to[Seq[String]]
-        val renewers = renewerStr.map(Sanitizer.desanitize(_)).map(SecurityUtils.parseKafkaPrincipal(_))
+        val renewers = renewerStr.map(Sanitizer.desanitize).map(SecurityUtils.parseKafkaPrincipal)
         val issueTimestamp = mainJs(IssueTimestampKey).to[Long]
         val expiryTimestamp = mainJs(ExpiryTimestampKey).to[Long]
         val maxTimestamp = mainJs(MaxTimestampKey).to[Long]
         val tokenId = mainJs(TokenIdKey).to[String]
 
-        val tokenInfo = new TokenInformation(tokenId, owner, renewers.asJava,
+        val tokenInfo = new TokenInformation(tokenId, owner, tokenRequester, renewers.asJava,
           issueTimestamp, maxTimestamp, expiryTimestamp)
 
         Some(tokenInfo)
@@ -136,17 +142,18 @@ object DelegationTokenManager {
     }
   }
 
-  def filterToken(requestedPrincipal: KafkaPrincipal, owners : Option[List[KafkaPrincipal]], token: TokenInformation, authorizeToken: String => Boolean) : Boolean = {
+  def filterToken(requesterPrincipal: KafkaPrincipal, owners : Option[List[KafkaPrincipal]], token: TokenInformation,
+                  authorizeToken: String => Boolean, authorizeRequester: KafkaPrincipal  => Boolean) : Boolean = {
 
     val allow =
     //exclude tokens which are not requested
-      if (!owners.isEmpty && !owners.get.exists(owner => token.ownerOrRenewer(owner))) {
+      if (owners.isDefined && !owners.get.exists(owner => token.ownerOrRenewer(owner))) {
         false
         //Owners and the renewers can describe their own tokens
-      } else if (token.ownerOrRenewer(requestedPrincipal)) {
+      } else if (token.ownerOrRenewer(requesterPrincipal)) {
         true
         // Check permission for non-owned tokens
-      } else if ((authorizeToken(token.tokenId))) {
+      } else if (authorizeToken(token.tokenId) || authorizeRequester(token.owner)) {
         true
       }
       else {
@@ -172,7 +179,7 @@ class DelegationTokenManager(val config: KafkaConfig,
 
   val secretKey = {
     val keyBytes =  if (config.tokenAuthEnabled) config.delegationTokenSecretKey.value.getBytes(StandardCharsets.UTF_8) else null
-    if (keyBytes == null || keyBytes.length == 0) null
+    if (keyBytes == null || keyBytes.isEmpty) null
     else
       createSecretKey(keyBytes)
   }
@@ -183,7 +190,7 @@ class DelegationTokenManager(val config: KafkaConfig,
   private val lock = new Object()
   private var tokenChangeListener: ZkNodeChangeNotificationListener = null
 
-  def startup() = {
+  def startup(): Unit = {
     if (config.tokenAuthEnabled) {
       zkClient.createDelegationTokenPaths()
       loadCache()
@@ -192,7 +199,7 @@ class DelegationTokenManager(val config: KafkaConfig,
     }
   }
 
-  def shutdown() = {
+  def shutdown(): Unit = {
     if (config.tokenAuthEnabled) {
       if (tokenChangeListener != null) tokenChangeListener.close()
     }
@@ -259,12 +266,13 @@ class DelegationTokenManager(val config: KafkaConfig,
    * @param responseCallback
    */
   def createToken(owner: KafkaPrincipal,
+                  tokenRequester: KafkaPrincipal,
                   renewers: List[KafkaPrincipal],
                   maxLifeTimeMs: Long,
                   responseCallback: CreateResponseCallback): Unit = {
 
     if (!config.tokenAuthEnabled) {
-      responseCallback(CreateTokenResult(-1, -1, -1, "", Array[Byte](), Errors.DELEGATION_TOKEN_AUTH_DISABLED))
+      responseCallback(CreateTokenResult(owner, tokenRequester, -1, -1, -1, "", Array[Byte](), Errors.DELEGATION_TOKEN_AUTH_DISABLED))
     } else {
       lock.synchronized {
         val tokenId = CoreUtils.generateUuidAsBase64()
@@ -274,13 +282,13 @@ class DelegationTokenManager(val config: KafkaConfig,
         val maxLifeTimeStamp = issueTimeStamp + maxLifeTime
         val expiryTimeStamp = Math.min(maxLifeTimeStamp, issueTimeStamp + defaultTokenRenewTime)
 
-        val tokenInfo = new TokenInformation(tokenId, owner, renewers.asJava, issueTimeStamp, maxLifeTimeStamp, expiryTimeStamp)
+        val tokenInfo = new TokenInformation(tokenId, owner, tokenRequester, renewers.asJava, issueTimeStamp, maxLifeTimeStamp, expiryTimeStamp)
 
         val hmac = createHmac(tokenId, secretKey)
         val token = new DelegationToken(tokenInfo, hmac)
         updateToken(token)
         info(s"Created a delegation token: $tokenId for owner: $owner")
-        responseCallback(CreateTokenResult(issueTimeStamp, expiryTimeStamp, maxLifeTimeStamp, tokenId, hmac, Errors.NONE))
+        responseCallback(CreateTokenResult(owner, tokenRequester, issueTimeStamp, expiryTimeStamp, maxLifeTimeStamp, tokenId, hmac, Errors.NONE))
       }
     }
   }
@@ -485,7 +493,9 @@ class DelegationTokenManager(val config: KafkaConfig,
 
 }
 
-case class CreateTokenResult(issueTimestamp: Long,
+case class CreateTokenResult(owner: KafkaPrincipal,
+                             tokenRequester: KafkaPrincipal,
+                             issueTimestamp: Long,
                              expiryTimestamp: Long,
                              maxTimestamp: Long,
                              tokenId: String,
@@ -496,6 +506,8 @@ case class CreateTokenResult(issueTimestamp: Long,
     other match {
       case that: CreateTokenResult =>
         error.equals(that.error) &&
+          owner.equals(that.owner) &&
+          tokenRequester.equals(that.tokenRequester) &&
           tokenId.equals(that.tokenId) &&
           issueTimestamp.equals(that.issueTimestamp) &&
           expiryTimestamp.equals(that.expiryTimestamp) &&
@@ -506,7 +518,7 @@ case class CreateTokenResult(issueTimestamp: Long,
   }
 
   override def hashCode(): Int = {
-    val fields = Seq(issueTimestamp, expiryTimestamp, maxTimestamp, tokenId, hmac, error)
+    val fields = Seq(owner, tokenRequester, issueTimestamp, expiryTimestamp, maxTimestamp, tokenId, hmac, error)
     fields.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
   }
 }
diff --git a/core/src/main/scala/kafka/server/DynamicBrokerConfig.scala b/core/src/main/scala/kafka/server/DynamicBrokerConfig.scala
index cb6cd84d3b630..a860938124e6a 100755
--- a/core/src/main/scala/kafka/server/DynamicBrokerConfig.scala
+++ b/core/src/main/scala/kafka/server/DynamicBrokerConfig.scala
@@ -19,6 +19,7 @@ package kafka.server
 
 import java.util
 import java.util.{Collections, Properties}
+import java.util.concurrent.CopyOnWriteArrayList
 import java.util.concurrent.locks.ReentrantReadWriteLock
 import kafka.cluster.EndPoint
 import kafka.log.{LogCleaner, LogConfig, LogManager}
@@ -29,7 +30,7 @@ import kafka.utils.Implicits._
 import kafka.zk.{AdminZkClient, KafkaZkClient}
 import org.apache.kafka.common.Reconfigurable
 import org.apache.kafka.common.config.{AbstractConfig, ConfigDef, ConfigException, SslConfigs}
-import org.apache.kafka.common.metrics.MetricsReporter
+import org.apache.kafka.common.metrics.{Metrics, MetricsReporter}
 import org.apache.kafka.common.config.types.Password
 import org.apache.kafka.common.network.{ListenerName, ListenerReconfigurable}
 import org.apache.kafka.common.security.authenticator.LoginManager
@@ -181,16 +182,18 @@ object DynamicBrokerConfig {
   private[server] def dynamicConfigUpdateModes: util.Map[String, String] = {
     AllDynamicConfigs.map { name =>
       val mode = if (PerBrokerConfigs.contains(name)) "per-broker" else "cluster-wide"
-      (name -> mode)
+      name -> mode
     }.toMap.asJava
   }
 
   private[server] def resolveVariableConfigs(propsOriginal: Properties): Properties = {
     val props = new Properties
     val config = new AbstractConfig(new ConfigDef(), propsOriginal, false)
-    config.originals.asScala.filter(!_._1.startsWith(AbstractConfig.CONFIG_PROVIDERS_CONFIG)).foreach {case (key: String, value: Object) => {
-      props.put(key, value)
-    }}
+    config.originals.forEach { (key, value) =>
+      if (!key.startsWith(AbstractConfig.CONFIG_PROVIDERS_CONFIG)) {
+        props.put(key, value)
+      }
+    }
     props
   }
 }
@@ -201,11 +204,18 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
   private[server] val staticDefaultConfigs = ConfigDef.convertToStringMapWithPasswordValues(KafkaConfig.defaultValues.asJava).asScala
   private val dynamicBrokerConfigs = mutable.Map[String, String]()
   private val dynamicDefaultConfigs = mutable.Map[String, String]()
-  private val reconfigurables = mutable.Buffer[Reconfigurable]()
-  private val brokerReconfigurables = mutable.Buffer[BrokerReconfigurable]()
+
+  // Use COWArrayList to prevent concurrent modification exception when an item is added by one thread to these
+  // collections, while another thread is iterating over them.
+  private val reconfigurables = new CopyOnWriteArrayList[Reconfigurable]()
+  private val brokerReconfigurables = new CopyOnWriteArrayList[BrokerReconfigurable]()
   private val lock = new ReentrantReadWriteLock
   private var currentConfig: KafkaConfig = null
-  private val dynamicConfigPasswordEncoder = maybeCreatePasswordEncoder(kafkaConfig.passwordEncoderSecret)
+  private val dynamicConfigPasswordEncoder = if (kafkaConfig.processRoles.isEmpty) {
+    maybeCreatePasswordEncoder(kafkaConfig.passwordEncoderSecret)
+  } else {
+    Some(PasswordEncoder.noop())
+  }
 
   private[server] def initialize(zkClientOpt: Option[KafkaZkClient]): Unit = {
     currentConfig = new KafkaConfig(kafkaConfig.props, false, None)
@@ -248,8 +258,8 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
       case _ =>
     }
     addReconfigurable(kafkaServer.kafkaYammerMetrics)
-    addReconfigurable(new DynamicMetricsReporters(kafkaConfig.brokerId, kafkaServer))
-    addReconfigurable(new DynamicClientQuotaCallback(kafkaConfig.brokerId, kafkaServer))
+    addReconfigurable(new DynamicMetricsReporters(kafkaConfig.brokerId, kafkaServer.config, kafkaServer.metrics, kafkaServer.clusterId))
+    addReconfigurable(new DynamicClientQuotaCallback(kafkaServer))
 
     addBrokerReconfigurable(new DynamicThreadPool(kafkaServer))
     addBrokerReconfigurable(new DynamicLogConfig(kafkaServer.logManager, kafkaServer))
@@ -259,16 +269,16 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
 
   def addReconfigurable(reconfigurable: Reconfigurable): Unit = CoreUtils.inWriteLock(lock) {
     verifyReconfigurableConfigs(reconfigurable.reconfigurableConfigs.asScala)
-    reconfigurables += reconfigurable
+    reconfigurables.add(reconfigurable)
   }
 
   def addBrokerReconfigurable(reconfigurable: BrokerReconfigurable): Unit = CoreUtils.inWriteLock(lock) {
     verifyReconfigurableConfigs(reconfigurable.reconfigurableConfigs)
-    brokerReconfigurables += reconfigurable
+    brokerReconfigurables.add(reconfigurable)
   }
 
   def removeReconfigurable(reconfigurable: Reconfigurable): Unit = CoreUtils.inWriteLock(lock) {
-    reconfigurables -= reconfigurable
+    reconfigurables.remove(reconfigurable)
   }
 
   private def verifyReconfigurableConfigs(configNames: Set[String]): Unit = CoreUtils.inWriteLock(lock) {
@@ -320,7 +330,7 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
    * changes are processed. At the moment, only listener configs are considered for reloading.
    */
   private[server] def reloadUpdatedFilesWithoutConfigChange(newProps: Properties): Unit = CoreUtils.inWriteLock(lock) {
-    reconfigurables
+    reconfigurables.asScala
       .filter(reconfigurable => ReloadableFileConfigs.exists(reconfigurable.reconfigurableConfigs.contains))
       .foreach {
         case reconfigurable: ListenerReconfigurable =>
@@ -334,7 +344,7 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
 
   private def maybeCreatePasswordEncoder(secret: Option[Password]): Option[PasswordEncoder] = {
    secret.map { secret =>
-      new PasswordEncoder(secret,
+     PasswordEncoder.encrypting(secret,
         kafkaConfig.passwordEncoderKeyFactoryAlgorithm,
         kafkaConfig.passwordEncoderCipherAlgorithm,
         kafkaConfig.passwordEncoderKeyLength,
@@ -417,7 +427,7 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
                 debug(s"Dynamic password config $configName could not be decoded using old secret, new secret will be used.")
                 None
             }
-            decoded.foreach { value => props.put(configName, passwordEncoder.encode(new Password(value))) }
+            decoded.foreach(value => props.put(configName, passwordEncoder.encode(new Password(value))))
           }
         }
         adminZkClient.changeBrokerConfig(Some(kafkaConfig.brokerId), props)
@@ -483,8 +493,8 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
    * Returns the change in configurations between the new props and current props by returning a
    * map of the changed configs, as well as the set of deleted keys
    */
-  private def updatedConfigs(newProps: java.util.Map[String, _], currentProps: java.util.Map[String, _]):
-  (mutable.Map[String, _], Set[String]) = {
+  private def updatedConfigs(newProps: java.util.Map[String, _],
+                             currentProps: java.util.Map[String, _]): (mutable.Map[String, _], Set[String]) = {
     val changeMap = newProps.asScala.filter {
       case (k, v) => v != currentProps.get(k)
     }
@@ -535,8 +545,8 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
     if (changeMap.nonEmpty || deletedKeySet.nonEmpty) {
       try {
         val customConfigs = new util.HashMap[String, Object](newConfig.originalsFromThisConfig) // non-Kafka configs
-        newConfig.valuesFromThisConfig.keySet.forEach(customConfigs.remove(_))
-        reconfigurables.foreach {
+        newConfig.valuesFromThisConfig.keySet.forEach(k => customConfigs.remove(k))
+        reconfigurables.forEach {
           case listenerReconfigurable: ListenerReconfigurable =>
             processListenerReconfigurable(listenerReconfigurable, newConfig, customConfigs, validateOnly, reloadOnly = false)
           case reconfigurable =>
@@ -546,7 +556,7 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
 
         // BrokerReconfigurable updates are processed after config is updated. Only do the validation here.
         val brokerReconfigurablesToUpdate = mutable.Buffer[BrokerReconfigurable]()
-        brokerReconfigurables.foreach { reconfigurable =>
+        brokerReconfigurables.forEach { reconfigurable =>
           if (needsReconfiguration(reconfigurable.reconfigurableConfigs.asJava, changeMap.keySet, deletedKeySet)) {
             reconfigurable.validateReconfiguration(newConfig)
             if (!validateOnly)
@@ -593,7 +603,7 @@ class DynamicBrokerConfig(private val kafkaConfig: KafkaConfig) extends Logging
                                     newCustomConfigs: util.Map[String, Object],
                                     validateOnly: Boolean): Unit = {
     val newConfigs = new util.HashMap[String, Object]
-    allNewConfigs.forEach { (k, v) => newConfigs.put(k, v.asInstanceOf[AnyRef]) }
+    allNewConfigs.forEach((k, v) => newConfigs.put(k, v.asInstanceOf[AnyRef]))
     newConfigs.putAll(newCustomConfigs)
     try {
       reconfigurable.validateReconfiguration(newConfigs)
@@ -647,7 +657,9 @@ class DynamicLogConfig(logManager: LogManager, server: KafkaBroker) extends Brok
     logManager.allLogs.foreach { log =>
       val props = mutable.Map.empty[Any, Any]
       props ++= newBrokerDefaults
-      props ++= log.config.originals.asScala.filter { case (k, _) => log.config.overriddenConfigs.contains(k) }
+      props ++= log.config.originals.asScala.filter { case (k, _) =>
+        log.config.overriddenConfigs.contains(k)
+      }
 
       val logConfig = LogConfig(props.asJava, log.config.overriddenConfigs)
       log.updateConfig(logConfig)
@@ -736,17 +748,18 @@ class DynamicThreadPool(server: KafkaBroker) extends BrokerReconfigurable {
   }
 }
 
-class DynamicMetricsReporters(brokerId: Int, server: KafkaBroker) extends Reconfigurable {
+class DynamicMetricsReporters(brokerId: Int, config: KafkaConfig, metrics: Metrics, clusterId: String) extends Reconfigurable {
+  private val reporterState = new DynamicMetricReporterState(brokerId, config, metrics, clusterId)
+  private val currentReporters = reporterState.currentReporters
+  private val dynamicConfig = reporterState.dynamicConfig
 
-  private val dynamicConfig = server.config.dynamicConfig
-  private val metrics = server.metrics
-  private val propsOverride = Map[String, AnyRef](KafkaConfig.BrokerIdProp -> brokerId.toString)
-  private val currentReporters = mutable.Map[String, MetricsReporter]()
+  private def metricsReporterClasses(configs: util.Map[String, _]): mutable.Buffer[String] =
+    reporterState.metricsReporterClasses(configs)
 
-  createReporters(dynamicConfig.currentKafkaConfig.getList(KafkaConfig.MetricReporterClassesProp),
-    Collections.emptyMap[String, Object])
+  private def createReporters(reporterClasses: util.List[String], updatedConfigs: util.Map[String, _]): Unit =
+    reporterState.createReporters(reporterClasses, updatedConfigs)
 
-  private[server] def currentMetricsReporters: List[MetricsReporter] = currentReporters.values.toList
+  private def removeReporter(className: String): Unit = reporterState.removeReporter(className)
 
   override def configure(configs: util.Map[String, _]): Unit = {}
 
@@ -789,31 +802,53 @@ class DynamicMetricsReporters(brokerId: Int, server: KafkaBroker) extends Reconf
     val added = updatedMetricsReporters.filterNot(currentReporters.keySet)
     createReporters(added.asJava, configs)
   }
+}
 
-  private def createReporters(reporterClasses: util.List[String],
+class DynamicMetricReporterState(brokerId: Int, config: KafkaConfig, metrics: Metrics, clusterId: String) {
+  private[server] val dynamicConfig = config.dynamicConfig
+  private val propsOverride = Map[String, AnyRef](KafkaConfig.BrokerIdProp -> brokerId.toString)
+  private[server] val currentReporters = mutable.Map[String, MetricsReporter]()
+  createReporters(config, clusterId, metricsReporterClasses(dynamicConfig.currentKafkaConfig.values()).asJava,
+    Collections.emptyMap[String, Object])
+
+  private[server] def createReporters(reporterClasses: util.List[String],
+                                      updatedConfigs: util.Map[String, _]): Unit = {
+    createReporters(config, clusterId, reporterClasses, updatedConfigs)
+  }
+
+  private def createReporters(config: KafkaConfig,
+                              clusterId: String,
+                              reporterClasses: util.List[String],
                               updatedConfigs: util.Map[String, _]): Unit = {
     val props = new util.HashMap[String, AnyRef]
-    updatedConfigs.forEach { (k, v) => props.put(k, v.asInstanceOf[AnyRef]) }
-    propsOverride.forKeyValue { (k, v) => props.put(k, v) }
+    updatedConfigs.forEach((k, v) => props.put(k, v.asInstanceOf[AnyRef]))
+    propsOverride.forKeyValue((k, v) => props.put(k, v))
     val reporters = dynamicConfig.currentKafkaConfig.getConfiguredInstances(reporterClasses, classOf[MetricsReporter], props)
+    // Call notifyMetricsReporters first to satisfy the contract for MetricsReporter.contextChange,
+    // which provides that MetricsReporter.contextChange must be called before the first call to MetricsReporter.init.
+    // The first call to MetricsReporter.init is done when we call metrics.addReporter below.
+    KafkaBroker.notifyMetricsReporters(clusterId, config, reporters.asScala)
     reporters.forEach { reporter =>
       metrics.addReporter(reporter)
       currentReporters += reporter.getClass.getName -> reporter
     }
-    KafkaBroker.notifyClusterListeners(server.clusterId, reporters.asScala)
-    KafkaBroker.notifyMetricsReporters(server.clusterId, server.config, reporters.asScala)
+    KafkaBroker.notifyClusterListeners(clusterId, reporters.asScala)
   }
 
-  private def removeReporter(className: String): Unit = {
+  private[server] def removeReporter(className: String): Unit = {
     currentReporters.remove(className).foreach(metrics.removeReporter)
   }
 
-  private def metricsReporterClasses(configs: util.Map[String, _]): mutable.Buffer[String] = {
+  private[server] def metricsReporterClasses(configs: util.Map[String, _]): mutable.Buffer[String] = {
     configs.get(KafkaConfig.MetricReporterClassesProp).asInstanceOf[util.List[String]].asScala
   }
 }
-object DynamicListenerConfig {
 
+object DynamicListenerConfig {
+  /**
+   * The set of configurations which the DynamicListenerConfig object listens for. Many of
+   * these are also monitored by other objects such as ChannelBuilders and SocketServers.
+   */
   val ReconfigurableConfigs = Set(
     // Listener configs
     KafkaConfig.AdvertisedListenersProp,
@@ -864,7 +899,7 @@ object DynamicListenerConfig {
   )
 }
 
-class DynamicClientQuotaCallback(brokerId: Int, server: KafkaBroker) extends Reconfigurable {
+class DynamicClientQuotaCallback(server: KafkaBroker) extends Reconfigurable {
 
   override def configure(configs: util.Map[String, _]): Unit = {}
 
@@ -901,11 +936,32 @@ class DynamicListenerConfig(server: KafkaBroker) extends BrokerReconfigurable wi
     DynamicListenerConfig.ReconfigurableConfigs
   }
 
+  private def listenerRegistrationsAltered(
+    oldAdvertisedListeners: Map[ListenerName, EndPoint],
+    newAdvertisedListeners: Map[ListenerName, EndPoint]
+  ): Boolean = {
+    if (oldAdvertisedListeners.size != newAdvertisedListeners.size) return true
+    oldAdvertisedListeners.forKeyValue {
+      case (oldListenerName, oldEndpoint) =>
+        newAdvertisedListeners.get(oldListenerName) match {
+          case None => return true
+          case Some(newEndpoint) => if (!newEndpoint.equals(oldEndpoint)) {
+            return true
+          }
+        }
+    }
+    false
+  }
+
+  private def verifyListenerRegistrationAlterationSupported(): Unit = {
+    if (!server.config.requiresZookeeper) {
+      throw new ConfigException("Advertised listeners cannot be altered when using a " +
+        "Raft-based metadata quorum.")
+    }
+  }
+
   def validateReconfiguration(newConfig: KafkaConfig): Unit = {
     val oldConfig = server.config
-    if (!oldConfig.requiresZookeeper) {
-      throw new ConfigException("Dynamic reconfiguration of listeners is not yet supported when using a Raft-based metadata quorum")
-    }
     val newListeners = listenersToMap(newConfig.listeners)
     val newAdvertisedListeners = listenersToMap(newConfig.effectiveAdvertisedListeners)
     val oldListeners = listenersToMap(oldConfig.listeners)
@@ -928,6 +984,13 @@ class DynamicListenerConfig(server: KafkaBroker) extends BrokerReconfigurable wi
     }
     if (!newAdvertisedListeners.contains(newConfig.interBrokerListenerName))
       throw new ConfigException(s"Advertised listener must be specified for inter-broker listener ${newConfig.interBrokerListenerName}")
+
+    // Currently, we do not support adding or removing listeners when in KRaft mode.
+    // However, we support changing other listener configurations (max connections, etc.)
+    if (listenerRegistrationsAltered(listenersToMap(oldConfig.effectiveAdvertisedListeners),
+        listenersToMap(newConfig.effectiveAdvertisedListeners))) {
+      verifyListenerRegistrationAlterationSupported()
+    }
   }
 
   def reconfigure(oldConfig: KafkaConfig, newConfig: KafkaConfig): Unit = {
@@ -937,18 +1000,18 @@ class DynamicListenerConfig(server: KafkaBroker) extends BrokerReconfigurable wi
     val oldListenerMap = listenersToMap(oldListeners)
     val listenersRemoved = oldListeners.filterNot(e => newListenerMap.contains(e.listenerName))
     val listenersAdded = newListeners.filterNot(e => oldListenerMap.contains(e.listenerName))
-
-    // Clear SASL login cache to force re-login
-    if (listenersAdded.nonEmpty || listenersRemoved.nonEmpty)
-      LoginManager.closeAll()
-
-    server.socketServer.removeListeners(listenersRemoved)
-    if (listenersAdded.nonEmpty)
-      server.socketServer.addListeners(listenersAdded)
-
-    server match {
-      case kafkaServer: KafkaServer => kafkaServer.kafkaController.updateBrokerInfo(kafkaServer.createBrokerInfo)
-      case _ =>
+    if (listenersRemoved.nonEmpty || listenersAdded.nonEmpty) {
+      LoginManager.closeAll() // Clear SASL login cache to force re-login
+      if (listenersRemoved.nonEmpty) server.socketServer.removeListeners(listenersRemoved)
+      if (listenersAdded.nonEmpty) server.socketServer.addListeners(listenersAdded)
+    }
+    if (listenerRegistrationsAltered(listenersToMap(oldConfig.effectiveAdvertisedListeners),
+        listenersToMap(newConfig.effectiveAdvertisedListeners))) {
+      verifyListenerRegistrationAlterationSupported()
+      server match {
+        case kafkaServer: KafkaServer => kafkaServer.kafkaController.updateBrokerInfo(kafkaServer.createBrokerInfo)
+        case _ => throw new RuntimeException("Unable to handle non-kafkaServer")
+      }
     }
   }
 
diff --git a/core/src/main/scala/kafka/server/EnvelopeUtils.scala b/core/src/main/scala/kafka/server/EnvelopeUtils.scala
index ec8871f3822ef..97c532ebb45bf 100644
--- a/core/src/main/scala/kafka/server/EnvelopeUtils.scala
+++ b/core/src/main/scala/kafka/server/EnvelopeUtils.scala
@@ -32,7 +32,8 @@ object EnvelopeUtils {
   def handleEnvelopeRequest(
     request: RequestChannel.Request,
     requestChannelMetrics: RequestChannel.Metrics,
-    handler: RequestChannel.Request => Unit): Unit = {
+    handler: RequestChannel.Request => Unit
+  ): Unit = {
     val envelope = request.body[EnvelopeRequest]
     val forwardedPrincipal = parseForwardedPrincipal(request.context, envelope.requestPrincipal)
     val forwardedClientAddress = parseForwardedClientAddress(envelope.clientAddress)
@@ -83,7 +84,7 @@ object EnvelopeUtils {
     requestChannelMetrics: RequestChannel.Metrics
   ): RequestChannel.Request = {
     try {
-      new RequestChannel.Request(
+      val forwardedRequest = new RequestChannel.Request(
         processor = envelope.processor,
         context = forwardedContext,
         startTimeNanos = envelope.startTimeNanos,
@@ -92,6 +93,9 @@ object EnvelopeUtils {
         requestChannelMetrics,
         Some(envelope)
       )
+      // set the dequeue time of forwardedRequest as the value of envelope request
+      forwardedRequest.requestDequeueTimeNanos = envelope.requestDequeueTimeNanos
+      forwardedRequest
     } catch {
       case e: InvalidRequestException =>
         // We use UNSUPPORTED_VERSION if the embedded request cannot be parsed.
diff --git a/core/src/main/scala/kafka/server/FetchDataInfo.scala b/core/src/main/scala/kafka/server/FetchDataInfo.scala
index f6cf725843ef9..95b68c0839576 100644
--- a/core/src/main/scala/kafka/server/FetchDataInfo.scala
+++ b/core/src/main/scala/kafka/server/FetchDataInfo.scala
@@ -17,15 +17,78 @@
 
 package kafka.server
 
+import kafka.api.Request
+import org.apache.kafka.common.IsolationLevel
 import org.apache.kafka.common.message.FetchResponseData
-import org.apache.kafka.common.record.Records
+import org.apache.kafka.common.record.{MemoryRecords, Records}
+import org.apache.kafka.common.replica.ClientMetadata
+import org.apache.kafka.common.requests.FetchRequest
 
 sealed trait FetchIsolation
 case object FetchLogEnd extends FetchIsolation
 case object FetchHighWatermark extends FetchIsolation
 case object FetchTxnCommitted extends FetchIsolation
 
-case class FetchDataInfo(fetchOffsetMetadata: LogOffsetMetadata,
-                         records: Records,
-                         firstEntryIncomplete: Boolean = false,
-                         abortedTransactions: Option[List[FetchResponseData.AbortedTransaction]] = None)
+object FetchIsolation {
+  def apply(
+    request: FetchRequest
+  ): FetchIsolation = {
+    apply(request.replicaId, request.isolationLevel)
+  }
+
+  def apply(
+    replicaId: Int,
+    isolationLevel: IsolationLevel
+  ): FetchIsolation = {
+    if (!Request.isConsumer(replicaId))
+      FetchLogEnd
+    else if (isolationLevel == IsolationLevel.READ_COMMITTED)
+      FetchTxnCommitted
+    else
+      FetchHighWatermark
+  }
+}
+
+case class FetchParams(
+  requestVersion: Short,
+  replicaId: Int,
+  maxWaitMs: Long,
+  minBytes: Int,
+  maxBytes: Int,
+  isolation: FetchIsolation,
+  clientMetadata: Option[ClientMetadata]
+) {
+  def isFromFollower: Boolean = Request.isValidBrokerId(replicaId)
+  def isFromConsumer: Boolean = Request.isConsumer(replicaId)
+  def fetchOnlyLeader: Boolean = isFromFollower || (isFromConsumer && clientMetadata.isEmpty)
+  def hardMaxBytesLimit: Boolean = requestVersion <= 2
+
+  override def toString: String = {
+    s"FetchParams(requestVersion=$requestVersion" +
+      s", replicaId=$replicaId" +
+      s", maxWaitMs=$maxWaitMs" +
+      s", minBytes=$minBytes" +
+      s", maxBytes=$maxBytes" +
+      s", isolation=$isolation" +
+      s", clientMetadata= $clientMetadata" +
+      ")"
+  }
+}
+
+object FetchDataInfo {
+  def empty(fetchOffset: Long): FetchDataInfo = {
+    FetchDataInfo(
+      fetchOffsetMetadata = LogOffsetMetadata(fetchOffset),
+      records = MemoryRecords.EMPTY,
+      firstEntryIncomplete = false,
+      abortedTransactions = None
+    )
+  }
+}
+
+case class FetchDataInfo(
+  fetchOffsetMetadata: LogOffsetMetadata,
+  records: Records,
+  firstEntryIncomplete: Boolean = false,
+  abortedTransactions: Option[List[FetchResponseData.AbortedTransaction]] = None
+)
diff --git a/core/src/main/scala/kafka/server/FetchSession.scala b/core/src/main/scala/kafka/server/FetchSession.scala
index f7d348ddc5a50..b32cb8bcf66ee 100644
--- a/core/src/main/scala/kafka/server/FetchSession.scala
+++ b/core/src/main/scala/kafka/server/FetchSession.scala
@@ -790,7 +790,7 @@ class FetchManager(private val time: Time,
         new FullFetchContext(time, cache, reqMetadata, fetchData, reqVersion >= 13, isFollower)
       }
       debug(s"Created a new full FetchContext with ${partitionsToLogString(fetchData.keySet)}."+
-        s"${removedFetchSessionStr}${suffix}")
+        s"$removedFetchSessionStr$suffix")
       context
     } else {
       cache.synchronized {
diff --git a/core/src/main/scala/kafka/server/FinalizedFeatureCache.scala b/core/src/main/scala/kafka/server/FinalizedFeatureCache.scala
deleted file mode 100644
index 88addb76c4d5c..0000000000000
--- a/core/src/main/scala/kafka/server/FinalizedFeatureCache.scala
+++ /dev/null
@@ -1,183 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.server
-
-import java.util
-import java.util.Collections
-import kafka.utils.Logging
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange}
-import org.apache.kafka.image.FeaturesDelta
-
-import scala.concurrent.TimeoutException
-import scala.math.max
-
-import scala.compat.java8.OptionConverters._
-
-// Raised whenever there was an error in updating the FinalizedFeatureCache with features.
-class FeatureCacheUpdateException(message: String) extends RuntimeException(message) {
-}
-
-// Helper class that represents finalized features along with an epoch value.
-case class FinalizedFeaturesAndEpoch(features: Features[FinalizedVersionRange], epoch: Long) {
-  override def toString(): String = {
-    s"FinalizedFeaturesAndEpoch(features=$features, epoch=$epoch)"
-  }
-}
-
-/**
- * A common mutable cache containing the latest finalized features and epoch. By default the contents of
- * the cache are empty. This cache needs to be populated at least once for its contents to become
- * non-empty. Currently the main reader of this cache is the read path that serves an ApiVersionsRequest,
- * returning the features information in the response. This cache is typically updated asynchronously
- * whenever the finalized features and epoch values are modified in ZK by the KafkaController.
- * This cache is thread-safe for reads and writes.
- *
- * @see FinalizedFeatureChangeListener
- */
-class FinalizedFeatureCache(private val brokerFeatures: BrokerFeatures) extends Logging {
-  @volatile private var featuresAndEpoch: Option[FinalizedFeaturesAndEpoch] = Option.empty
-
-  /**
-   * @return   the latest known FinalizedFeaturesAndEpoch or empty if not defined in the cache.
-   */
-  def get: Option[FinalizedFeaturesAndEpoch] = {
-    featuresAndEpoch
-  }
-
-  def isEmpty: Boolean = {
-    featuresAndEpoch.isEmpty
-  }
-
-  /**
-   * Waits no more than timeoutMs for the cache's epoch to reach an epoch >= minExpectedEpoch.
-   *
-   * @param minExpectedEpoch   the minimum expected epoch to be reached by the cache
-   *                           (should be >= 0)
-   * @param timeoutMs          the timeout (in milli seconds)
-   *
-   * @throws                   TimeoutException if the cache's epoch has not reached at least
-   *                           minExpectedEpoch within timeoutMs.
-   */
-  def waitUntilEpochOrThrow(minExpectedEpoch: Long, timeoutMs: Long): Unit = {
-    if(minExpectedEpoch < 0L) {
-      throw new IllegalArgumentException(
-        s"Expected minExpectedEpoch >= 0, but $minExpectedEpoch was provided.")
-    }
-    waitUntilConditionOrThrow(
-      () => featuresAndEpoch.isDefined && featuresAndEpoch.get.epoch >= minExpectedEpoch,
-      timeoutMs)
-  }
-
-  /**
-   * Clears all existing finalized features and epoch from the cache.
-   */
-  def clear(): Unit = {
-    synchronized {
-      featuresAndEpoch = Option.empty
-      notifyAll()
-    }
-    info("Cleared cache")
-  }
-
-  /**
-   * Updates the cache to the latestFeatures, and updates the existing epoch to latestEpoch.
-   * Expects that the latestEpoch should be always greater than the existing epoch (when the
-   * existing epoch is defined).
-   *
-   * @param latestFeatures   the latest finalized features to be set in the cache
-   * @param latestEpoch      the latest epoch value to be set in the cache
-   *
-   * @throws                 FeatureCacheUpdateException if the cache update operation fails
-   *                         due to invalid parameters or incompatibilities with the broker's
-   *                         supported features. In such a case, the existing cache contents are
-   *                         not modified.
-   */
-  def updateOrThrow(latestFeatures: Features[FinalizedVersionRange], latestEpoch: Long): Unit = {
-    val latest = FinalizedFeaturesAndEpoch(latestFeatures, latestEpoch)
-    val existing = featuresAndEpoch.map(item => item.toString()).getOrElse("<empty>")
-    if (featuresAndEpoch.isDefined && featuresAndEpoch.get.epoch > latest.epoch) {
-      val errorMsg = s"FinalizedFeatureCache update failed due to invalid epoch in new $latest." +
-        s" The existing cache contents are $existing."
-      throw new FeatureCacheUpdateException(errorMsg)
-    } else {
-      val incompatibleFeatures = brokerFeatures.incompatibleFeatures(latest.features)
-      if (!incompatibleFeatures.empty) {
-        val errorMsg = "FinalizedFeatureCache update failed since feature compatibility" +
-          s" checks failed! Supported ${brokerFeatures.supportedFeatures} has incompatibilities" +
-          s" with the latest $latest."
-        throw new FeatureCacheUpdateException(errorMsg)
-      } else {
-        val logMsg = s"Updated cache from existing $existing to latest $latest."
-        synchronized {
-          featuresAndEpoch = Some(latest)
-          notifyAll()
-        }
-        info(logMsg)
-      }
-    }
-  }
-
-  def update(featuresDelta: FeaturesDelta, highestMetadataOffset: Long): Unit = {
-    val features = featuresAndEpoch.getOrElse(
-      FinalizedFeaturesAndEpoch(Features.emptyFinalizedFeatures(), -1))
-    val newFeatures = new util.HashMap[String, FinalizedVersionRange]()
-    newFeatures.putAll(features.features.features())
-    featuresDelta.changes().entrySet().forEach { e =>
-      e.getValue().asScala match {
-        case None => newFeatures.remove(e.getKey)
-        case Some(feature) => newFeatures.put(e.getKey,
-          new FinalizedVersionRange(feature.min(), feature.max()))
-      }
-    }
-    featuresAndEpoch = Some(FinalizedFeaturesAndEpoch(Features.finalizedFeatures(
-      Collections.unmodifiableMap(newFeatures)), highestMetadataOffset))
-  }
-
-  /**
-   * Causes the current thread to wait no more than timeoutMs for the specified condition to be met.
-   * It is guaranteed that the provided condition will always be invoked only from within a
-   * synchronized block.
-   *
-   * @param waitCondition   the condition to be waited upon:
-   *                         - if the condition returns true, then, the wait will stop.
-   *                         - if the condition returns false, it means the wait must continue until
-   *                           timeout.
-   *
-   * @param timeoutMs       the timeout (in milli seconds)
-   *
-   * @throws                TimeoutException if the condition is not met within timeoutMs.
-   */
-  private def waitUntilConditionOrThrow(waitCondition: () => Boolean, timeoutMs: Long): Unit = {
-    if(timeoutMs < 0L) {
-      throw new IllegalArgumentException(s"Expected timeoutMs >= 0, but $timeoutMs was provided.")
-    }
-    val waitEndTimeNanos = System.nanoTime() + (timeoutMs * 1000000)
-    synchronized {
-      while (!waitCondition()) {
-        val nowNanos = System.nanoTime()
-        if (nowNanos > waitEndTimeNanos) {
-          throw new TimeoutException(
-            s"Timed out after waiting for ${timeoutMs}ms for required condition to be met." +
-              s" Current epoch: ${featuresAndEpoch.map(fe => fe.epoch).getOrElse("<none>")}.")
-        }
-        val sleepTimeMs = max(1L, (waitEndTimeNanos - nowNanos) / 1000000)
-        wait(sleepTimeMs)
-      }
-    }
-  }
-}
diff --git a/core/src/main/scala/kafka/server/FinalizedFeatureChangeListener.scala b/core/src/main/scala/kafka/server/FinalizedFeatureChangeListener.scala
index 8f10ab661a895..83e9c1492842f 100644
--- a/core/src/main/scala/kafka/server/FinalizedFeatureChangeListener.scala
+++ b/core/src/main/scala/kafka/server/FinalizedFeatureChangeListener.scala
@@ -17,8 +17,9 @@
 
 package kafka.server
 
-import java.util.concurrent.{CountDownLatch, LinkedBlockingQueue, TimeUnit}
+import kafka.server.metadata.ZkMetadataCache
 
+import java.util.concurrent.{CountDownLatch, LinkedBlockingQueue, TimeUnit}
 import kafka.utils.{Logging, ShutdownableThread}
 import kafka.zk.{FeatureZNode, FeatureZNodeStatus, KafkaZkClient, ZkVersion}
 import kafka.zookeeper.{StateChangeHandler, ZNodeChangeHandler}
@@ -32,10 +33,12 @@ import scala.concurrent.TimeoutException
  * to the latest features read from ZK. The cache updates are serialized through a single
  * notification processor thread.
  *
+ * This updates the features cached in ZkMetadataCache
+ *
  * @param finalizedFeatureCache   the finalized feature cache
  * @param zkClient                the Zookeeper client
  */
-class FinalizedFeatureChangeListener(private val finalizedFeatureCache: FinalizedFeatureCache,
+class FinalizedFeatureChangeListener(private val finalizedFeatureCache: ZkMetadataCache,
                                      private val zkClient: KafkaZkClient) extends Logging {
 
   /**
@@ -87,7 +90,7 @@ class FinalizedFeatureChangeListener(private val finalizedFeatureCache: Finalize
       //                                           a case.
       if (version == ZkVersion.UnknownVersion) {
         info(s"Feature ZK node at path: $featureZkNodePath does not exist")
-        finalizedFeatureCache.clear()
+        finalizedFeatureCache.clearFeatures()
       } else {
         var maybeFeatureZNode: Option[FeatureZNode] = Option.empty
         try {
@@ -95,17 +98,17 @@ class FinalizedFeatureChangeListener(private val finalizedFeatureCache: Finalize
         } catch {
           case e: IllegalArgumentException => {
             error(s"Unable to deserialize feature ZK node at path: $featureZkNodePath", e)
-            finalizedFeatureCache.clear()
+            finalizedFeatureCache.clearFeatures()
           }
         }
         maybeFeatureZNode.foreach(featureZNode => {
           featureZNode.status match {
             case FeatureZNodeStatus.Disabled => {
               info(s"Feature ZK node at path: $featureZkNodePath is in disabled status.")
-              finalizedFeatureCache.clear()
+              finalizedFeatureCache.clearFeatures()
             }
             case FeatureZNodeStatus.Enabled => {
-              finalizedFeatureCache.updateOrThrow(featureZNode.features, version)
+              finalizedFeatureCache.updateFeaturesOrThrow(featureZNode.features.toMap, version)
             }
             case _ => throw new IllegalStateException(s"Unexpected FeatureZNodeStatus found in $featureZNode")
           }
diff --git a/core/src/main/scala/kafka/server/KafkaApis.scala b/core/src/main/scala/kafka/server/KafkaApis.scala
index 2bc28e40c2958..4703118ebfa24 100644
--- a/core/src/main/scala/kafka/server/KafkaApis.scala
+++ b/core/src/main/scala/kafka/server/KafkaApis.scala
@@ -18,7 +18,7 @@
 package kafka.server
 
 import kafka.admin.AdminUtils
-import kafka.api.{ApiVersion, ElectLeadersRequestOps, KAFKA_0_11_0_IV0, KAFKA_2_3_IV0}
+import kafka.api.ElectLeadersRequestOps
 import kafka.common.OffsetAndMetadata
 import kafka.controller.ReplicaAssignment
 import kafka.coordinator.group._
@@ -79,6 +79,9 @@ import java.util.concurrent.ConcurrentHashMap
 import java.util.concurrent.atomic.AtomicInteger
 import java.util.{Collections, Optional}
 
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_11_0_IV0, IBP_2_3_IV0}
+
 import scala.annotation.nowarn
 import scala.collection.{Map, Seq, Set, immutable, mutable}
 import scala.jdk.CollectionConverters._
@@ -221,11 +224,12 @@ class KafkaApis(val requestChannel: RequestChannel,
         case ApiKeys.ALTER_CLIENT_QUOTAS => maybeForwardToController(request, handleAlterClientQuotasRequest)
         case ApiKeys.DESCRIBE_USER_SCRAM_CREDENTIALS => handleDescribeUserScramCredentialsRequest(request)
         case ApiKeys.ALTER_USER_SCRAM_CREDENTIALS => maybeForwardToController(request, handleAlterUserScramCredentialsRequest)
-        case ApiKeys.ALTER_ISR => handleAlterIsrRequest(request)
+        case ApiKeys.ALTER_PARTITION => handleAlterPartitionRequest(request)
         case ApiKeys.UPDATE_FEATURES => maybeForwardToController(request, handleUpdateFeatures)
         case ApiKeys.ENVELOPE => handleEnvelope(request, requestLocal)
         case ApiKeys.DESCRIBE_CLUSTER => handleDescribeCluster(request)
         case ApiKeys.DESCRIBE_PRODUCERS => handleDescribeProducersRequest(request)
+        case ApiKeys.UNREGISTER_BROKER => forwardToControllerOrFail(request)
         case ApiKeys.DESCRIBE_TRANSACTIONS => handleDescribeTransactionsRequest(request)
         case ApiKeys.LIST_TRANSACTIONS => handleListTransactionsRequest(request)
         case ApiKeys.ALLOCATE_PRODUCER_IDS => handleAllocateProducerIdsRequest(request)
@@ -430,7 +434,7 @@ class KafkaApis(val requestChannel: RequestChannel,
             .setTopics(responseTopicList)
             .setThrottleTimeMs(requestThrottleMs)
       ))
-    } else if (offsetCommitRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion < KAFKA_2_3_IV0) {
+    } else if (offsetCommitRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion.isLessThan(IBP_2_3_IV0)) {
       // Only enable static membership when IBP >= 2.3, because it is not safe for the broker to use the static member logic
       // until we are sure that all brokers support it. If static group being loaded by an older coordinator, it will discard
       // the group.instance.id field, so static members could accidentally become "dynamic", which leads to wrong states.
@@ -692,18 +696,6 @@ class KafkaApis(val requestChannel: RequestChannel,
       forgottenTopics,
       topicNames)
 
-    val clientMetadata: Option[ClientMetadata] = if (versionId >= 11) {
-      // Fetch API version 11 added preferred replica logic
-      Some(new DefaultClientMetadata(
-        fetchRequest.rackId,
-        clientId,
-        request.context.clientAddress,
-        request.context.principal,
-        request.context.listenerName.value))
-    } else {
-      None
-    }
-
     val erroneous = mutable.ArrayBuffer[(TopicIdPartition, FetchResponseData.PartitionData)]()
     val interesting = mutable.ArrayBuffer[(TopicIdPartition, FetchRequest.PartitionData)]()
     if (fetchRequest.isFromFollower) {
@@ -940,31 +932,49 @@ class KafkaApis(val requestChannel: RequestChannel,
       }
     }
 
-    // for fetch from consumer, cap fetchMaxBytes to the maximum bytes that could be fetched without being throttled given
-    // no bytes were recorded in the recent quota window
-    // trying to fetch more bytes would result in a guaranteed throttling potentially blocking consumer progress
-    val maxQuotaWindowBytes = if (fetchRequest.isFromFollower)
-      Int.MaxValue
-    else
-      quotas.fetch.getMaxValueInQuotaWindow(request.session, clientId).toInt
-
-    val fetchMaxBytes = Math.min(Math.min(fetchRequest.maxBytes, config.fetchMaxBytes), maxQuotaWindowBytes)
-    val fetchMinBytes = Math.min(fetchRequest.minBytes, fetchMaxBytes)
-    if (interesting.isEmpty)
+    if (interesting.isEmpty) {
       processResponseCallback(Seq.empty)
-    else {
+    } else {
+      // for fetch from consumer, cap fetchMaxBytes to the maximum bytes that could be fetched without being throttled given
+      // no bytes were recorded in the recent quota window
+      // trying to fetch more bytes would result in a guaranteed throttling potentially blocking consumer progress
+      val maxQuotaWindowBytes = if (fetchRequest.isFromFollower)
+        Int.MaxValue
+      else
+        quotas.fetch.getMaxValueInQuotaWindow(request.session, clientId).toInt
+
+      val fetchMaxBytes = Math.min(Math.min(fetchRequest.maxBytes, config.fetchMaxBytes), maxQuotaWindowBytes)
+      val fetchMinBytes = Math.min(fetchRequest.minBytes, fetchMaxBytes)
+
+      val clientMetadata: Option[ClientMetadata] = if (versionId >= 11) {
+        // Fetch API version 11 added preferred replica logic
+        Some(new DefaultClientMetadata(
+          fetchRequest.rackId,
+          clientId,
+          request.context.clientAddress,
+          request.context.principal,
+          request.context.listenerName.value))
+      } else {
+        None
+      }
+
+      val params = FetchParams(
+        requestVersion = versionId,
+        replicaId = fetchRequest.replicaId,
+        maxWaitMs = fetchRequest.maxWait,
+        minBytes = fetchMinBytes,
+        maxBytes = fetchMaxBytes,
+        isolation = FetchIsolation(fetchRequest),
+        clientMetadata = clientMetadata
+      )
+
       // call the replica manager to fetch messages from the local replica
       replicaManager.fetchMessages(
-        fetchRequest.maxWait.toLong,
-        fetchRequest.replicaId,
-        fetchMinBytes,
-        fetchMaxBytes,
-        versionId <= 2,
-        interesting,
-        replicationQuota(fetchRequest),
-        processResponseCallback,
-        fetchRequest.isolationLevel,
-        clientMetadata)
+        params = params,
+        fetchInfos = interesting,
+        quota = replicationQuota(fetchRequest),
+        responseCallback = processResponseCallback,
+      )
     }
   }
 
@@ -1226,7 +1236,7 @@ class KafkaApis(val requestChannel: RequestChannel,
     var unauthorizedForCreateTopics = Set[String]()
 
     if (authorizedTopics.nonEmpty) {
-      val nonExistingTopics = authorizedTopics.filterNot(metadataCache.contains(_))
+      val nonExistingTopics = authorizedTopics.filterNot(metadataCache.contains)
       if (metadataRequest.allowAutoTopicCreation && config.autoCreateTopicsEnable && nonExistingTopics.nonEmpty) {
         if (!authHelper.authorize(request.context, CREATE, CLUSTER, CLUSTER_NAME, logIfDenied = false)) {
           val authorizedForCreateTopics = authHelper.filterByAuthorized(request.context, CREATE, TOPIC,
@@ -1350,7 +1360,7 @@ class KafkaApis(val requestChannel: RequestChannel,
                 val payloadOpt = zkSupport.zkClient.getConsumerOffset(offsetFetchRequest.groupId, topicPartition)
                 payloadOpt match {
                   case Some(payload) =>
-                    (topicPartition, new OffsetFetchResponse.PartitionData(payload.toLong,
+                    (topicPartition, new OffsetFetchResponse.PartitionData(payload,
                       Optional.empty(), OffsetFetchResponse.NO_METADATA, Errors.NONE))
                   case None =>
                     (topicPartition, OffsetFetchResponse.UNKNOWN_PARTITION)
@@ -1615,7 +1625,7 @@ class KafkaApis(val requestChannel: RequestChannel,
                 val listedGroup = new ListGroupsResponseData.ListedGroup()
                   .setGroupId(group.groupId)
                   .setProtocolType(group.protocolType)
-                  .setGroupState(group.state.toString)
+                  .setGroupState(group.state)
                 listedGroup
             }.asJava)
             .setThrottleTimeMs(throttleMs)
@@ -1664,7 +1674,7 @@ class KafkaApis(val requestChannel: RequestChannel,
       requestHelper.sendResponseMaybeThrottle(request, createResponse)
     }
 
-    if (joinGroupRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion < KAFKA_2_3_IV0) {
+    if (joinGroupRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion.isLessThan(IBP_2_3_IV0)) {
       // Only enable static membership when IBP >= 2.3, because it is not safe for the broker to use the static member logic
       // until we are sure that all brokers support it. If static group being loaded by an older coordinator, it will discard
       // the group.instance.id field, so static members could accidentally become "dynamic", which leads to wrong states.
@@ -1718,7 +1728,7 @@ class KafkaApis(val requestChannel: RequestChannel,
         ))
     }
 
-    if (syncGroupRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion < KAFKA_2_3_IV0) {
+    if (syncGroupRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion.isLessThan(IBP_2_3_IV0)) {
       // Only enable static membership when IBP >= 2.3, because it is not safe for the broker to use the static member logic
       // until we are sure that all brokers support it. If static group being loaded by an older coordinator, it will discard
       // the group.instance.id field, so static members could accidentally become "dynamic", which leads to wrong states.
@@ -1791,7 +1801,7 @@ class KafkaApis(val requestChannel: RequestChannel,
       requestHelper.sendResponseMaybeThrottle(request, createResponse)
     }
 
-    if (heartbeatRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion < KAFKA_2_3_IV0) {
+    if (heartbeatRequest.data.groupInstanceId != null && config.interBrokerProtocolVersion.isLessThan(IBP_2_3_IV0)) {
       // Only enable static membership when IBP >= 2.3, because it is not safe for the broker to use the static member logic
       // until we are sure that all brokers support it. If static group being loaded by an older coordinator, it will discard
       // the group.instance.id field, so static members could accidentally become "dynamic", which leads to wrong states.
@@ -2156,16 +2166,14 @@ class KafkaApis(val requestChannel: RequestChannel,
       requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
         new DeleteRecordsResponse(new DeleteRecordsResponseData()
           .setThrottleTimeMs(requestThrottleMs)
-          .setTopics(new DeleteRecordsResponseData.DeleteRecordsTopicResultCollection(mergedResponseStatus.groupBy(_._1.topic).map { case (topic, partitionMap) => {
+          .setTopics(new DeleteRecordsResponseData.DeleteRecordsTopicResultCollection(mergedResponseStatus.groupBy(_._1.topic).map { case (topic, partitionMap) =>
             new DeleteRecordsTopicResult()
               .setName(topic)
-              .setPartitions(new DeleteRecordsResponseData.DeleteRecordsPartitionResultCollection(partitionMap.map { case (topicPartition, partitionResult) => {
+              .setPartitions(new DeleteRecordsResponseData.DeleteRecordsPartitionResultCollection(partitionMap.map { case (topicPartition, partitionResult) =>
                 new DeleteRecordsPartitionResult().setPartitionIndex(topicPartition.partition)
                   .setLowWatermark(partitionResult.lowWatermark)
                   .setErrorCode(partitionResult.errorCode)
-              }
               }.toList.asJava.iterator()))
-          }
           }.toList.asJava.iterator()))))
     }
 
@@ -2231,7 +2239,7 @@ class KafkaApis(val requestChannel: RequestChannel,
   }
 
   def handleEndTxnRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
-    ensureInterBrokerVersion(KAFKA_0_11_0_IV0)
+    ensureInterBrokerVersion(IBP_0_11_0_IV0)
     val endTxnRequest = request.body[EndTxnRequest]
     val transactionalId = endTxnRequest.data.transactionalId
 
@@ -2272,7 +2280,7 @@ class KafkaApis(val requestChannel: RequestChannel,
   }
 
   def handleWriteTxnMarkersRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
-    ensureInterBrokerVersion(KAFKA_0_11_0_IV0)
+    ensureInterBrokerVersion(IBP_0_11_0_IV0)
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
     val writeTxnMarkersRequest = request.body[WriteTxnMarkersRequest]
     val errors = new ConcurrentHashMap[java.lang.Long, util.Map[TopicPartition, Errors]]()
@@ -2377,13 +2385,13 @@ class KafkaApis(val requestChannel: RequestChannel,
       requestHelper.sendResponseExemptThrottle(request, new WriteTxnMarkersResponse(errors))
   }
 
-  def ensureInterBrokerVersion(version: ApiVersion): Unit = {
-    if (config.interBrokerProtocolVersion < version)
+  def ensureInterBrokerVersion(version: MetadataVersion): Unit = {
+    if (config.interBrokerProtocolVersion.isLessThan(version))
       throw new UnsupportedVersionException(s"inter.broker.protocol.version: ${config.interBrokerProtocolVersion.version} is less than the required version: ${version.version}")
   }
 
   def handleAddPartitionToTxnRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
-    ensureInterBrokerVersion(KAFKA_0_11_0_IV0)
+    ensureInterBrokerVersion(IBP_0_11_0_IV0)
     val addPartitionsToTxnRequest = request.body[AddPartitionsToTxnRequest]
     val transactionalId = addPartitionsToTxnRequest.data.transactionalId
     val partitionsToAdd = addPartitionsToTxnRequest.partitions.asScala
@@ -2446,7 +2454,7 @@ class KafkaApis(val requestChannel: RequestChannel,
   }
 
   def handleAddOffsetsToTxnRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
-    ensureInterBrokerVersion(KAFKA_0_11_0_IV0)
+    ensureInterBrokerVersion(IBP_0_11_0_IV0)
     val addOffsetsToTxnRequest = request.body[AddOffsetsToTxnRequest]
     val transactionalId = addOffsetsToTxnRequest.data.transactionalId
     val groupId = addOffsetsToTxnRequest.data.groupId
@@ -2496,7 +2504,7 @@ class KafkaApis(val requestChannel: RequestChannel,
   }
 
   def handleTxnOffsetCommitRequest(request: RequestChannel.Request, requestLocal: RequestLocal): Unit = {
-    ensureInterBrokerVersion(KAFKA_0_11_0_IV0)
+    ensureInterBrokerVersion(IBP_0_11_0_IV0)
     val header = request.header
     val txnOffsetCommitRequest = request.body[TxnOffsetCommitRequest]
 
@@ -2926,24 +2934,40 @@ class KafkaApis(val requestChannel: RequestChannel,
       trace(s"Sending create token response for correlation id ${request.header.correlationId} " +
         s"to client ${request.header.clientId}.")
       requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-        CreateDelegationTokenResponse.prepareResponse(requestThrottleMs, createResult.error, request.context.principal, createResult.issueTimestamp,
-          createResult.expiryTimestamp, createResult.maxTimestamp, createResult.tokenId, ByteBuffer.wrap(createResult.hmac)))
+        CreateDelegationTokenResponse.prepareResponse(request.context.requestVersion, requestThrottleMs, createResult.error, createResult.owner,
+          createResult.tokenRequester, createResult.issueTimestamp, createResult.expiryTimestamp, createResult.maxTimestamp, createResult.tokenId,
+          ByteBuffer.wrap(createResult.hmac)))
     }
 
-    if (!allowTokenRequests(request))
+    val ownerPrincipalName = createTokenRequest.data.ownerPrincipalName
+    val owner = if (ownerPrincipalName == null || ownerPrincipalName.isEmpty) {
+      request.context.principal
+    } else {
+      new KafkaPrincipal(createTokenRequest.data.ownerPrincipalType, ownerPrincipalName)
+    }
+    val requester = request.context.principal
+
+    if (!allowTokenRequests(request)) {
       requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-        CreateDelegationTokenResponse.prepareResponse(requestThrottleMs, Errors.DELEGATION_TOKEN_REQUEST_NOT_ALLOWED, request.context.principal))
-    else {
+        CreateDelegationTokenResponse.prepareResponse(request.context.requestVersion, requestThrottleMs,
+          Errors.DELEGATION_TOKEN_REQUEST_NOT_ALLOWED, owner, requester))
+    } else if (!owner.equals(requester) && !authHelper.authorize(request.context, CREATE_TOKENS, USER, owner.toString)) {
+      requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
+        CreateDelegationTokenResponse.prepareResponse(request.context.requestVersion, requestThrottleMs,
+          Errors.DELEGATION_TOKEN_AUTHORIZATION_FAILED, owner, requester))
+    } else {
       val renewerList = createTokenRequest.data.renewers.asScala.toList.map(entry =>
         new KafkaPrincipal(entry.principalType, entry.principalName))
 
       if (renewerList.exists(principal => principal.getPrincipalType != KafkaPrincipal.USER_TYPE)) {
         requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-          CreateDelegationTokenResponse.prepareResponse(requestThrottleMs, Errors.INVALID_PRINCIPAL_TYPE, request.context.principal))
+          CreateDelegationTokenResponse.prepareResponse(request.context.requestVersion, requestThrottleMs,
+            Errors.INVALID_PRINCIPAL_TYPE, owner, requester))
       }
       else {
         tokenManager.createToken(
-          request.context.principal,
+          owner,
+          requester,
           renewerList,
           createTokenRequest.data.maxLifetimeMs,
           sendResponseCallback
@@ -3014,7 +3038,7 @@ class KafkaApis(val requestChannel: RequestChannel,
     // the callback for sending a describe token response
     def sendResponseCallback(error: Errors, tokenDetails: List[DelegationToken]): Unit = {
       requestHelper.sendResponseMaybeThrottle(request, requestThrottleMs =>
-        new DescribeDelegationTokenResponse(requestThrottleMs, error, tokenDetails.asJava))
+        new DescribeDelegationTokenResponse(request.context.requestVersion(), requestThrottleMs, error, tokenDetails.asJava))
       trace("Sending describe token response for correlation id %d to client %s."
         .format(request.header.correlationId, request.header.clientId))
     }
@@ -3035,7 +3059,9 @@ class KafkaApis(val requestChannel: RequestChannel,
         else
           Some(describeTokenRequest.data.owners.asScala.map(p => new KafkaPrincipal(p.principalType(), p.principalName)).toList)
         def authorizeToken(tokenId: String) = authHelper.authorize(request.context, DESCRIBE, DELEGATION_TOKEN, tokenId)
-        def eligible(token: TokenInformation) = DelegationTokenManager.filterToken(requestPrincipal, owners, token, authorizeToken)
+        def authorizeRequester(owner: KafkaPrincipal) = authHelper.authorize(request.context, DESCRIBE_TOKENS, USER, owner.toString)
+        def eligible(token: TokenInformation) = DelegationTokenManager
+          .filterToken(requestPrincipal, owners, token, authorizeToken, authorizeRequester)
         val tokens =  tokenManager.getTokens(eligible)
         sendResponseCallback(Errors.NONE, tokens)
       }
@@ -3289,18 +3315,17 @@ class KafkaApis(val requestChannel: RequestChannel,
     }
   }
 
-  def handleAlterIsrRequest(request: RequestChannel.Request): Unit = {
+  def handleAlterPartitionRequest(request: RequestChannel.Request): Unit = {
     val zkSupport = metadataSupport.requireZkOrThrow(KafkaApis.shouldNeverReceive(request))
-    val alterIsrRequest = request.body[AlterIsrRequest]
+    val alterPartitionRequest = request.body[AlterPartitionRequest]
     authHelper.authorizeClusterOperation(request, CLUSTER_ACTION)
 
     if (!zkSupport.controller.isActive)
-      requestHelper.sendResponseExemptThrottle(request, alterIsrRequest.getErrorResponse(
+      requestHelper.sendResponseExemptThrottle(request, alterPartitionRequest.getErrorResponse(
         AbstractResponse.DEFAULT_THROTTLE_TIME, Errors.NOT_CONTROLLER.exception))
     else
-      zkSupport.controller.alterIsrs(alterIsrRequest.data, alterIsrResp =>
-        requestHelper.sendResponseExemptThrottle(request, new AlterIsrResponse(alterIsrResp))
-      )
+      zkSupport.controller.alterPartitions(alterPartitionRequest.data, request.context.apiVersion, alterPartitionResp =>
+        requestHelper.sendResponseExemptThrottle(request, new AlterPartitionResponse(alterPartitionResp)))
   }
 
   def handleUpdateFeatures(request: RequestChannel.Request): Unit = {
@@ -3356,7 +3381,7 @@ class KafkaApis(val requestChannel: RequestChannel,
         .setThrottleTimeMs(requestThrottleMs)
         .setClusterId(clusterId)
         .setControllerId(controllerId)
-        .setClusterAuthorizedOperations(clusterAuthorizedOperations);
+        .setClusterAuthorizedOperations(clusterAuthorizedOperations)
 
 
       brokers.foreach { broker =>
diff --git a/core/src/main/scala/kafka/server/KafkaBroker.scala b/core/src/main/scala/kafka/server/KafkaBroker.scala
index f4c6abc306531..b02b1167c50be 100644
--- a/core/src/main/scala/kafka/server/KafkaBroker.scala
+++ b/core/src/main/scala/kafka/server/KafkaBroker.scala
@@ -20,7 +20,7 @@ package kafka.server
 import com.yammer.metrics.core.MetricName
 import kafka.coordinator.group.GroupCoordinator
 import kafka.log.LogManager
-import kafka.metrics.{KafkaMetricsGroup, KafkaYammerMetrics, LinuxIoMetricsCollector}
+import kafka.metrics.{KafkaMetricsGroup, LinuxIoMetricsCollector}
 import kafka.network.SocketServer
 import kafka.security.CredentialProvider
 import kafka.utils.KafkaScheduler
@@ -31,6 +31,7 @@ import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.metadata.BrokerState
 import org.apache.kafka.server.authorizer.Authorizer
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 
 import scala.collection.Seq
 import scala.jdk.CollectionConverters._
@@ -88,6 +89,7 @@ trait KafkaBroker extends KafkaMetricsGroup {
   def shutdown(): Unit
   def brokerTopicStats: BrokerTopicStats
   def credentialProvider: CredentialProvider
+  def clientToControllerChannelManager: BrokerToControllerChannelManager
 
   // For backwards compatibility, we need to keep older metrics tied
   // to their original name when this class was named `KafkaServer`
diff --git a/core/src/main/scala/kafka/server/KafkaConfig.scala b/core/src/main/scala/kafka/server/KafkaConfig.scala
index 4b697fcf1bffa..860056f9a3e4b 100755
--- a/core/src/main/scala/kafka/server/KafkaConfig.scala
+++ b/core/src/main/scala/kafka/server/KafkaConfig.scala
@@ -18,14 +18,14 @@
 package kafka.server
 
 import java.util
+import java.util.concurrent.TimeUnit
 import java.util.{Collections, Locale, Properties}
-import kafka.api.{ApiVersion, ApiVersionValidator, KAFKA_0_10_0_IV1, KAFKA_2_1_IV0, KAFKA_2_7_IV0, KAFKA_2_8_IV0, KAFKA_3_0_IV1}
 import kafka.cluster.EndPoint
 import kafka.coordinator.group.OffsetConfig
 import kafka.coordinator.transaction.{TransactionLog, TransactionStateManager}
 import kafka.log.LogConfig
 import kafka.log.LogConfig.MessageFormatVersion
-import kafka.message.{BrokerCompressionCodec, CompressionCodec, ZStdCompressionCodec}
+import kafka.message.{BrokerCompressionCodec, CompressionCodec, ProducerCompressionCodec, ZStdCompressionCodec}
 import kafka.security.authorizer.AuthorizerUtils
 import kafka.server.KafkaConfig.{ControllerListenerNamesProp, ListenerSecurityProtocolMapProp}
 import kafka.server.KafkaRaftServer.{BrokerRole, ControllerRole, ProcessRole}
@@ -47,6 +47,8 @@ import org.apache.kafka.common.security.authenticator.DefaultKafkaPrincipalBuild
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.raft.RaftConfig
 import org.apache.kafka.server.authorizer.Authorizer
+import org.apache.kafka.server.common.{MetadataVersion, MetadataVersionValidator}
+import org.apache.kafka.server.common.MetadataVersion._
 import org.apache.kafka.server.log.remote.storage.RemoteLogManagerConfig
 import org.apache.zookeeper.client.ZKClientConfig
 
@@ -79,6 +81,7 @@ object Defaults {
   val BrokerHeartbeatIntervalMs = 2000
   val BrokerSessionTimeoutMs = 9000
   val MetadataSnapshotMaxNewRecordBytes = 20 * 1024 * 1024
+  val MetadataMaxIdleIntervalMs = 500
 
   /** KRaft mode configs */
   val EmptyNodeId: Int = -1
@@ -141,7 +144,7 @@ object Defaults {
 
   /* See `TopicConfig.MESSAGE_FORMAT_VERSION_CONFIG` for details */
   @deprecated("3.0")
-  val LogMessageFormatVersion = KAFKA_3_0_IV1.version
+  val LogMessageFormatVersion = IBP_3_0_IV1.version
 
   val LogMessageTimestampType = "CreateTime"
   val LogMessageTimestampDifferenceMaxMs = Long.MaxValue
@@ -172,7 +175,7 @@ object Defaults {
   val LeaderImbalanceCheckIntervalSeconds = 300
   val UncleanLeaderElectionEnable = false
   val InterBrokerSecurityProtocol = SecurityProtocol.PLAINTEXT.toString
-  val InterBrokerProtocolVersion = ApiVersion.latestVersion.toString
+  val InterBrokerProtocolVersion = MetadataVersion.latest.version
 
   /** ********* Controlled shutdown configuration ***********/
   val ControlledShutdownMaxRetries = 3
@@ -227,7 +230,7 @@ object Defaults {
 
   val DeleteTopicEnable = true
 
-  val CompressionType = "producer"
+  val CompressionType = ProducerCompressionCodec.name
 
   val MaxIdMapSnapshots = 2
   /** ********* Kafka Metrics Configuration ***********/
@@ -400,9 +403,12 @@ object KafkaConfig {
   val MetadataMaxRetentionBytesProp = "metadata.max.retention.bytes"
   val MetadataMaxRetentionMillisProp = "metadata.max.retention.ms"
   val QuorumVotersProp = RaftConfig.QUORUM_VOTERS_CONFIG
+  val MetadataMaxIdleIntervalMsProp = "metadata.max.idle.interval.ms"
 
   /************* Authorizer Configuration ***********/
   val AuthorizerClassNameProp = "authorizer.class.name"
+  val EarlyStartListenersProp = "early.start.listeners"
+
   /** ********* Socket Server Configuration ***********/
   val ListenersProp = "listeners"
   val AdvertisedListenersProp = "advertised.listeners"
@@ -708,6 +714,9 @@ object KafkaConfig {
   val MetadataLogDirDoc = "This configuration determines where we put the metadata log for clusters in KRaft mode. " +
     "If it is not set, the metadata log is placed in the first log directory from log.dirs."
   val MetadataSnapshotMaxNewRecordBytesDoc = "This is the maximum number of bytes in the log between the latest snapshot and the high-watermark needed before generating a new snapshot."
+  val MetadataMaxIdleIntervalMsDoc = "This configuration controls how often the active " +
+    "controller should write no-op records to the metadata partition. If the value is 0, no-op records " +
+    s"are not appended to the metadata partition. The default value is ${Defaults.MetadataMaxIdleIntervalMs}";
   val ControllerListenerNamesDoc = "A comma-separated list of the names of the listeners used by the controller. This is required " +
     "if running in KRaft mode. When communicating with the controller quorum, the broker will always use the first listener in this list.\n " +
     "Note: The ZK-based controller should not set this configuration."
@@ -723,7 +732,12 @@ object KafkaConfig {
 
   /************* Authorizer Configuration ***********/
   val AuthorizerClassNameDoc = s"The fully qualified name of a class that implements <code>${classOf[Authorizer].getName}</code>" +
-  " interface, which is used by the broker for authorization."
+    " interface, which is used by the broker for authorization."
+  val EarlyStartListenersDoc = "A comma-separated list of listener names which may be started before the authorizer has finished " +
+   "initialization. This is useful when the authorizer is dependent on the cluster itself for bootstrapping, as is the case for " +
+   "the StandardAuthorizer (which stores ACLs in the metadata log.) By default, all listeners included in controller.listener.names " +
+   "will also be early start listeners. A listener should not appear in this list if it accepts external traffic."
+
   /** ********* Socket Server Configuration ***********/
   val ListenersDoc = "Listener List - Comma-separated list of URIs we will listen on and the listener names." +
     s" If the listener name is not a security protocol, <code>$ListenerSecurityProtocolMapProp</code> must also be set.\n" +
@@ -766,7 +780,8 @@ object KafkaConfig {
     "listener.security.protocol.map = INTERNAL:PLAINTEXT, EXTERNAL:SSL, CONTROLLER:SSL\n" +
     "control.plane.listener.name = CONTROLLER\n" +
     "then controller will use \"broker1.example.com:9094\" with security protocol \"SSL\" to connect to the broker.\n" +
-    "If not explicitly configured, the default value will be null and there will be no dedicated endpoints for controller connections."
+    "If not explicitly configured, the default value will be null and there will be no dedicated endpoints for controller connections.\n" +
+    s"If explicitly configured, the value cannot be the same as the value of <code>$InterBrokerListenerNameProp</code>."
 
   val SocketSendBufferBytesDoc = "The SO_SNDBUF buffer of the socket server sockets. If the value is -1, the OS default will be used."
   val SocketReceiveBufferBytesDoc = "The SO_RCVBUF buffer of the socket server sockets. If the value is -1, the OS default will be used."
@@ -797,7 +812,7 @@ object KafkaConfig {
   /** ********* Log Configuration ***********/
   val NumPartitionsDoc = "The default number of log partitions per topic"
   val LogDirDoc = "The directory in which the log data is kept (supplemental for " + LogDirsProp + " property)"
-  val LogDirsDoc = "The directories in which the log data is kept. If not set, the value in " + LogDirProp + " is used"
+  val LogDirsDoc = "A comma-separated list of the directories where the log data is stored. If not set, the value in " + LogDirProp + " is used."
   val LogSegmentBytesDoc = "The maximum size of a single log file"
   val LogRollTimeMillisDoc = "The maximum time before a new log segment is rolled out (in milliseconds). If not set, the value in " + LogRollTimeHoursProp + " is used"
   val LogRollTimeHoursDoc = "The maximum time before a new log segment is rolled out (in hours), secondary to " + LogRollTimeMillisProp + " property"
@@ -840,8 +855,8 @@ object KafkaConfig {
   val LogFlushOffsetCheckpointIntervalMsDoc = "The frequency with which we update the persistent record of the last flush which acts as the log recovery point"
   val LogFlushStartOffsetCheckpointIntervalMsDoc = "The frequency with which we update the persistent record of log start offset"
   val LogPreAllocateEnableDoc = "Should pre allocate file when create new segment? If you are using Kafka on Windows, you probably need to set it to true."
-  val LogMessageFormatVersionDoc = "Specify the message format version the broker will use to append messages to the logs. The value should be a valid ApiVersion. " +
-    "Some examples are: 0.8.2, 0.9.0.0, 0.10.0, check ApiVersion for more details. By setting a particular message format version, the " +
+  val LogMessageFormatVersionDoc = "Specify the message format version the broker will use to append messages to the logs. The value should be a valid MetadataVersion. " +
+    "Some examples are: 0.8.2, 0.9.0.0, 0.10.0, check MetadataVersion for more details. By setting a particular message format version, the " +
     "user is certifying that all the existing messages on disk are smaller or equal than the specified version. Setting this value incorrectly " +
     "will cause consumers with older versions to break as they will receive messages with a format that they don't understand."
 
@@ -890,8 +905,10 @@ object KafkaConfig {
     "will still be returned to ensure that progress can be made. As such, this is not an absolute maximum. The maximum " +
     "record batch size accepted by the broker is defined via <code>message.max.bytes</code> (broker config) or " +
     "<code>max.message.bytes</code> (topic config)."
-  val NumReplicaFetchersDoc = "Number of fetcher threads used to replicate messages from a source broker. " +
-  "Increasing this value can increase the degree of I/O parallelism in the follower broker."
+  val NumReplicaFetchersDoc = "Number of fetcher threads used to replicate records from each source broker. The total number of fetchers " +
+  "on each broker is bound by <code>num.replica.fetchers</code> multiplied by the number of brokers in the cluster." +
+  "Increasing this value can increase the degree of I/O parallelism in the follower and leader broker at the cost " +
+  "of higher CPU and memory utilization."
   val ReplicaFetchBackoffMsDoc = "The amount of time to sleep when fetch partition error occurs."
   val ReplicaHighWatermarkCheckpointIntervalMsDoc = "The frequency with which the high watermark is saved out to disk"
   val FetchPurgatoryPurgeIntervalRequestsDoc = "The purge interval (in number of requests) of the fetch request purgatory"
@@ -906,7 +923,7 @@ object KafkaConfig {
     "properties at the same time."
   val InterBrokerProtocolVersionDoc = "Specify which version of the inter-broker protocol will be used.\n" +
   " This is typically bumped after all brokers were upgraded to a new version.\n" +
-  " Example of some valid values are: 0.8.0, 0.8.1, 0.8.1.1, 0.8.2, 0.8.2.0, 0.8.2.1, 0.9.0.0, 0.9.0.1 Check ApiVersion for the full list."
+  " Example of some valid values are: 0.8.0, 0.8.1, 0.8.1.1, 0.8.2, 0.8.2.0, 0.8.2.1, 0.9.0.0, 0.9.0.1 Check MetadataVersion for the full list."
   val InterBrokerListenerNameDoc = s"Name of listener used for communication between brokers. If this is unset, the listener name is defined by $InterBrokerSecurityProtocolProp. " +
     s"It is an error to set this and $InterBrokerSecurityProtocolProp properties at the same time."
   val ReplicaSelectorClassDoc = "The fully qualified class name that implements ReplicaSelector. This is used by the broker to find the preferred read replica. By default, we use an implementation that returns the leader."
@@ -963,8 +980,8 @@ object KafkaConfig {
   val ControllerQuotaWindowSizeSecondsDoc = "The time span of each sample for controller mutations quotas"
 
   val ClientQuotaCallbackClassDoc = "The fully qualified name of a class that implements the ClientQuotaCallback interface, " +
-    "which is used to determine quota limits applied to client requests. By default, &lt;user&gt;, &lt;client-id&gt;, &lt;user&gt; or &lt;client-id&gt; " +
-    "quotas stored in ZooKeeper are applied. For any given request, the most specific quota that matches the user principal " +
+    "which is used to determine quota limits applied to client requests. By default, the &lt;user&gt; and &lt;client-id&gt; " +
+    "quotas that are stored in ZooKeeper are applied. For any given request, the most specific quota that matches the user principal " +
     "of the session and the client-id of the request is applied."
 
   val DeleteTopicEnableDoc = "Enables delete topic. Delete topic through the admin tool will have no effect if this config is turned off"
@@ -1136,9 +1153,11 @@ object KafkaConfig {
       .define(MetadataLogSegmentMillisProp, LONG, Defaults.LogRollHours * 60 * 60 * 1000L, null, HIGH, MetadataLogSegmentMillisDoc)
       .define(MetadataMaxRetentionBytesProp, LONG, Defaults.LogRetentionBytes, null, HIGH, MetadataMaxRetentionBytesDoc)
       .define(MetadataMaxRetentionMillisProp, LONG, Defaults.LogRetentionHours * 60 * 60 * 1000L, null, HIGH, MetadataMaxRetentionMillisDoc)
+      .define(MetadataMaxIdleIntervalMsProp, INT, Defaults.MetadataMaxIdleIntervalMs, atLeast(0), LOW, MetadataMaxIdleIntervalMsDoc)
 
       /************* Authorizer Configuration ***********/
-      .define(AuthorizerClassNameProp, STRING, Defaults.AuthorizerClassName, LOW, AuthorizerClassNameDoc)
+      .define(AuthorizerClassNameProp, STRING, Defaults.AuthorizerClassName, new ConfigDef.NonNullValidator(), LOW, AuthorizerClassNameDoc)
+      .define(EarlyStartListenersProp, STRING, null,  HIGH, EarlyStartListenersDoc)
 
       /** ********* Socket Server Configuration ***********/
       .define(ListenersProp, STRING, Defaults.Listeners, HIGH, ListenersDoc)
@@ -1184,11 +1203,11 @@ object KafkaConfig {
       .define(LogCleanerIoBufferSizeProp, INT, Defaults.LogCleanerIoBufferSize, atLeast(0), MEDIUM, LogCleanerIoBufferSizeDoc)
       .define(LogCleanerDedupeBufferLoadFactorProp, DOUBLE, Defaults.LogCleanerDedupeBufferLoadFactor, MEDIUM, LogCleanerDedupeBufferLoadFactorDoc)
       .define(LogCleanerBackoffMsProp, LONG, Defaults.LogCleanerBackoffMs, atLeast(0), MEDIUM, LogCleanerBackoffMsDoc)
-      .define(LogCleanerMinCleanRatioProp, DOUBLE, Defaults.LogCleanerMinCleanRatio, MEDIUM, LogCleanerMinCleanRatioDoc)
+      .define(LogCleanerMinCleanRatioProp, DOUBLE, Defaults.LogCleanerMinCleanRatio, between(0, 1), MEDIUM, LogCleanerMinCleanRatioDoc)
       .define(LogCleanerEnableProp, BOOLEAN, Defaults.LogCleanerEnable, MEDIUM, LogCleanerEnableDoc)
-      .define(LogCleanerDeleteRetentionMsProp, LONG, Defaults.LogCleanerDeleteRetentionMs, MEDIUM, LogCleanerDeleteRetentionMsDoc)
-      .define(LogCleanerMinCompactionLagMsProp, LONG, Defaults.LogCleanerMinCompactionLagMs, MEDIUM, LogCleanerMinCompactionLagMsDoc)
-      .define(LogCleanerMaxCompactionLagMsProp, LONG, Defaults.LogCleanerMaxCompactionLagMs, MEDIUM, LogCleanerMaxCompactionLagMsDoc)
+      .define(LogCleanerDeleteRetentionMsProp, LONG, Defaults.LogCleanerDeleteRetentionMs, atLeast(0), MEDIUM, LogCleanerDeleteRetentionMsDoc)
+      .define(LogCleanerMinCompactionLagMsProp, LONG, Defaults.LogCleanerMinCompactionLagMs, atLeast(0), MEDIUM, LogCleanerMinCompactionLagMsDoc)
+      .define(LogCleanerMaxCompactionLagMsProp, LONG, Defaults.LogCleanerMaxCompactionLagMs, atLeast(1), MEDIUM, LogCleanerMaxCompactionLagMsDoc)
       .define(LogIndexSizeMaxBytesProp, INT, Defaults.LogIndexSizeMaxBytes, atLeast(4), MEDIUM, LogIndexSizeMaxBytesDoc)
       .define(LogIndexIntervalBytesProp, INT, Defaults.LogIndexIntervalBytes, atLeast(0), MEDIUM, LogIndexIntervalBytesDoc)
       .define(LogFlushIntervalMessagesProp, LONG, Defaults.LogFlushIntervalMessages, atLeast(1), HIGH, LogFlushIntervalMessagesDoc)
@@ -1201,9 +1220,9 @@ object KafkaConfig {
       .define(NumRecoveryThreadsPerDataDirProp, INT, Defaults.NumRecoveryThreadsPerDataDir, atLeast(1), HIGH, NumRecoveryThreadsPerDataDirDoc)
       .define(AutoCreateTopicsEnableProp, BOOLEAN, Defaults.AutoCreateTopicsEnable, HIGH, AutoCreateTopicsEnableDoc)
       .define(MinInSyncReplicasProp, INT, Defaults.MinInSyncReplicas, atLeast(1), HIGH, MinInSyncReplicasDoc)
-      .define(LogMessageFormatVersionProp, STRING, Defaults.LogMessageFormatVersion, ApiVersionValidator, MEDIUM, LogMessageFormatVersionDoc)
+      .define(LogMessageFormatVersionProp, STRING, Defaults.LogMessageFormatVersion, new MetadataVersionValidator(), MEDIUM, LogMessageFormatVersionDoc)
       .define(LogMessageTimestampTypeProp, STRING, Defaults.LogMessageTimestampType, in("CreateTime", "LogAppendTime"), MEDIUM, LogMessageTimestampTypeDoc)
-      .define(LogMessageTimestampDifferenceMaxMsProp, LONG, Defaults.LogMessageTimestampDifferenceMaxMs, MEDIUM, LogMessageTimestampDifferenceMaxMsDoc)
+      .define(LogMessageTimestampDifferenceMaxMsProp, LONG, Defaults.LogMessageTimestampDifferenceMaxMs,  atLeast(0), MEDIUM, LogMessageTimestampDifferenceMaxMsDoc)
       .define(CreateTopicPolicyClassNameProp, CLASS, null, LOW, CreateTopicPolicyClassNameDoc)
       .define(AlterConfigPolicyClassNameProp, CLASS, null, LOW, AlterConfigPolicyClassNameDoc)
       .define(LogMessageDownConversionEnableProp, BOOLEAN, Defaults.MessageDownConversionEnable, LOW, LogMessageDownConversionEnableDoc)
@@ -1226,10 +1245,10 @@ object KafkaConfig {
       .define(DeleteRecordsPurgatoryPurgeIntervalRequestsProp, INT, Defaults.DeleteRecordsPurgatoryPurgeIntervalRequests, MEDIUM, DeleteRecordsPurgatoryPurgeIntervalRequestsDoc)
       .define(AutoLeaderRebalanceEnableProp, BOOLEAN, Defaults.AutoLeaderRebalanceEnable, HIGH, AutoLeaderRebalanceEnableDoc)
       .define(LeaderImbalancePerBrokerPercentageProp, INT, Defaults.LeaderImbalancePerBrokerPercentage, HIGH, LeaderImbalancePerBrokerPercentageDoc)
-      .define(LeaderImbalanceCheckIntervalSecondsProp, LONG, Defaults.LeaderImbalanceCheckIntervalSeconds, HIGH, LeaderImbalanceCheckIntervalSecondsDoc)
+      .define(LeaderImbalanceCheckIntervalSecondsProp, LONG, Defaults.LeaderImbalanceCheckIntervalSeconds, atLeast(1), HIGH, LeaderImbalanceCheckIntervalSecondsDoc)
       .define(UncleanLeaderElectionEnableProp, BOOLEAN, Defaults.UncleanLeaderElectionEnable, HIGH, UncleanLeaderElectionEnableDoc)
-      .define(InterBrokerSecurityProtocolProp, STRING, Defaults.InterBrokerSecurityProtocol, MEDIUM, InterBrokerSecurityProtocolDoc)
-      .define(InterBrokerProtocolVersionProp, STRING, Defaults.InterBrokerProtocolVersion, ApiVersionValidator, MEDIUM, InterBrokerProtocolVersionDoc)
+      .define(InterBrokerSecurityProtocolProp, STRING, Defaults.InterBrokerSecurityProtocol, in(Utils.enumOptions(classOf[SecurityProtocol]):_*), MEDIUM, InterBrokerSecurityProtocolDoc)
+      .define(InterBrokerProtocolVersionProp, STRING, Defaults.InterBrokerProtocolVersion, new MetadataVersionValidator(), MEDIUM, InterBrokerProtocolVersionDoc)
       .define(InterBrokerListenerNameProp, STRING, null, MEDIUM, InterBrokerListenerNameDoc)
       .define(ReplicaSelectorClassProp, STRING, null, MEDIUM, ReplicaSelectorClassDoc)
 
@@ -1256,7 +1275,7 @@ object KafkaConfig {
       .define(OffsetCommitTimeoutMsProp, INT, Defaults.OffsetCommitTimeoutMs, atLeast(1), HIGH, OffsetCommitTimeoutMsDoc)
       .define(OffsetCommitRequiredAcksProp, SHORT, Defaults.OffsetCommitRequiredAcks, HIGH, OffsetCommitRequiredAcksDoc)
       .define(DeleteTopicEnableProp, BOOLEAN, Defaults.DeleteTopicEnable, HIGH, DeleteTopicEnableDoc)
-      .define(CompressionTypeProp, STRING, Defaults.CompressionType, HIGH, CompressionTypeDoc)
+      .define(CompressionTypeProp, STRING, Defaults.CompressionType, in(BrokerCompressionCodec.brokerCompressionOptions:_*), HIGH, CompressionTypeDoc)
 
       /** ********* Transaction management configuration ***********/
       .define(TransactionalIdExpirationMsProp, INT, Defaults.TransactionalIdExpirationMs, atLeast(1), HIGH, TransactionalIdExpirationMsDoc)
@@ -1474,6 +1493,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
 
   // Cache the current config to avoid acquiring read lock to access from dynamicConfig
   @volatile private var currentConfig = this
+  val processRoles: Set[ProcessRole] = parseProcessRoles()
   private[server] val dynamicConfig = dynamicConfigOverride.getOrElse(new DynamicBrokerConfig(this))
 
   private[server] def updateCurrentConfig(newConfig: KafkaConfig): Unit = {
@@ -1593,7 +1613,6 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   val maxReservedBrokerId: Int = getInt(KafkaConfig.MaxReservedBrokerIdProp)
   var brokerId: Int = getInt(KafkaConfig.BrokerIdProp)
   val nodeId: Int = getInt(KafkaConfig.NodeIdProp)
-  val processRoles: Set[ProcessRole] = parseProcessRoles()
   val initialRegistrationTimeoutMs: Int = getInt(KafkaConfig.InitialBrokerRegistrationTimeoutMsProp)
   val brokerHeartbeatIntervalMs: Int = getInt(KafkaConfig.BrokerHeartbeatIntervalMsProp)
   val brokerSessionTimeoutMs: Int = getInt(KafkaConfig.BrokerSessionTimeoutMsProp)
@@ -1618,6 +1637,10 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
     distinctRoles
   }
 
+  def isKRaftCoResidentMode: Boolean = {
+    processRoles == Set(BrokerRole, ControllerRole)
+  }
+
   def metadataLogDir: String = {
     Option(getString(KafkaConfig.MetadataLogDirProp)) match {
       case Some(dir) => dir
@@ -1630,7 +1653,6 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   def metadataRetentionBytes = getLong(KafkaConfig.MetadataMaxRetentionBytesProp)
   def metadataRetentionMillis = getLong(KafkaConfig.MetadataMaxRetentionMillisProp)
 
-
   def numNetworkThreads = getInt(KafkaConfig.NumNetworkThreadsProp)
   def backgroundThreads = getInt(KafkaConfig.BackgroundThreadsProp)
   val queuedMaxRequests = getInt(KafkaConfig.QueuedMaxRequestsProp)
@@ -1648,9 +1670,13 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
 
   /************* Metadata Configuration ***********/
   val metadataSnapshotMaxNewRecordBytes = getLong(KafkaConfig.MetadataSnapshotMaxNewRecordBytesProp)
+  val metadataMaxIdleIntervalNs: Option[Long] = {
+    val value = TimeUnit.NANOSECONDS.convert(getInt(KafkaConfig.MetadataMaxIdleIntervalMsProp).toLong, TimeUnit.MILLISECONDS)
+    if (value > 0) Some(value) else None
+  }
 
   /************* Authorizer Configuration ***********/
-  val authorizer: Option[Authorizer] = {
+  def createNewAuthorizer(): Option[Authorizer] = {
     val className = getString(KafkaConfig.AuthorizerClassNameProp)
     if (className == null || className.isEmpty)
       None
@@ -1659,6 +1685,23 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
     }
   }
 
+  val earlyStartListeners: Set[ListenerName] = {
+    val listenersSet = listeners.map(_.listenerName).toSet
+    val controllerListenersSet = controllerListeners.map(_.listenerName).toSet
+    Option(getString(KafkaConfig.EarlyStartListenersProp)) match {
+      case None => controllerListenersSet
+      case Some(str) =>
+        str.split(",").map(_.trim()).filter(!_.isEmpty).map { str =>
+          val listenerName = new ListenerName(str)
+          if (!listenersSet.contains(listenerName) && !controllerListenersSet.contains(listenerName))
+            throw new ConfigException(s"${KafkaConfig.EarlyStartListenersProp} contains " +
+              s"listener ${listenerName.value()}, but this is not contained in " +
+              s"${KafkaConfig.ListenersProp} or ${KafkaConfig.ControllerListenerNamesProp}")
+          listenerName
+        }.toSet
+    }
+  }
+
   /** ********* Socket Server Configuration ***********/
   val socketSendBufferBytes = getInt(KafkaConfig.SocketSendBufferBytesProp)
   val socketReceiveBufferBytes = getInt(KafkaConfig.SocketReceiveBufferBytesProp)
@@ -1711,7 +1754,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   def minInSyncReplicas = getInt(KafkaConfig.MinInSyncReplicasProp)
   def logPreAllocateEnable: java.lang.Boolean = getBoolean(KafkaConfig.LogPreAllocateProp)
 
-  // We keep the user-provided String as `ApiVersion.apply` can choose a slightly different version (eg if `0.10.0`
+  // We keep the user-provided String as `MetadataVersion.fromVersionString` can choose a slightly different version (eg if `0.10.0`
   // is passed, `0.10.0-IV0` may be picked)
   @nowarn("cat=deprecation")
   private val logMessageFormatVersionString = getString(KafkaConfig.LogMessageFormatVersionProp)
@@ -1720,8 +1763,8 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   @deprecated("3.0")
   lazy val logMessageFormatVersion =
     if (LogConfig.shouldIgnoreMessageFormatVersion(interBrokerProtocolVersion))
-      ApiVersion(Defaults.LogMessageFormatVersion)
-    else ApiVersion(logMessageFormatVersionString)
+      MetadataVersion.fromVersionString(Defaults.LogMessageFormatVersion)
+    else MetadataVersion.fromVersionString(logMessageFormatVersionString)
 
   def logMessageTimestampType = TimestampType.forName(getString(KafkaConfig.LogMessageTimestampTypeProp))
   def logMessageTimestampDifferenceMaxMs: Long = getLong(KafkaConfig.LogMessageTimestampDifferenceMaxMsProp)
@@ -1745,13 +1788,31 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   val deleteRecordsPurgatoryPurgeIntervalRequests = getInt(KafkaConfig.DeleteRecordsPurgatoryPurgeIntervalRequestsProp)
   val autoLeaderRebalanceEnable = getBoolean(KafkaConfig.AutoLeaderRebalanceEnableProp)
   val leaderImbalancePerBrokerPercentage = getInt(KafkaConfig.LeaderImbalancePerBrokerPercentageProp)
-  val leaderImbalanceCheckIntervalSeconds = getLong(KafkaConfig.LeaderImbalanceCheckIntervalSecondsProp)
+  val leaderImbalanceCheckIntervalSeconds: Long = getLong(KafkaConfig.LeaderImbalanceCheckIntervalSecondsProp)
   def uncleanLeaderElectionEnable: java.lang.Boolean = getBoolean(KafkaConfig.UncleanLeaderElectionEnableProp)
 
-  // We keep the user-provided String as `ApiVersion.apply` can choose a slightly different version (eg if `0.10.0`
+  // We keep the user-provided String as `MetadataVersion.fromVersionString` can choose a slightly different version (eg if `0.10.0`
   // is passed, `0.10.0-IV0` may be picked)
   val interBrokerProtocolVersionString = getString(KafkaConfig.InterBrokerProtocolVersionProp)
-  val interBrokerProtocolVersion = ApiVersion(interBrokerProtocolVersionString)
+  val interBrokerProtocolVersion = if (processRoles.isEmpty) {
+    MetadataVersion.fromVersionString(interBrokerProtocolVersionString)
+  } else {
+    if (originals.containsKey(KafkaConfig.InterBrokerProtocolVersionProp)) {
+      // A user-supplied IBP was given
+      val configuredVersion = MetadataVersion.fromVersionString(interBrokerProtocolVersionString)
+      if (!configuredVersion.isKRaftSupported) {
+        throw new ConfigException(s"A non-KRaft version ${interBrokerProtocolVersionString} given for ${KafkaConfig.InterBrokerProtocolVersionProp}. " +
+          s"The minimum version is ${MetadataVersion.MINIMUM_KRAFT_VERSION}")
+      } else {
+        warn(s"${KafkaConfig.InterBrokerProtocolVersionProp} is deprecated in KRaft mode as of 3.3 and will only " +
+          s"be read when first upgrading from a KRaft prior to 3.3. See kafka-storage.sh help for details on setting " +
+          s"the metadata version for a new KRaft cluster.")
+      }
+    }
+    // In KRaft mode, we pin this value to the minimum KRaft-supported version. This prevents inadvertent usage of
+    // the static IBP config in broker components running in KRaft mode
+    MetadataVersion.MINIMUM_KRAFT_VERSION
+  }
 
   /** ********* Controlled shutdown configuration ***********/
   val controlledShutdownMaxRetries = getInt(KafkaConfig.ControlledShutdownMaxRetriesProp)
@@ -1759,7 +1820,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   val controlledShutdownEnable = getBoolean(KafkaConfig.ControlledShutdownEnableProp)
 
   /** ********* Feature configuration ***********/
-  def isFeatureVersioningSupported = interBrokerProtocolVersion >= KAFKA_2_7_IV0
+  def isFeatureVersioningSupported = interBrokerProtocolVersion.isFeatureVersioningSupported()
 
   /** ********* Group coordinator configuration ***********/
   val groupMinSessionTimeoutMs = getInt(KafkaConfig.GroupMinSessionTimeoutMsProp)
@@ -1811,7 +1872,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
   def controlPlaneListenerName = getControlPlaneListenerNameAndSecurityProtocol.map { case (listenerName, _) => listenerName }
   def controlPlaneSecurityProtocol = getControlPlaneListenerNameAndSecurityProtocol.map { case (_, securityProtocol) => securityProtocol }
   def saslMechanismInterBrokerProtocol = getString(KafkaConfig.SaslMechanismInterBrokerProtocolProp)
-  val saslInterBrokerHandshakeRequestEnable = interBrokerProtocolVersion >= KAFKA_0_10_0_IV1
+  val saslInterBrokerHandshakeRequestEnable = interBrokerProtocolVersion.isSaslInterBrokerHandshakeRequestEnabled()
 
   /** ********* DelegationToken Configuration **************/
   val delegationTokenSecretKey = Option(getPassword(KafkaConfig.DelegationTokenSecretKeyProp))
@@ -1992,7 +2053,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
 
   // Topic IDs are used with all self-managed quorum clusters and ZK cluster with IBP greater than or equal to 2.8
   def usesTopicId: Boolean =
-    usesSelfManagedQuorum || interBrokerProtocolVersion >= KAFKA_2_8_IV0
+    usesSelfManagedQuorum || interBrokerProtocolVersion.isTopicIdsSupported()
 
   validateValues()
 
@@ -2107,7 +2168,7 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
       validateControllerQuorumVotersMustContainNodeIdForKRaftController()
       validateControllerListenerExistsForKRaftController()
       validateControllerListenerNamesMustAppearInListenersForKRaftController()
-    } else if (processRoles == Set(BrokerRole, ControllerRole)) {
+    } else if (isKRaftCoResidentMode) {
       // KRaft colocated broker and controller
       validateNonEmptyQuorumVotersForKRaft()
       validateControlPlaneListenerEmptyForKRaft()
@@ -2156,15 +2217,15 @@ class KafkaConfig private(doLog: Boolean, val props: java.util.Map[_, _], dynami
     if (messageFormatVersion.shouldWarn)
       warn(messageFormatVersion.brokerWarningMessage)
 
-    val recordVersion = logMessageFormatVersion.recordVersion
-    require(interBrokerProtocolVersion.recordVersion.value >= recordVersion.value,
+    val recordVersion = logMessageFormatVersion.highestSupportedRecordVersion
+    require(interBrokerProtocolVersion.highestSupportedRecordVersion().value >= recordVersion.value,
       s"log.message.format.version $logMessageFormatVersionString can only be used when inter.broker.protocol.version " +
-      s"is set to version ${ApiVersion.minSupportedFor(recordVersion).shortVersion} or higher")
+      s"is set to version ${MetadataVersion.minSupportedFor(recordVersion).shortVersion} or higher")
 
     if (offsetsTopicCompressionCodec == ZStdCompressionCodec)
-      require(interBrokerProtocolVersion.recordVersion.value >= KAFKA_2_1_IV0.recordVersion.value,
+      require(interBrokerProtocolVersion.highestSupportedRecordVersion().value >= IBP_2_1_IV0.highestSupportedRecordVersion().value,
         "offsets.topic.compression.codec zstd can only be used when inter.broker.protocol.version " +
-        s"is set to version ${KAFKA_2_1_IV0.shortVersion} or higher")
+        s"is set to version ${IBP_2_1_IV0.shortVersion} or higher")
 
     val interBrokerUsesSasl = interBrokerSecurityProtocol == SecurityProtocol.SASL_PLAINTEXT || interBrokerSecurityProtocol == SecurityProtocol.SASL_SSL
     require(!interBrokerUsesSasl || saslInterBrokerHandshakeRequestEnable || saslMechanismInterBrokerProtocol == SaslConfigs.GSSAPI_MECHANISM,
diff --git a/core/src/main/scala/kafka/server/KafkaRaftServer.scala b/core/src/main/scala/kafka/server/KafkaRaftServer.scala
index 876925c9797f3..2338ef5e7c413 100644
--- a/core/src/main/scala/kafka/server/KafkaRaftServer.scala
+++ b/core/src/main/scala/kafka/server/KafkaRaftServer.scala
@@ -18,21 +18,27 @@ package kafka.server
 
 import java.io.File
 import java.util.concurrent.CompletableFuture
-
-import kafka.common.{InconsistentNodeIdException, KafkaException}
+import kafka.common.InconsistentNodeIdException
 import kafka.log.{LogConfig, UnifiedLog}
-import kafka.metrics.{KafkaMetricsReporter, KafkaYammerMetrics}
+import kafka.metrics.KafkaMetricsReporter
 import kafka.raft.KafkaRaftManager
 import kafka.server.KafkaRaftServer.{BrokerRole, ControllerRole}
+import kafka.server.metadata.BrokerServerMetrics
 import kafka.utils.{CoreUtils, Logging, Mx4jLoader, VerifiableProperties}
-import org.apache.kafka.common.utils.{AppInfoParser, Time}
-import org.apache.kafka.common.{TopicPartition, Uuid}
 import org.apache.kafka.common.config.{ConfigDef, ConfigResource}
+import org.apache.kafka.common.internals.Topic
+import org.apache.kafka.common.utils.{AppInfoParser, Time}
+import org.apache.kafka.common.{KafkaException, Uuid}
+import org.apache.kafka.controller.{BootstrapMetadata, QuorumControllerMetrics}
 import org.apache.kafka.metadata.{KafkaConfigSchema, MetadataRecordSerde}
 import org.apache.kafka.raft.RaftConfig
-import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.server.common.{ApiMessageAndVersion, MetadataVersion}
+import org.apache.kafka.server.fault.{LoggingFaultHandler, ProcessExitingFaultHandler}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 
+import java.nio.file.Paths
 import scala.collection.Seq
+import scala.compat.java8.FunctionConverters.asJavaSupplier
 import scala.jdk.CollectionConverters._
 
 /**
@@ -53,7 +59,7 @@ class KafkaRaftServer(
   KafkaMetricsReporter.startReporters(VerifiableProperties(config.originals))
   KafkaYammerMetrics.INSTANCE.configure(config.originals)
 
-  private val (metaProps, offlineDirs) = KafkaRaftServer.initializeLogDirs(config)
+  private val (metaProps, bootstrapMetadata, offlineDirs) = KafkaRaftServer.initializeLogDirs(config)
 
   private val metrics = Server.initializeMetrics(
     config,
@@ -77,31 +83,49 @@ class KafkaRaftServer(
   )
 
   private val broker: Option[BrokerServer] = if (config.processRoles.contains(BrokerRole)) {
+    val brokerMetrics = BrokerServerMetrics(metrics)
+    val fatalFaultHandler = new ProcessExitingFaultHandler()
+    val metadataLoadingFaultHandler = new LoggingFaultHandler("metadata loading",
+        () => brokerMetrics.metadataLoadErrorCount.getAndIncrement())
+    val metadataApplyingFaultHandler = new LoggingFaultHandler("metadata application",
+      () => brokerMetrics.metadataApplyErrorCount.getAndIncrement())
     Some(new BrokerServer(
       config,
       metaProps,
       raftManager,
       time,
       metrics,
+      brokerMetrics,
       threadNamePrefix,
       offlineDirs,
       controllerQuorumVotersFuture,
-      Server.SUPPORTED_FEATURES
+      fatalFaultHandler,
+      metadataLoadingFaultHandler,
+      metadataApplyingFaultHandler
     ))
   } else {
     None
   }
 
   private val controller: Option[ControllerServer] = if (config.processRoles.contains(ControllerRole)) {
+    val controllerMetrics = new QuorumControllerMetrics(KafkaYammerMetrics.defaultRegistry(), time)
+    val metadataFaultHandler = new LoggingFaultHandler("controller metadata",
+      () => controllerMetrics.incrementMetadataErrorCount())
+    val fatalFaultHandler = new ProcessExitingFaultHandler()
     Some(new ControllerServer(
       metaProps,
       config,
       raftManager,
       time,
       metrics,
+      controllerMetrics,
       threadNamePrefix,
       controllerQuorumVotersFuture,
       KafkaRaftServer.configSchema,
+      raftManager.apiVersions,
+      bootstrapMetadata,
+      metadataFaultHandler,
+      fatalFaultHandler
     ))
   } else {
     None
@@ -132,8 +156,8 @@ class KafkaRaftServer(
 }
 
 object KafkaRaftServer {
-  val MetadataTopic = "__cluster_metadata"
-  val MetadataPartition = new TopicPartition(MetadataTopic, 0)
+  val MetadataTopic = Topic.METADATA_TOPIC_NAME
+  val MetadataPartition = Topic.METADATA_TOPIC_PARTITION
   val MetadataTopicId = Uuid.METADATA_TOPIC_ID
 
   sealed trait ProcessRole
@@ -149,7 +173,7 @@ object KafkaRaftServer {
    * @return A tuple containing the loaded meta properties (which are guaranteed to
    *         be consistent across all log dirs) and the offline directories
    */
-  def initializeLogDirs(config: KafkaConfig): (MetaProperties, Seq[String]) = {
+  def initializeLogDirs(config: KafkaConfig): (MetaProperties, BootstrapMetadata, Seq[String]) = {
     val logDirs = (config.logDirs.toSet + config.metadataLogDir).toSeq
     val (rawMetaProperties, offlineDirs) = BrokerMetadataCheckpoint.
       getBrokerMetadataAndOfflineDirs(logDirs, ignoreMissing = false)
@@ -177,11 +201,22 @@ object KafkaRaftServer {
           "If you intend to create a new broker, you should remove all data in your data directories (log.dirs).")
     }
 
-    (metaProperties, offlineDirs.toSeq)
+    // Load the bootstrap metadata file. In the case of an upgrade from older KRaft where there is no bootstrap metadata,
+    // read the IBP from config in order to bootstrap the equivalent metadata version.
+    def getUserDefinedIBPVersionOrThrow(): MetadataVersion = {
+      if (config.originals.containsKey(KafkaConfig.InterBrokerProtocolVersionProp)) {
+        MetadataVersion.fromVersionString(config.interBrokerProtocolVersionString)
+      } else {
+        throw new KafkaException(s"Cannot upgrade from KRaft version prior to 3.3 without first setting ${KafkaConfig.InterBrokerProtocolVersionProp} on each broker.")
+      }
+    }
+    val bootstrapMetadata = BootstrapMetadata.load(Paths.get(config.metadataLogDir), asJavaSupplier(() => getUserDefinedIBPVersionOrThrow()))
+
+    (metaProperties, bootstrapMetadata, offlineDirs.toSeq)
   }
 
   val configSchema = new KafkaConfigSchema(Map(
     ConfigResource.Type.BROKER -> new ConfigDef(KafkaConfig.configDef),
     ConfigResource.Type.TOPIC -> LogConfig.configDefCopy,
-  ).asJava)
+  ).asJava, LogConfig.AllTopicConfigSynonyms)
 }
diff --git a/core/src/main/scala/kafka/server/KafkaServer.scala b/core/src/main/scala/kafka/server/KafkaServer.scala
index afacaa162fd1b..6b52511c1bab3 100755
--- a/core/src/main/scala/kafka/server/KafkaServer.scala
+++ b/core/src/main/scala/kafka/server/KafkaServer.scala
@@ -22,14 +22,13 @@ import java.net.{InetAddress, SocketTimeoutException}
 import java.util.concurrent._
 import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
 
-import kafka.api.{KAFKA_0_9_0, KAFKA_2_2_IV0, KAFKA_2_4_IV1}
 import kafka.cluster.{Broker, EndPoint}
 import kafka.common.{GenerateBrokerIdException, InconsistentBrokerIdException, InconsistentClusterIdException}
 import kafka.controller.KafkaController
 import kafka.coordinator.group.GroupCoordinator
 import kafka.coordinator.transaction.{ProducerIdManager, TransactionCoordinator}
 import kafka.log.LogManager
-import kafka.metrics.{KafkaMetricsReporter, KafkaYammerMetrics}
+import kafka.metrics.KafkaMetricsReporter
 import kafka.network.{ControlPlaneAcceptor, DataPlaneAcceptor, RequestChannel, SocketServer}
 import kafka.security.CredentialProvider
 import kafka.server.metadata.{ZkConfigRepository, ZkMetadataCache}
@@ -50,6 +49,8 @@ import org.apache.kafka.common.utils.{AppInfoParser, LogContext, Time, Utils}
 import org.apache.kafka.common.{Endpoint, Node}
 import org.apache.kafka.metadata.BrokerState
 import org.apache.kafka.server.authorizer.Authorizer
+import org.apache.kafka.server.common.MetadataVersion._
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.zookeeper.client.ZKClientConfig
 
 import scala.collection.{Map, Seq}
@@ -139,7 +140,7 @@ class KafkaServer(
 
   var clientToControllerChannelManager: BrokerToControllerChannelManager = null
 
-  var alterIsrManager: AlterIsrManager = null
+  var alterPartitionManager: AlterPartitionManager = null
 
   var kafkaScheduler: KafkaScheduler = null
 
@@ -161,8 +162,7 @@ class KafkaServer(
 
   private var _featureChangeListener: FinalizedFeatureChangeListener = null
 
-  val brokerFeatures: BrokerFeatures = BrokerFeatures.createDefault()
-  val featureCache: FinalizedFeatureCache = new FinalizedFeatureCache(brokerFeatures)
+  val brokerFeatures: BrokerFeatures = BrokerFeatures.createEmpty()
 
   override def brokerState: BrokerState = _brokerState
 
@@ -203,15 +203,9 @@ class KafkaServer(
         initZkClient(time)
         configRepository = new ZkConfigRepository(new AdminZkClient(zkClient))
 
-        /* initialize features */
-        _featureChangeListener = new FinalizedFeatureChangeListener(featureCache, _zkClient)
-        if (config.isFeatureVersioningSupported) {
-          _featureChangeListener.initOrThrow(config.zkConnectionTimeoutMs)
-        }
-
         /* Get or create cluster_id */
         _clusterId = getOrGenerateClusterId(zkClient)
-        info(s"Cluster ID = ${clusterId}")
+        info(s"Cluster ID = $clusterId")
 
         /* load metadata */
         val (preloadedBrokerMetadataCheckpoint, initialOfflineDirs) =
@@ -226,7 +220,7 @@ class KafkaServer(
         /* check cluster id */
         if (preloadedBrokerMetadataCheckpoint.clusterId.isDefined && preloadedBrokerMetadataCheckpoint.clusterId.get != clusterId)
           throw new InconsistentClusterIdException(
-            s"The Cluster ID ${clusterId} doesn't match stored clusterId ${preloadedBrokerMetadataCheckpoint.clusterId} in meta.properties. " +
+            s"The Cluster ID $clusterId doesn't match stored clusterId ${preloadedBrokerMetadataCheckpoint.clusterId} in meta.properties. " +
             s"The broker is trying to join the wrong cluster. Configured zookeeper.connect may be wrong.")
 
         /* generate brokerId */
@@ -268,20 +262,29 @@ class KafkaServer(
         _brokerState = BrokerState.RECOVERY
         logManager.startup(zkClient.getAllTopicsInCluster())
 
-        metadataCache = MetadataCache.zkMetadataCache(config.brokerId)
+        metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion, brokerFeatures)
+        val controllerNodeProvider = MetadataCacheControllerNodeProvider(config, metadataCache)
+
+        /* initialize feature change listener */
+        _featureChangeListener = new FinalizedFeatureChangeListener(metadataCache, _zkClient)
+        if (config.isFeatureVersioningSupported) {
+          _featureChangeListener.initOrThrow(config.zkConnectionTimeoutMs)
+        }
+
         // Enable delegation token cache for all SCRAM mechanisms to simplify dynamic update.
         // This keeps the cache up-to-date if new SCRAM mechanisms are enabled dynamically.
         tokenCache = new DelegationTokenCache(ScramMechanism.mechanismNames)
         credentialProvider = new CredentialProvider(ScramMechanism.mechanismNames, tokenCache)
 
         clientToControllerChannelManager = BrokerToControllerChannelManager(
-          controllerNodeProvider = MetadataCacheControllerNodeProvider(config, metadataCache),
+          controllerNodeProvider = controllerNodeProvider,
           time = time,
           metrics = metrics,
           config = config,
           channelName = "forwarding",
           threadNamePrefix = threadNamePrefix,
-          retryTimeoutMs = config.requestTimeoutMs.longValue)
+          retryTimeoutMs = config.requestTimeoutMs.longValue
+        )
         clientToControllerChannelManager.start()
 
         /* start forwarding manager */
@@ -296,7 +299,7 @@ class KafkaServer(
           config,
           forwardingManager,
           brokerFeatures,
-          featureCache
+          metadataCache
         )
 
         // Create and start the socket server acceptor threads so that the bound port is known.
@@ -306,25 +309,25 @@ class KafkaServer(
         // Note that we allow the use of KRaft mode controller APIs when forwarding is enabled
         // so that the Envelope request is exposed. This is only used in testing currently.
         socketServer = new SocketServer(config, metrics, time, credentialProvider, apiVersionManager)
-        socketServer.startup(startProcessingRequests = false)
 
-        /* start replica manager */
-        alterIsrManager = if (config.interBrokerProtocolVersion.isAlterIsrSupported) {
-          AlterIsrManager(
+        // Start alter partition manager based on the IBP version
+        alterPartitionManager = if (config.interBrokerProtocolVersion.isAlterPartitionSupported) {
+          AlterPartitionManager(
             config = config,
             metadataCache = metadataCache,
             scheduler = kafkaScheduler,
+            controllerNodeProvider,
             time = time,
             metrics = metrics,
             threadNamePrefix = threadNamePrefix,
-            brokerEpochSupplier = () => kafkaController.brokerEpoch,
-            config.brokerId
+            brokerEpochSupplier = () => kafkaController.brokerEpoch
           )
         } else {
-          AlterIsrManager(kafkaScheduler, time, zkClient)
+          AlterPartitionManager(kafkaScheduler, time, zkClient)
         }
-        alterIsrManager.start()
+        alterPartitionManager.start()
 
+        // Start replica manager
         _replicaManager = createReplicaManager(isShuttingDown)
         replicaManager.startup()
 
@@ -339,7 +342,7 @@ class KafkaServer(
         tokenManager.startup()
 
         /* start kafka controller */
-        _kafkaController = new KafkaController(config, zkClient, time, metrics, brokerInfo, brokerEpoch, tokenManager, brokerFeatures, featureCache, threadNamePrefix)
+        _kafkaController = new KafkaController(config, zkClient, time, metrics, brokerInfo, brokerEpoch, tokenManager, brokerFeatures, metadataCache, threadNamePrefix)
         kafkaController.startup()
 
         adminManager = new ZkAdminManager(config, metrics, metadataCache, zkClient)
@@ -380,7 +383,7 @@ class KafkaServer(
         )
 
         /* Get the authorizer and initialize it if one is specified.*/
-        authorizer = config.authorizer
+        authorizer = config.createNewAuthorizer()
         authorizer.foreach(_.configure(config.originals))
         val authorizerFutures: Map[Endpoint, CompletableFuture[Void]] = authorizer match {
           case Some(authZ) =>
@@ -449,7 +452,7 @@ class KafkaServer(
         dynamicConfigManager = new ZkConfigManager(zkClient, dynamicConfigHandlers)
         dynamicConfigManager.startup()
 
-        socketServer.startProcessingRequests(authorizerFutures)
+        socketServer.enableRequestProcessing(authorizerFutures)
 
         _brokerState = BrokerState.RUNNING
         shutdownLatch = new CountDownLatch(1)
@@ -478,7 +481,7 @@ class KafkaServer(
       quotaManagers = quotaManagers,
       metadataCache = metadataCache,
       logDirFailureChannel = logDirFailureChannel,
-      alterIsrManager = alterIsrManager,
+      alterPartitionManager = alterPartitionManager,
       brokerTopicStats = brokerTopicStats,
       isShuttingDown = isShuttingDown,
       zkClient = Some(zkClient),
@@ -626,9 +629,9 @@ class KafkaServer(
 
               // send the controlled shutdown request
               val controlledShutdownApiVersion: Short =
-                if (config.interBrokerProtocolVersion < KAFKA_0_9_0) 0
-                else if (config.interBrokerProtocolVersion < KAFKA_2_2_IV0) 1
-                else if (config.interBrokerProtocolVersion < KAFKA_2_4_IV1) 2
+                if (config.interBrokerProtocolVersion.isLessThan(IBP_0_9_0)) 0
+                else if (config.interBrokerProtocolVersion.isLessThan(IBP_2_2_IV0)) 1
+                else if (config.interBrokerProtocolVersion.isLessThan(IBP_2_4_IV1)) 2
                 else 3
 
               val controlledShutdownRequest = new ControlledShutdownRequest.Builder(
@@ -755,8 +758,8 @@ class KafkaServer(
         if (replicaManager != null)
           CoreUtils.swallow(replicaManager.shutdown(), this)
 
-        if (alterIsrManager != null)
-          CoreUtils.swallow(alterIsrManager.shutdown(), this)
+        if (alterPartitionManager != null)
+          CoreUtils.swallow(alterPartitionManager.shutdown(), this)
 
         if (clientToControllerChannelManager != null)
           CoreUtils.swallow(clientToControllerChannelManager.shutdown(), this)
@@ -830,7 +833,13 @@ class KafkaServer(
   private def checkpointBrokerMetadata(brokerMetadata: ZkMetaProperties) = {
     for (logDir <- config.logDirs if logManager.isLogDirOnline(new File(logDir).getAbsolutePath)) {
       val checkpoint = brokerMetadataCheckpoints(logDir)
-      checkpoint.write(brokerMetadata.toProperties)
+      try {
+        checkpoint.write(brokerMetadata.toProperties)
+      } catch {
+        case e: IOException =>
+          val dirPath = checkpoint.file.getAbsolutePath
+          logDirFailureChannel.maybeAddOfflineLogDir(dirPath, s"Error while writing meta.properties to $dirPath", e)
+      }
     }
   }
 
diff --git a/core/src/main/scala/kafka/server/LeaderEndPoint.scala b/core/src/main/scala/kafka/server/LeaderEndPoint.scala
new file mode 100644
index 0000000000000..70d2149dabc48
--- /dev/null
+++ b/core/src/main/scala/kafka/server/LeaderEndPoint.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import kafka.cluster.BrokerEndPoint
+import kafka.server.AbstractFetcherThread.{ReplicaFetch, ResultWithPartitions}
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.requests.FetchRequest
+import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
+import org.apache.kafka.common.message.{FetchResponseData, OffsetForLeaderEpochRequestData}
+
+import scala.collection.Map
+
+/**
+ * This trait defines the APIs to be used to access a broker that is a leader.
+ */
+trait LeaderEndPoint {
+
+  type FetchData = FetchResponseData.PartitionData
+  type EpochData = OffsetForLeaderEpochRequestData.OffsetForLeaderPartition
+
+  /**
+   * A boolean specifying if truncation when fetching from the leader is supported
+   */
+  def isTruncationOnFetchSupported: Boolean
+
+  /**
+   * Initiate closing access to fetches from leader.
+   */
+  def initiateClose(): Unit
+
+  /**
+   * Closes access to fetches from leader.
+   * `initiateClose` must be called prior to invoking `close`.
+   */
+  def close(): Unit
+
+  /**
+   * The specific broker (host:port) we want to connect to.
+   */
+  def brokerEndPoint(): BrokerEndPoint
+
+  /**
+   * Given a fetchRequest, carries out the expected request and returns
+   * the results from fetching from the leader.
+   *
+   * @param fetchRequest The fetch request we want to carry out
+   *
+   * @return A map of topic partition -> fetch data
+   */
+  def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData]
+
+  /**
+   * Fetches the log start offset of the given topic partition from the leader.
+   *
+   * @param topicPartition The topic partition that we want to fetch from
+   * @param currentLeaderEpoch An int representing the current leader epoch of the requester
+   *
+   * @return A long representing the earliest offset in the leader's topic partition.
+   */
+  def fetchEarliestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long
+
+  /**
+   * Fetches the log end offset of the given topic partition from the leader.
+   *
+   * @param topicPartition The topic partition that we want to fetch from
+   * @param currentLeaderEpoch An int representing the current leader epoch of the requester
+   *
+   * @return A long representing the latest offset in the leader's topic partition.
+   */
+  def fetchLatestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long
+
+  /**
+   * Fetches offset for leader epoch from the leader for each given topic partition
+   *
+   * @param partitions A map of topic partition -> leader epoch of the replica
+   *
+   * @return A map of topic partition -> end offset for a requested leader epoch
+   */
+  def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset]
+
+  /**
+   * Builds a fetch request, given a partition map.
+   *
+   * @param partitions A map of topic partitions to their respective partition fetch state
+   *
+   * @return A ResultWithPartitions, used to create the fetchRequest for fetch.
+   */
+  def buildFetch(partitions: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]]
+
+}
diff --git a/core/src/main/scala/kafka/server/LocalLeaderEndPoint.scala b/core/src/main/scala/kafka/server/LocalLeaderEndPoint.scala
new file mode 100644
index 0000000000000..1080c8e073976
--- /dev/null
+++ b/core/src/main/scala/kafka/server/LocalLeaderEndPoint.scala
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import kafka.api.Request
+import kafka.cluster.BrokerEndPoint
+import kafka.server.AbstractFetcherThread.{ReplicaFetch, ResultWithPartitions}
+import kafka.server.QuotaFactory.UnboundedQuota
+import kafka.utils.Logging
+import org.apache.kafka.common.errors.KafkaStorageException
+import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
+import org.apache.kafka.common.message.FetchResponseData
+import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH
+import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
+import org.apache.kafka.common.protocol.{ApiKeys, Errors}
+import org.apache.kafka.common.requests.{FetchRequest, FetchResponse, RequestUtils}
+
+import java.util
+import java.util.Optional
+import scala.collection.{Map, Seq, Set, mutable}
+import scala.compat.java8.OptionConverters.RichOptionForJava8
+import scala.jdk.CollectionConverters._
+
+/**
+ * Facilitates fetches from a local replica leader.
+ *
+ * @param sourceBroker The broker (host:port) that we want to connect to
+ * @param brokerConfig A config file with broker related configurations
+ * @param replicaManager A ReplicaManager
+ * @param quota The quota, used when building a fetch request
+ */
+class LocalLeaderEndPoint(sourceBroker: BrokerEndPoint,
+                          brokerConfig: KafkaConfig,
+                          replicaManager: ReplicaManager,
+                          quota: ReplicaQuota) extends LeaderEndPoint with Logging {
+
+  private val replicaId = brokerConfig.brokerId
+  private val maxBytes = brokerConfig.replicaFetchResponseMaxBytes
+  private val fetchSize = brokerConfig.replicaFetchMaxBytes
+  private var inProgressPartition: Option[TopicPartition] = None
+
+  override val isTruncationOnFetchSupported: Boolean = false
+
+  override def initiateClose(): Unit = {} // do nothing
+
+  override def close(): Unit = {} // do nothing
+
+  override def brokerEndPoint(): BrokerEndPoint = sourceBroker
+
+  override def fetch(fetchRequest: FetchRequest.Builder): collection.Map[TopicPartition, FetchData] = {
+    var partitionData: Seq[(TopicPartition, FetchData)] = null
+    val request = fetchRequest.build()
+
+    // We can build the map from the request since it contains topic IDs and names.
+    // Only one ID can be associated with a name and vice versa.
+    val topicNames = new mutable.HashMap[Uuid, String]()
+    request.data.topics.forEach { topic =>
+      topicNames.put(topic.topicId, topic.topic)
+    }
+
+    def processResponseCallback(responsePartitionData: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
+      partitionData = responsePartitionData.map { case (tp, data) =>
+        val abortedTransactions = data.abortedTransactions.map(_.asJava).orNull
+        val lastStableOffset = data.lastStableOffset.getOrElse(FetchResponse.INVALID_LAST_STABLE_OFFSET)
+        tp.topicPartition -> new FetchResponseData.PartitionData()
+          .setPartitionIndex(tp.topicPartition.partition)
+          .setErrorCode(data.error.code)
+          .setHighWatermark(data.highWatermark)
+          .setLastStableOffset(lastStableOffset)
+          .setLogStartOffset(data.logStartOffset)
+          .setAbortedTransactions(abortedTransactions)
+          .setRecords(data.records)
+      }
+    }
+
+    val fetchData = request.fetchData(topicNames.asJava)
+
+    val fetchParams = FetchParams(
+      requestVersion = request.version,
+      maxWaitMs = 0L, // timeout is 0 so that the callback will be executed immediately
+      replicaId = Request.FutureLocalReplicaId,
+      minBytes = request.minBytes,
+      maxBytes = request.maxBytes,
+      isolation = FetchLogEnd,
+      clientMetadata = None
+    )
+
+    replicaManager.fetchMessages(
+      params = fetchParams,
+      fetchInfos = fetchData.asScala.toSeq,
+      quota = UnboundedQuota,
+      responseCallback = processResponseCallback
+    )
+
+    if (partitionData == null)
+      throw new IllegalStateException(s"Failed to fetch data for partitions ${fetchData.keySet().toArray.mkString(",")}")
+
+    partitionData.toMap
+  }
+
+  override def fetchEarliestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
+    val partition = replicaManager.getPartitionOrException(topicPartition)
+    partition.localLogOrException.logStartOffset
+  }
+
+  override def fetchLatestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
+    val partition = replicaManager.getPartitionOrException(topicPartition)
+    partition.localLogOrException.logEndOffset
+  }
+
+  override def fetchEpochEndOffsets(partitions: collection.Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
+    partitions.map { case (tp, epochData) =>
+      try {
+        val endOffset = if (epochData.leaderEpoch == UNDEFINED_EPOCH) {
+          new EpochEndOffset()
+            .setPartition(tp.partition)
+            .setErrorCode(Errors.NONE.code)
+        } else {
+          val partition = replicaManager.getPartitionOrException(tp)
+          partition.lastOffsetForLeaderEpoch(
+            currentLeaderEpoch = RequestUtils.getLeaderEpoch(epochData.currentLeaderEpoch),
+            leaderEpoch = epochData.leaderEpoch,
+            fetchOnlyFromLeader = false)
+        }
+        tp -> endOffset
+      } catch {
+        case t: Throwable =>
+          warn(s"Error when getting EpochEndOffset for $tp", t)
+          tp -> new EpochEndOffset()
+            .setPartition(tp.partition)
+            .setErrorCode(Errors.forException(t).code)
+      }
+    }
+  }
+
+  override def buildFetch(partitions: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
+    // Only include replica in the fetch request if it is not throttled.
+    if (quota.isQuotaExceeded) {
+      ResultWithPartitions(None, Set.empty)
+    } else {
+      selectPartitionToFetch(partitions) match {
+        case Some((tp, fetchState)) =>
+          buildFetchForPartition(tp, fetchState)
+        case None =>
+          ResultWithPartitions(None, Set.empty)
+      }
+    }
+  }
+
+  private def selectPartitionToFetch(partitions: Map[TopicPartition, PartitionFetchState]): Option[(TopicPartition, PartitionFetchState)] = {
+    // Only move one partition at a time to increase its catch-up rate and thus reduce the time spent on
+    // moving any given replica. Replicas are selected in ascending order (lexicographically by topic) from the
+    // partitions that are ready to fetch. Once selected, we will continue fetching the same partition until it
+    // becomes unavailable or is removed.
+
+    inProgressPartition.foreach { tp =>
+      val fetchStateOpt = partitions.get(tp)
+      fetchStateOpt.filter(_.isReadyForFetch).foreach { fetchState =>
+        return Some((tp, fetchState))
+      }
+    }
+
+    inProgressPartition = None
+
+    val nextPartitionOpt = nextReadyPartition(partitions)
+    nextPartitionOpt.foreach { case (tp, fetchState) =>
+      inProgressPartition = Some(tp)
+      info(s"Beginning/resuming copy of partition $tp from offset ${fetchState.fetchOffset}. " +
+        s"Including this partition, there are ${partitions.size} remaining partitions to copy by this thread.")
+    }
+    nextPartitionOpt
+  }
+
+  private def buildFetchForPartition(topicPartition: TopicPartition, fetchState: PartitionFetchState): ResultWithPartitions[Option[ReplicaFetch]] = {
+    val requestMap = new util.LinkedHashMap[TopicPartition, FetchRequest.PartitionData]
+    val partitionsWithError = mutable.Set[TopicPartition]()
+
+    try {
+      val logStartOffset = replicaManager.futureLocalLogOrException(topicPartition).logStartOffset
+      val lastFetchedEpoch = if (isTruncationOnFetchSupported)
+        fetchState.lastFetchedEpoch.map(_.asInstanceOf[Integer]).asJava
+      else
+        Optional.empty[Integer]
+      val topicId = fetchState.topicId.getOrElse(Uuid.ZERO_UUID)
+      requestMap.put(topicPartition, new FetchRequest.PartitionData(topicId, fetchState.fetchOffset, logStartOffset,
+        fetchSize, Optional.of(fetchState.currentLeaderEpoch), lastFetchedEpoch))
+    } catch {
+      case e: KafkaStorageException =>
+        debug(s"Failed to build fetch for $topicPartition", e)
+        partitionsWithError += topicPartition
+    }
+
+    val fetchRequestOpt = if (requestMap.isEmpty) {
+      None
+    } else {
+      val version: Short = if (fetchState.topicId.isEmpty)
+        12
+      else
+        ApiKeys.FETCH.latestVersion
+      // Set maxWait and minBytes to 0 because the response should return immediately if
+      // the future log has caught up with the current log of the partition
+      val requestBuilder = FetchRequest.Builder.forReplica(version, replicaId, 0, 0, requestMap).setMaxBytes(maxBytes)
+      Some(ReplicaFetch(requestMap, requestBuilder))
+    }
+
+    ResultWithPartitions(fetchRequestOpt, partitionsWithError)
+  }
+
+  private def nextReadyPartition(partitions: Map[TopicPartition, PartitionFetchState]): Option[(TopicPartition, PartitionFetchState)] = {
+    partitions.filter { case (_, partitionFetchState) =>
+      partitionFetchState.isReadyForFetch
+    }.reduceLeftOption { (left, right) =>
+      if ((left._1.topic < right._1.topic) || (left._1.topic == right._1.topic && left._1.partition < right._1.partition))
+        left
+      else
+        right
+    }
+  }
+
+  override def toString: String = s"LocalLeaderEndPoint"
+}
diff --git a/core/src/main/scala/kafka/server/MetadataCache.scala b/core/src/main/scala/kafka/server/MetadataCache.scala
index 2e2da0cb06b69..b20d4f6414c2a 100755
--- a/core/src/main/scala/kafka/server/MetadataCache.scala
+++ b/core/src/main/scala/kafka/server/MetadataCache.scala
@@ -22,9 +22,16 @@ import kafka.server.metadata.{KRaftMetadataCache, ZkMetadataCache}
 import org.apache.kafka.common.message.{MetadataResponseData, UpdateMetadataRequestData}
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.{Cluster, Node, TopicPartition, Uuid}
+import org.apache.kafka.server.common.MetadataVersion
 
 import java.util
 
+case class FinalizedFeaturesAndEpoch(features: Map[String, Short], epoch: Long) {
+  override def toString(): String = {
+    s"FinalizedFeaturesAndEpoch(features=$features, epoch=$epoch)"
+  }
+}
+
 trait MetadataCache {
 
   /**
@@ -92,11 +99,17 @@ trait MetadataCache {
   def contains(topic: String): Boolean
 
   def contains(tp: TopicPartition): Boolean
+
+  def metadataVersion(): MetadataVersion
+
+  def features(): FinalizedFeaturesAndEpoch
 }
 
 object MetadataCache {
-  def zkMetadataCache(brokerId: Int): ZkMetadataCache = {
-    new ZkMetadataCache(brokerId)
+  def zkMetadataCache(brokerId: Int,
+                      metadataVersion: MetadataVersion,
+                      brokerFeatures: BrokerFeatures = BrokerFeatures.createEmpty()): ZkMetadataCache = {
+    new ZkMetadataCache(brokerId, metadataVersion, brokerFeatures)
   }
 
   def kRaftMetadataCache(brokerId: Int): KRaftMetadataCache = {
diff --git a/core/src/main/scala/kafka/server/PartitionMetadataFile.scala b/core/src/main/scala/kafka/server/PartitionMetadataFile.scala
index 749b6dd66fe93..f88a4cc90752a 100644
--- a/core/src/main/scala/kafka/server/PartitionMetadataFile.scala
+++ b/core/src/main/scala/kafka/server/PartitionMetadataFile.scala
@@ -44,8 +44,7 @@ object PartitionMetadataFile {
   }
 
   class PartitionMetadataReadBuffer[T](location: String,
-                                       reader: BufferedReader,
-                                       version: Int) extends Logging {
+                                       reader: BufferedReader) extends Logging {
     def read(): PartitionMetadata = {
       def malformedLineException(line: String) =
         new IOException(s"Malformed line in checkpoint file ($location): '$line'")
@@ -141,7 +140,7 @@ class PartitionMetadataFile(val file: File,
       try {
         val reader = Files.newBufferedReader(path)
         try {
-          val partitionBuffer = new PartitionMetadataReadBuffer(file.getAbsolutePath, reader, CurrentVersion)
+          val partitionBuffer = new PartitionMetadataReadBuffer(file.getAbsolutePath, reader)
           partitionBuffer.read()
         } finally {
           reader.close()
diff --git a/core/src/main/scala/kafka/server/RemoteLeaderEndPoint.scala b/core/src/main/scala/kafka/server/RemoteLeaderEndPoint.scala
new file mode 100644
index 0000000000000..826643a0f5ec6
--- /dev/null
+++ b/core/src/main/scala/kafka/server/RemoteLeaderEndPoint.scala
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import kafka.cluster.BrokerEndPoint
+
+import java.util.{Collections, Optional}
+import kafka.server.AbstractFetcherThread.{ReplicaFetch, ResultWithPartitions}
+import kafka.utils.Implicits.MapExtensionMethods
+import kafka.utils.Logging
+import org.apache.kafka.clients.FetchSessionHandler
+import org.apache.kafka.common.errors.KafkaStorageException
+import org.apache.kafka.common.{TopicPartition, Uuid}
+import org.apache.kafka.common.message.ListOffsetsRequestData.{ListOffsetsPartition, ListOffsetsTopic}
+import org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.{OffsetForLeaderTopic, OffsetForLeaderTopicCollection}
+import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.requests.{FetchRequest, FetchResponse, ListOffsetsRequest, ListOffsetsResponse, OffsetsForLeaderEpochRequest, OffsetsForLeaderEpochResponse}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_0_10_1_IV2
+
+import scala.jdk.CollectionConverters._
+import scala.collection.{Map, mutable}
+import scala.compat.java8.OptionConverters.RichOptionForJava8
+
+/**
+ * Facilitates fetches from a remote replica leader.
+ *
+ * @param logPrefix The log prefix
+ * @param blockingSender The raw leader endpoint used to communicate with the leader
+ * @param fetchSessionHandler A FetchSessionHandler to track the partitions in the session
+ * @param brokerConfig Broker configuration
+ * @param replicaManager A ReplicaManager
+ * @param quota The quota, used when building a fetch request
+ * @param metadataVersionSupplier A supplier that returns the current MetadataVersion. This can change during
+ *                                runtime in KRaft mode.
+ */
+class RemoteLeaderEndPoint(logPrefix: String,
+                           blockingSender: BlockingSend,
+                           private[server] val fetchSessionHandler: FetchSessionHandler, // visible for testing
+                           brokerConfig: KafkaConfig,
+                           replicaManager: ReplicaManager,
+                           quota: ReplicaQuota,
+                           metadataVersionSupplier: () => MetadataVersion) extends LeaderEndPoint with Logging {
+
+  this.logIdent = logPrefix
+
+  private val maxWait = brokerConfig.replicaFetchWaitMaxMs
+  private val minBytes = brokerConfig.replicaFetchMinBytes
+  private val maxBytes = brokerConfig.replicaFetchResponseMaxBytes
+  private val fetchSize = brokerConfig.replicaFetchMaxBytes
+
+  override def isTruncationOnFetchSupported = metadataVersionSupplier().isTruncationOnFetchSupported
+
+  override def initiateClose(): Unit = blockingSender.initiateClose()
+
+  override def close(): Unit = blockingSender.close()
+
+  override def brokerEndPoint(): BrokerEndPoint = blockingSender.brokerEndPoint()
+
+  override def fetch(fetchRequest: FetchRequest.Builder): collection.Map[TopicPartition, FetchData] = {
+    val clientResponse = try {
+      blockingSender.sendRequest(fetchRequest)
+    } catch {
+      case t: Throwable =>
+        fetchSessionHandler.handleError(t)
+        throw t
+    }
+    val fetchResponse = clientResponse.responseBody.asInstanceOf[FetchResponse]
+    if (!fetchSessionHandler.handleResponse(fetchResponse, clientResponse.requestHeader().apiVersion())) {
+      // If we had a session topic ID related error, throw it, otherwise return an empty fetch data map.
+      if (fetchResponse.error == Errors.FETCH_SESSION_TOPIC_ID_ERROR) {
+        throw Errors.forCode(fetchResponse.error().code()).exception()
+      } else {
+        Map.empty
+      }
+    } else {
+      fetchResponse.responseData(fetchSessionHandler.sessionTopicNames, clientResponse.requestHeader().apiVersion()).asScala
+    }
+  }
+
+  override def fetchEarliestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
+    fetchOffset(topicPartition, currentLeaderEpoch, ListOffsetsRequest.EARLIEST_TIMESTAMP)
+  }
+
+  override def fetchLatestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
+    fetchOffset(topicPartition, currentLeaderEpoch, ListOffsetsRequest.LATEST_TIMESTAMP)
+  }
+
+  private def fetchOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int, earliestOrLatest: Long): Long = {
+    val topic = new ListOffsetsTopic()
+      .setName(topicPartition.topic)
+      .setPartitions(Collections.singletonList(
+        new ListOffsetsPartition()
+          .setPartitionIndex(topicPartition.partition)
+          .setCurrentLeaderEpoch(currentLeaderEpoch)
+          .setTimestamp(earliestOrLatest)))
+    val metadataVersion = metadataVersionSupplier()
+    val requestBuilder = ListOffsetsRequest.Builder.forReplica(metadataVersion.listOffsetRequestVersion, brokerConfig.brokerId)
+      .setTargetTimes(Collections.singletonList(topic))
+
+    val clientResponse = blockingSender.sendRequest(requestBuilder)
+    val response = clientResponse.responseBody.asInstanceOf[ListOffsetsResponse]
+    val responsePartition = response.topics.asScala.find(_.name == topicPartition.topic).get
+      .partitions.asScala.find(_.partitionIndex == topicPartition.partition).get
+
+    Errors.forCode(responsePartition.errorCode) match {
+      case Errors.NONE =>
+        if (metadataVersion.isAtLeast(IBP_0_10_1_IV2))
+          responsePartition.offset
+        else
+          responsePartition.oldStyleOffsets.get(0)
+      case error => throw error.exception
+    }
+  }
+
+  override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
+    if (partitions.isEmpty) {
+      debug("Skipping leaderEpoch request since all partitions do not have an epoch")
+      return Map.empty
+    }
+
+    val topics = new OffsetForLeaderTopicCollection(partitions.size)
+    partitions.forKeyValue { (topicPartition, epochData) =>
+      var topic = topics.find(topicPartition.topic)
+      if (topic == null) {
+        topic = new OffsetForLeaderTopic().setTopic(topicPartition.topic)
+        topics.add(topic)
+      }
+      topic.partitions.add(epochData)
+    }
+
+    val epochRequest = OffsetsForLeaderEpochRequest.Builder.forFollower(
+      metadataVersionSupplier().offsetForLeaderEpochRequestVersion, topics, brokerConfig.brokerId)
+    debug(s"Sending offset for leader epoch request $epochRequest")
+
+    try {
+      val response = blockingSender.sendRequest(epochRequest)
+      val responseBody = response.responseBody.asInstanceOf[OffsetsForLeaderEpochResponse]
+      debug(s"Received leaderEpoch response $response")
+      responseBody.data.topics.asScala.flatMap { offsetForLeaderTopicResult =>
+        offsetForLeaderTopicResult.partitions.asScala.map { offsetForLeaderPartitionResult =>
+          val tp = new TopicPartition(offsetForLeaderTopicResult.topic, offsetForLeaderPartitionResult.partition)
+          tp -> offsetForLeaderPartitionResult
+        }
+      }.toMap
+    } catch {
+      case t: Throwable =>
+        warn(s"Error when sending leader epoch request for $partitions", t)
+
+        // if we get any unexpected exception, mark all partitions with an error
+        val error = Errors.forException(t)
+        partitions.map { case (tp, _) =>
+          tp -> new EpochEndOffset()
+            .setPartition(tp.partition)
+            .setErrorCode(error.code)
+        }
+    }
+  }
+
+  override def buildFetch(partitions: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
+    val partitionsWithError = mutable.Set[TopicPartition]()
+
+    val builder = fetchSessionHandler.newBuilder(partitions.size, false)
+    partitions.forKeyValue { (topicPartition, fetchState) =>
+      // We will not include a replica in the fetch request if it should be throttled.
+      if (fetchState.isReadyForFetch && !shouldFollowerThrottle(quota, fetchState, topicPartition)) {
+        try {
+          val logStartOffset = replicaManager.localLogOrException(topicPartition).logStartOffset
+          val lastFetchedEpoch = if (isTruncationOnFetchSupported)
+            fetchState.lastFetchedEpoch.map(_.asInstanceOf[Integer]).asJava
+          else
+            Optional.empty[Integer]
+          builder.add(topicPartition, new FetchRequest.PartitionData(
+            fetchState.topicId.getOrElse(Uuid.ZERO_UUID),
+            fetchState.fetchOffset,
+            logStartOffset,
+            fetchSize,
+            Optional.of(fetchState.currentLeaderEpoch),
+            lastFetchedEpoch))
+        } catch {
+          case _: KafkaStorageException =>
+            // The replica has already been marked offline due to log directory failure and the original failure should have already been logged.
+            // This partition should be removed from ReplicaFetcherThread soon by ReplicaManager.handleLogDirFailure()
+            partitionsWithError += topicPartition
+        }
+      }
+    }
+
+    val fetchData = builder.build()
+    val fetchRequestOpt = if (fetchData.sessionPartitions.isEmpty && fetchData.toForget.isEmpty) {
+      None
+    } else {
+      val metadataVersion = metadataVersionSupplier()
+      val version: Short = if (metadataVersion.fetchRequestVersion >= 13 && !fetchData.canUseTopicIds) {
+        12
+      } else {
+        metadataVersion.fetchRequestVersion
+      }
+      val requestBuilder = FetchRequest.Builder
+        .forReplica(version, brokerConfig.brokerId, maxWait, minBytes, fetchData.toSend)
+        .setMaxBytes(maxBytes)
+        .removed(fetchData.toForget)
+        .replaced(fetchData.toReplace)
+        .metadata(fetchData.metadata)
+      Some(ReplicaFetch(fetchData.sessionPartitions(), requestBuilder))
+    }
+
+    ResultWithPartitions(fetchRequestOpt, partitionsWithError)
+  }
+
+  /**
+   *  To avoid ISR thrashing, we only throttle a replica on the follower if it's in the throttled replica list,
+   *  the quota is exceeded and the replica is not in sync.
+   */
+  private def shouldFollowerThrottle(quota: ReplicaQuota, fetchState: PartitionFetchState, topicPartition: TopicPartition): Boolean = {
+    !fetchState.isReplicaInSync && quota.isThrottled(topicPartition) && quota.isQuotaExceeded
+  }
+
+  override def toString: String = s"RemoteLeaderEndPoint(blockingSender=$blockingSender)"
+}
diff --git a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsManager.scala b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsManager.scala
index b45a76620c74c..0613449e07739 100644
--- a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsManager.scala
+++ b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsManager.scala
@@ -31,8 +31,9 @@ class ReplicaAlterLogDirsManager(brokerConfig: KafkaConfig,
 
   override def createFetcherThread(fetcherId: Int, sourceBroker: BrokerEndPoint): ReplicaAlterLogDirsThread = {
     val threadName = s"ReplicaAlterLogDirsThread-$fetcherId"
-    new ReplicaAlterLogDirsThread(threadName, sourceBroker, brokerConfig, failedPartitions, replicaManager,
-      quotaManager, brokerTopicStats)
+    val leader = new LocalLeaderEndPoint(sourceBroker, brokerConfig, replicaManager, quotaManager)
+    new ReplicaAlterLogDirsThread(threadName, leader, failedPartitions, replicaManager,
+      quotaManager, brokerTopicStats, brokerConfig.replicaFetchBackoffMs)
   }
 
   override protected def addPartitionsToFetcherThread(fetcherThread: ReplicaAlterLogDirsThread,
diff --git a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala
index 2ce33c838aac8..10eae83b99f1d 100644
--- a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala
+++ b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala
@@ -17,44 +17,27 @@
 
 package kafka.server
 
-import kafka.api.Request
-import kafka.cluster.BrokerEndPoint
 import kafka.log.{LeaderOffsetIncremented, LogAppendInfo}
-import kafka.server.AbstractFetcherThread.{ReplicaFetch, ResultWithPartitions}
-import kafka.server.QuotaFactory.UnboundedQuota
-import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
-import org.apache.kafka.common.errors.KafkaStorageException
-import org.apache.kafka.common.message.FetchResponseData
-import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
-import org.apache.kafka.common.protocol.{ApiKeys, Errors}
-import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH
-import org.apache.kafka.common.requests.{FetchRequest, FetchResponse, RequestUtils}
-import java.util
-import java.util.Optional
-import scala.collection.{Map, Seq, Set, mutable}
-import scala.compat.java8.OptionConverters._
-import scala.jdk.CollectionConverters._
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.requests.FetchResponse
+
+import scala.collection.{Map, Set}
 
 class ReplicaAlterLogDirsThread(name: String,
-                                sourceBroker: BrokerEndPoint,
-                                brokerConfig: KafkaConfig,
+                                leader: LeaderEndPoint,
                                 failedPartitions: FailedPartitions,
                                 replicaMgr: ReplicaManager,
                                 quota: ReplicationQuotaManager,
-                                brokerTopicStats: BrokerTopicStats)
+                                brokerTopicStats: BrokerTopicStats,
+                                fetchBackOffMs: Int)
   extends AbstractFetcherThread(name = name,
                                 clientId = name,
-                                sourceBroker = sourceBroker,
+                                leader = leader,
                                 failedPartitions,
-                                fetchBackOffMs = brokerConfig.replicaFetchBackoffMs,
+                                fetchBackOffMs = fetchBackOffMs,
                                 isInterruptible = false,
                                 brokerTopicStats) {
 
-  private val replicaId = brokerConfig.brokerId
-  private val maxBytes = brokerConfig.replicaFetchResponseMaxBytes
-  private val fetchSize = brokerConfig.replicaFetchMaxBytes
-  private var inProgressPartition: Option[TopicPartition] = None
-
   override protected def latestEpoch(topicPartition: TopicPartition): Option[Int] = {
     replicaMgr.futureLocalLogOrException(topicPartition).latestEpoch
   }
@@ -71,53 +54,6 @@ class ReplicaAlterLogDirsThread(name: String,
     replicaMgr.futureLocalLogOrException(topicPartition).endOffsetForEpoch(epoch)
   }
 
-  def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
-    var partitionData: Seq[(TopicPartition, FetchData)] = null
-    val request = fetchRequest.build()
-
-    // We can build the map from the request since it contains topic IDs and names.
-    // Only one ID can be associated with a name and vice versa.
-    val topicNames = new mutable.HashMap[Uuid, String]()
-    request.data.topics.forEach { topic =>
-      topicNames.put(topic.topicId, topic.topic)
-    }
-
-
-    def processResponseCallback(responsePartitionData: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
-      partitionData = responsePartitionData.map { case (tp, data) =>
-        val abortedTransactions = data.abortedTransactions.map(_.asJava).orNull
-        val lastStableOffset = data.lastStableOffset.getOrElse(FetchResponse.INVALID_LAST_STABLE_OFFSET)
-        tp.topicPartition -> new FetchResponseData.PartitionData()
-          .setPartitionIndex(tp.topicPartition.partition)
-          .setErrorCode(data.error.code)
-          .setHighWatermark(data.highWatermark)
-          .setLastStableOffset(lastStableOffset)
-          .setLogStartOffset(data.logStartOffset)
-          .setAbortedTransactions(abortedTransactions)
-          .setRecords(data.records)
-      }
-    }
-
-    val fetchData = request.fetchData(topicNames.asJava)
-
-    replicaMgr.fetchMessages(
-      0L, // timeout is 0 so that the callback will be executed immediately
-      Request.FutureLocalReplicaId,
-      request.minBytes,
-      request.maxBytes,
-      false,
-      fetchData.asScala.toSeq,
-      UnboundedQuota,
-      processResponseCallback,
-      request.isolationLevel,
-      None)
-
-    if (partitionData == null)
-      throw new IllegalStateException(s"Failed to fetch data for partitions ${fetchData.keySet().toArray.mkString(",")}")
-
-    partitionData.toMap
-  }
-
   // process fetched data
   override def processPartitionData(topicPartition: TopicPartition,
                                     fetchOffset: Long,
@@ -159,50 +95,8 @@ class ReplicaAlterLogDirsThread(name: String,
     }
   }
 
-  override protected def fetchEarliestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
-    val partition = replicaMgr.getPartitionOrException(topicPartition)
-    partition.localLogOrException.logStartOffset
-  }
-
-  override protected def fetchLatestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
-    val partition = replicaMgr.getPartitionOrException(topicPartition)
-    partition.localLogOrException.logEndOffset
-  }
-
-  /**
-   * Fetches offset for leader epoch from local replica for each given topic partitions
-   * @param partitions map of topic partition -> leader epoch of the future replica
-   * @return map of topic partition -> end offset for a requested leader epoch
-   */
-  override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
-    partitions.map { case (tp, epochData) =>
-      try {
-        val endOffset = if (epochData.leaderEpoch == UNDEFINED_EPOCH) {
-          new EpochEndOffset()
-            .setPartition(tp.partition)
-            .setErrorCode(Errors.NONE.code)
-        } else {
-          val partition = replicaMgr.getPartitionOrException(tp)
-          partition.lastOffsetForLeaderEpoch(
-            currentLeaderEpoch = RequestUtils.getLeaderEpoch(epochData.currentLeaderEpoch),
-            leaderEpoch = epochData.leaderEpoch,
-            fetchOnlyFromLeader = false)
-        }
-        tp -> endOffset
-      } catch {
-        case t: Throwable =>
-          warn(s"Error when getting EpochEndOffset for $tp", t)
-          tp -> new EpochEndOffset()
-            .setPartition(tp.partition)
-            .setErrorCode(Errors.forException(t).code)
-      }
-    }
-  }
-
   override protected val isOffsetForLeaderEpochSupported: Boolean = true
 
-  override protected val isTruncationOnFetchSupported: Boolean = false
-
   /**
    * Truncate the log for each partition based on current replica's returned epoch and offset.
    *
@@ -227,88 +121,4 @@ class ReplicaAlterLogDirsThread(name: String,
     partition.truncateFullyAndStartAt(offset, isFuture = true)
   }
 
-  private def nextReadyPartition(partitionMap: Map[TopicPartition, PartitionFetchState]): Option[(TopicPartition, PartitionFetchState)] = {
-    partitionMap.filter { case (_, partitionFetchState) =>
-      partitionFetchState.isReadyForFetch
-    }.reduceLeftOption { (left, right) =>
-      if ((left._1.topic < right._1.topic) || (left._1.topic == right._1.topic && left._1.partition < right._1.partition))
-        left
-      else
-        right
-    }
-  }
-
-  private def selectPartitionToFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): Option[(TopicPartition, PartitionFetchState)] = {
-    // Only move one partition at a time to increase its catch-up rate and thus reduce the time spent on
-    // moving any given replica. Replicas are selected in ascending order (lexicographically by topic) from the
-    // partitions that are ready to fetch. Once selected, we will continue fetching the same partition until it
-    // becomes unavailable or is removed.
-
-    inProgressPartition.foreach { tp =>
-      val fetchStateOpt = partitionMap.get(tp)
-      fetchStateOpt.filter(_.isReadyForFetch).foreach { fetchState =>
-        return Some((tp, fetchState))
-      }
-    }
-
-    inProgressPartition = None
-
-    val nextPartitionOpt = nextReadyPartition(partitionMap)
-    nextPartitionOpt.foreach { case (tp, fetchState) =>
-      inProgressPartition = Some(tp)
-      info(s"Beginning/resuming copy of partition $tp from offset ${fetchState.fetchOffset}. " +
-        s"Including this partition, there are ${partitionMap.size} remaining partitions to copy by this thread.")
-    }
-    nextPartitionOpt
-  }
-
-  private def buildFetchForPartition(tp: TopicPartition, fetchState: PartitionFetchState): ResultWithPartitions[Option[ReplicaFetch]] = {
-    val requestMap = new util.LinkedHashMap[TopicPartition, FetchRequest.PartitionData]
-    val partitionsWithError = mutable.Set[TopicPartition]()
-
-    try {
-      val logStartOffset = replicaMgr.futureLocalLogOrException(tp).logStartOffset
-      val lastFetchedEpoch = if (isTruncationOnFetchSupported)
-        fetchState.lastFetchedEpoch.map(_.asInstanceOf[Integer]).asJava
-      else
-        Optional.empty[Integer]
-      val topicId = fetchState.topicId.getOrElse(Uuid.ZERO_UUID)
-      requestMap.put(tp, new FetchRequest.PartitionData(topicId, fetchState.fetchOffset, logStartOffset,
-        fetchSize, Optional.of(fetchState.currentLeaderEpoch), lastFetchedEpoch))
-    } catch {
-      case e: KafkaStorageException =>
-        debug(s"Failed to build fetch for $tp", e)
-        partitionsWithError += tp
-    }
-
-    val fetchRequestOpt = if (requestMap.isEmpty) {
-      None
-    } else {
-      val version: Short = if (fetchState.topicId.isEmpty)
-        12
-      else
-        ApiKeys.FETCH.latestVersion
-      // Set maxWait and minBytes to 0 because the response should return immediately if
-      // the future log has caught up with the current log of the partition
-      val requestBuilder = FetchRequest.Builder.forReplica(version, replicaId, 0, 0, requestMap).setMaxBytes(maxBytes)
-      Some(ReplicaFetch(requestMap, requestBuilder))
-    }
-
-    ResultWithPartitions(fetchRequestOpt, partitionsWithError)
-  }
-
-  def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
-    // Only include replica in the fetch request if it is not throttled.
-    if (quota.isQuotaExceeded) {
-      ResultWithPartitions(None, Set.empty)
-    } else {
-      selectPartitionToFetch(partitionMap) match {
-        case Some((tp, fetchState)) =>
-          buildFetchForPartition(tp, fetchState)
-        case None =>
-          ResultWithPartitions(None, Set.empty)
-      }
-    }
-  }
-
 }
diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherManager.scala b/core/src/main/scala/kafka/server/ReplicaFetcherManager.scala
index d547e1b5d769b..33af5836cd148 100644
--- a/core/src/main/scala/kafka/server/ReplicaFetcherManager.scala
+++ b/core/src/main/scala/kafka/server/ReplicaFetcherManager.scala
@@ -18,15 +18,18 @@
 package kafka.server
 
 import kafka.cluster.BrokerEndPoint
+import org.apache.kafka.clients.FetchSessionHandler
 import org.apache.kafka.common.metrics.Metrics
-import org.apache.kafka.common.utils.Time
+import org.apache.kafka.common.utils.{LogContext, Time}
+import org.apache.kafka.server.common.MetadataVersion
 
 class ReplicaFetcherManager(brokerConfig: KafkaConfig,
                             protected val replicaManager: ReplicaManager,
                             metrics: Metrics,
                             time: Time,
                             threadNamePrefix: Option[String] = None,
-                            quotaManager: ReplicationQuotaManager)
+                            quotaManager: ReplicationQuotaManager,
+                            metadataVersionSupplier: () => MetadataVersion)
       extends AbstractFetcherManager[ReplicaFetcherThread](
         name = "ReplicaFetcherManager on broker " + brokerConfig.brokerId,
         clientId = "Replica",
@@ -35,8 +38,15 @@ class ReplicaFetcherManager(brokerConfig: KafkaConfig,
   override def createFetcherThread(fetcherId: Int, sourceBroker: BrokerEndPoint): ReplicaFetcherThread = {
     val prefix = threadNamePrefix.map(tp => s"$tp:").getOrElse("")
     val threadName = s"${prefix}ReplicaFetcherThread-$fetcherId-${sourceBroker.id}"
-    new ReplicaFetcherThread(threadName, fetcherId, sourceBroker, brokerConfig, failedPartitions, replicaManager,
-      metrics, time, quotaManager)
+    val logContext = new LogContext(s"[ReplicaFetcher replicaId=${brokerConfig.brokerId}, leaderId=${sourceBroker.id}, " +
+      s"fetcherId=$fetcherId] ")
+    val endpoint = new BrokerBlockingSender(sourceBroker, brokerConfig, metrics, time, fetcherId,
+      s"broker-${brokerConfig.brokerId}-fetcher-$fetcherId", logContext)
+    val fetchSessionHandler = new FetchSessionHandler(logContext, sourceBroker.id)
+    val leader = new RemoteLeaderEndPoint(logContext.logPrefix, endpoint, fetchSessionHandler, brokerConfig,
+      replicaManager, quotaManager, metadataVersionSupplier)
+    new ReplicaFetcherThread(threadName, leader, brokerConfig, failedPartitions, replicaManager,
+      quotaManager, logContext.logPrefix, metadataVersionSupplier)
   }
 
   def shutdown(): Unit = {
diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
index 57d89dc3d7e62..2e728ce8173a8 100644
--- a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
+++ b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala
@@ -17,100 +17,31 @@
 
 package kafka.server
 
-import java.util.Collections
-import java.util.Optional
-
-import kafka.api._
-import kafka.cluster.BrokerEndPoint
 import kafka.log.{LeaderOffsetIncremented, LogAppendInfo}
-import kafka.server.AbstractFetcherThread.ReplicaFetch
-import kafka.server.AbstractFetcherThread.ResultWithPartitions
-import kafka.utils.Implicits._
-import org.apache.kafka.clients.FetchSessionHandler
-import org.apache.kafka.common.{TopicPartition, Uuid}
-import org.apache.kafka.common.errors.KafkaStorageException
-import org.apache.kafka.common.message.ListOffsetsRequestData.{ListOffsetsPartition, ListOffsetsTopic}
-import org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.OffsetForLeaderTopic
-import org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.OffsetForLeaderTopicCollection
-import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
-import org.apache.kafka.common.metrics.Metrics
-import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.record.MemoryRecords
 import org.apache.kafka.common.requests._
-import org.apache.kafka.common.utils.{LogContext, Time}
-
-import scala.jdk.CollectionConverters._
-import scala.collection.{Map, mutable}
-import scala.compat.java8.OptionConverters._
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.server.common.MetadataVersion
 
 class ReplicaFetcherThread(name: String,
-                           fetcherId: Int,
-                           sourceBroker: BrokerEndPoint,
+                           leader: LeaderEndPoint,
                            brokerConfig: KafkaConfig,
                            failedPartitions: FailedPartitions,
                            replicaMgr: ReplicaManager,
-                           metrics: Metrics,
-                           time: Time,
                            quota: ReplicaQuota,
-                           leaderEndpointBlockingSend: Option[BlockingSend] = None)
+                           logPrefix: String,
+                           metadataVersionSupplier: () => MetadataVersion)
   extends AbstractFetcherThread(name = name,
                                 clientId = name,
-                                sourceBroker = sourceBroker,
+                                leader = leader,
                                 failedPartitions,
                                 fetchBackOffMs = brokerConfig.replicaFetchBackoffMs,
                                 isInterruptible = false,
                                 replicaMgr.brokerTopicStats) {
 
-  private val replicaId = brokerConfig.brokerId
-  private val logContext = new LogContext(s"[ReplicaFetcher replicaId=$replicaId, leaderId=${sourceBroker.id}, " +
-    s"fetcherId=$fetcherId] ")
-  this.logIdent = logContext.logPrefix
-
-  private val leaderEndpoint = leaderEndpointBlockingSend.getOrElse(
-    new ReplicaFetcherBlockingSend(sourceBroker, brokerConfig, metrics, time, fetcherId,
-      s"broker-$replicaId-fetcher-$fetcherId", logContext))
-
-  // Visible for testing
-  private[server] val fetchRequestVersion: Short =
-    if (brokerConfig.interBrokerProtocolVersion >= KAFKA_3_1_IV0) 13
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_7_IV1) 12
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_3_IV1) 11
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_1_IV2) 10
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_0_IV1) 8
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_1_1_IV0) 7
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_11_0_IV1) 5
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_11_0_IV0) 4
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_10_1_IV1) 3
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_10_0_IV0) 2
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_9_0) 1
-    else 0
+  this.logIdent = logPrefix
 
-  // Visible for testing
-  private[server] val offsetForLeaderEpochRequestVersion: Short =
-    if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_8_IV0) 4
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_3_IV1) 3
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_1_IV1) 2
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_0_IV0) 1
-    else 0
-
-  // Visible for testing
-  private[server] val listOffsetRequestVersion: Short =
-    if (brokerConfig.interBrokerProtocolVersion >= KAFKA_3_0_IV1) 7
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_8_IV0) 6
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_2_IV1) 5
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_1_IV1) 4
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_2_0_IV1) 3
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_11_0_IV0) 2
-    else if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_10_1_IV2) 1
-    else 0
-
-  private val maxWait = brokerConfig.replicaFetchWaitMaxMs
-  private val minBytes = brokerConfig.replicaFetchMinBytes
-  private val maxBytes = brokerConfig.replicaFetchResponseMaxBytes
-  private val fetchSize = brokerConfig.replicaFetchMaxBytes
-  override protected val isOffsetForLeaderEpochSupported: Boolean = brokerConfig.interBrokerProtocolVersion >= KAFKA_0_11_0_IV2
-  override protected val isTruncationOnFetchSupported = ApiVersion.isTruncationOnFetchSupported(brokerConfig.interBrokerProtocolVersion)
-  val fetchSessionHandler = new FetchSessionHandler(logContext, sourceBroker.id)
+  override protected val isOffsetForLeaderEpochSupported: Boolean = metadataVersionSupplier().isOffsetForLeaderEpochSupported
 
   override protected def latestEpoch(topicPartition: TopicPartition): Option[Int] = {
     replicaMgr.localLogOrException(topicPartition).latestEpoch
@@ -135,10 +66,10 @@ class ReplicaFetcherThread(name: String,
       // to avoid failing the caller, especially during shutdown. We will attempt to close
       // leaderEndpoint after the thread terminates.
       try {
-        leaderEndpoint.initiateClose()
+        leader.initiateClose()
       } catch {
         case t: Throwable =>
-          error(s"Failed to initiate shutdown of leader endpoint $leaderEndpoint after initiating replica fetcher thread shutdown", t)
+          error(s"Failed to initiate shutdown of $leader after initiating replica fetcher thread shutdown", t)
       }
     }
     justShutdown
@@ -150,10 +81,10 @@ class ReplicaFetcherThread(name: String,
     // especially during shutdown. It is safe to catch the exception here without causing correctness
     // issue because we are going to shutdown the thread and will not re-use the leaderEndpoint anyway.
     try {
-      leaderEndpoint.close()
+      leader.close()
     } catch {
       case t: Throwable =>
-        error(s"Failed to close leader endpoint $leaderEndpoint after shutting down replica fetcher thread", t)
+        error(s"Failed to close $leader after shutting down replica fetcher thread", t)
     }
   }
 
@@ -206,115 +137,13 @@ class ReplicaFetcherThread(name: String,
 
   def maybeWarnIfOversizedRecords(records: MemoryRecords, topicPartition: TopicPartition): Unit = {
     // oversized messages don't cause replication to fail from fetch request version 3 (KIP-74)
-    if (fetchRequestVersion <= 2 && records.sizeInBytes > 0 && records.validBytes <= 0)
+    if (metadataVersionSupplier().fetchRequestVersion <= 2 && records.sizeInBytes > 0 && records.validBytes <= 0)
       error(s"Replication is failing due to a message that is greater than replica.fetch.max.bytes for partition $topicPartition. " +
         "This generally occurs when the max.message.bytes has been overridden to exceed this value and a suitably large " +
         "message has also been sent. To fix this problem increase replica.fetch.max.bytes in your broker config to be " +
         "equal or larger than your settings for max.message.bytes, both at a broker and topic level.")
   }
 
-
-  override protected def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
-    val clientResponse = try {
-      leaderEndpoint.sendRequest(fetchRequest)
-    } catch {
-      case t: Throwable =>
-        fetchSessionHandler.handleError(t)
-        throw t
-    }
-    val fetchResponse = clientResponse.responseBody.asInstanceOf[FetchResponse]
-    if (!fetchSessionHandler.handleResponse(fetchResponse, clientResponse.requestHeader().apiVersion())) {
-      // If we had a session topic ID related error, throw it, otherwise return an empty fetch data map.
-      if (fetchResponse.error == Errors.FETCH_SESSION_TOPIC_ID_ERROR) {
-        throw Errors.forCode(fetchResponse.error().code()).exception()
-      } else {
-        Map.empty
-      }
-    } else {
-      fetchResponse.responseData(fetchSessionHandler.sessionTopicNames, clientResponse.requestHeader().apiVersion()).asScala
-    }
-  }
-
-  override protected def fetchEarliestOffsetFromLeader(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
-    fetchOffsetFromLeader(topicPartition, currentLeaderEpoch, ListOffsetsRequest.EARLIEST_TIMESTAMP)
-  }
-
-  override protected def fetchLatestOffsetFromLeader(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = {
-    fetchOffsetFromLeader(topicPartition, currentLeaderEpoch, ListOffsetsRequest.LATEST_TIMESTAMP)
-  }
-
-  private def fetchOffsetFromLeader(topicPartition: TopicPartition, currentLeaderEpoch: Int, earliestOrLatest: Long): Long = {
-    val topic = new ListOffsetsTopic()
-      .setName(topicPartition.topic)
-      .setPartitions(Collections.singletonList(
-          new ListOffsetsPartition()
-            .setPartitionIndex(topicPartition.partition)
-            .setCurrentLeaderEpoch(currentLeaderEpoch)
-            .setTimestamp(earliestOrLatest)))
-    val requestBuilder = ListOffsetsRequest.Builder.forReplica(listOffsetRequestVersion, replicaId)
-      .setTargetTimes(Collections.singletonList(topic))
-
-    val clientResponse = leaderEndpoint.sendRequest(requestBuilder)
-    val response = clientResponse.responseBody.asInstanceOf[ListOffsetsResponse]
-    val responsePartition = response.topics.asScala.find(_.name == topicPartition.topic).get
-      .partitions.asScala.find(_.partitionIndex == topicPartition.partition).get
-
-     Errors.forCode(responsePartition.errorCode) match {
-      case Errors.NONE =>
-        if (brokerConfig.interBrokerProtocolVersion >= KAFKA_0_10_1_IV2)
-          responsePartition.offset
-        else
-          responsePartition.oldStyleOffsets.get(0)
-      case error => throw error.exception
-    }
-  }
-
-  override def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
-    val partitionsWithError = mutable.Set[TopicPartition]()
-
-    val builder = fetchSessionHandler.newBuilder(partitionMap.size, false)
-    partitionMap.forKeyValue { (topicPartition, fetchState) =>
-      // We will not include a replica in the fetch request if it should be throttled.
-      if (fetchState.isReadyForFetch && !shouldFollowerThrottle(quota, fetchState, topicPartition)) {
-        try {
-          val logStartOffset = this.logStartOffset(topicPartition)
-          val lastFetchedEpoch = if (isTruncationOnFetchSupported)
-            fetchState.lastFetchedEpoch.map(_.asInstanceOf[Integer]).asJava
-          else
-            Optional.empty[Integer]
-          builder.add(topicPartition, new FetchRequest.PartitionData(
-            fetchState.topicId.getOrElse(Uuid.ZERO_UUID),
-            fetchState.fetchOffset,
-            logStartOffset,
-            fetchSize,
-            Optional.of(fetchState.currentLeaderEpoch),
-            lastFetchedEpoch))
-        } catch {
-          case _: KafkaStorageException =>
-            // The replica has already been marked offline due to log directory failure and the original failure should have already been logged.
-            // This partition should be removed from ReplicaFetcherThread soon by ReplicaManager.handleLogDirFailure()
-            partitionsWithError += topicPartition
-        }
-      }
-    }
-
-    val fetchData = builder.build()
-    val fetchRequestOpt = if (fetchData.sessionPartitions.isEmpty && fetchData.toForget.isEmpty) {
-      None
-    } else {
-      val version: Short = if (fetchRequestVersion >= 13 && !fetchData.canUseTopicIds) 12 else fetchRequestVersion
-      val requestBuilder = FetchRequest.Builder
-        .forReplica(version, replicaId, maxWait, minBytes, fetchData.toSend)
-        .setMaxBytes(maxBytes)
-        .removed(fetchData.toForget)
-        .replaced(fetchData.toReplace)
-        .metadata(fetchData.metadata)
-      Some(ReplicaFetch(fetchData.sessionPartitions(), requestBuilder))
-    }
-
-    ResultWithPartitions(fetchRequestOpt, partitionsWithError)
-  }
-
   /**
    * Truncate the log for each partition's epoch based on leader's returned epoch and offset.
    * The logic for finding the truncation offset is implemented in AbstractFetcherThread.getOffsetTruncationState
@@ -340,57 +169,4 @@ class ReplicaFetcherThread(name: String,
     partition.truncateFullyAndStartAt(offset, isFuture = false)
   }
 
-  override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
-
-    if (partitions.isEmpty) {
-      debug("Skipping leaderEpoch request since all partitions do not have an epoch")
-      return Map.empty
-    }
-
-    val topics = new OffsetForLeaderTopicCollection(partitions.size)
-    partitions.forKeyValue { (topicPartition, epochData) =>
-      var topic = topics.find(topicPartition.topic)
-      if (topic == null) {
-        topic = new OffsetForLeaderTopic().setTopic(topicPartition.topic)
-        topics.add(topic)
-      }
-      topic.partitions.add(epochData)
-    }
-
-    val epochRequest = OffsetsForLeaderEpochRequest.Builder.forFollower(
-      offsetForLeaderEpochRequestVersion, topics, brokerConfig.brokerId)
-    debug(s"Sending offset for leader epoch request $epochRequest")
-
-    try {
-      val response = leaderEndpoint.sendRequest(epochRequest)
-      val responseBody = response.responseBody.asInstanceOf[OffsetsForLeaderEpochResponse]
-      debug(s"Received leaderEpoch response $response")
-      responseBody.data.topics.asScala.flatMap { offsetForLeaderTopicResult =>
-        offsetForLeaderTopicResult.partitions.asScala.map { offsetForLeaderPartitionResult =>
-          val tp = new TopicPartition(offsetForLeaderTopicResult.topic, offsetForLeaderPartitionResult.partition)
-          tp -> offsetForLeaderPartitionResult
-        }
-      }.toMap
-    } catch {
-      case t: Throwable =>
-        warn(s"Error when sending leader epoch request for $partitions", t)
-
-        // if we get any unexpected exception, mark all partitions with an error
-        val error = Errors.forException(t)
-        partitions.map { case (tp, _) =>
-          tp -> new EpochEndOffset()
-            .setPartition(tp.partition)
-            .setErrorCode(error.code)
-        }
-    }
-  }
-
-  /**
-   *  To avoid ISR thrashing, we only throttle a replica on the follower if it's in the throttled replica list,
-   *  the quota is exceeded and the replica is not in sync.
-   */
-  private def shouldFollowerThrottle(quota: ReplicaQuota, fetchState: PartitionFetchState, topicPartition: TopicPartition): Boolean = {
-    !fetchState.isReplicaInSync && quota.isThrottled(topicPartition) && quota.isQuotaExceeded
-  }
-
 }
diff --git a/core/src/main/scala/kafka/server/ReplicaManager.scala b/core/src/main/scala/kafka/server/ReplicaManager.scala
index 22f2755ba58f4..f60bd53a085e3 100644
--- a/core/src/main/scala/kafka/server/ReplicaManager.scala
+++ b/core/src/main/scala/kafka/server/ReplicaManager.scala
@@ -28,7 +28,6 @@ import kafka.common.RecordValidationException
 import kafka.controller.{KafkaController, StateChangeLogger}
 import kafka.log._
 import kafka.metrics.KafkaMetricsGroup
-import kafka.server.{FetchMetadata => SFetchMetadata}
 import kafka.server.HostedPartition.Online
 import kafka.server.QuotaFactory.QuotaManagers
 import kafka.server.checkpoints.{LazyOffsetCheckpoints, OffsetCheckpointFile, OffsetCheckpoints}
@@ -60,7 +59,10 @@ import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.image.{LocalReplicaChanges, MetadataImage, TopicsDelta}
+import org.apache.kafka.metadata.LeaderConstants.NO_LEADER
+import org.apache.kafka.server.common.MetadataVersion._
 
+import java.nio.file.{Files, Paths}
 import scala.jdk.CollectionConverters._
 import scala.collection.{Map, Seq, Set, mutable}
 import scala.compat.java8.OptionConverters._
@@ -190,7 +192,7 @@ class ReplicaManager(val config: KafkaConfig,
                      quotaManagers: QuotaManagers,
                      val metadataCache: MetadataCache,
                      logDirFailureChannel: LogDirFailureChannel,
-                     val alterIsrManager: AlterIsrManager,
+                     val alterPartitionManager: AlterPartitionManager,
                      val brokerTopicStats: BrokerTopicStats = new BrokerTopicStats(),
                      val isShuttingDown: AtomicBoolean = new AtomicBoolean(false),
                      val zkClient: Option[KafkaZkClient] = None,
@@ -230,6 +232,8 @@ class ReplicaManager(val config: KafkaConfig,
   @volatile private[server] var highWatermarkCheckpoints: Map[String, OffsetCheckpointFile] = logManager.liveLogDirs.map(dir =>
     (dir.getAbsolutePath, new OffsetCheckpointFile(new File(dir, ReplicaManager.HighWatermarkFilename), logDirFailureChannel))).toMap
 
+  @volatile private var isInControlledShutdown = false
+
   this.logIdent = s"[ReplicaManager broker=$localBrokerId] "
   protected val stateChangeLogger = new StateChangeLogger(localBrokerId, inControllerContext = false, None)
 
@@ -307,7 +311,7 @@ class ReplicaManager(val config: KafkaConfig,
     // If inter-broker protocol (IBP) < 1.0, the controller will send LeaderAndIsrRequest V0 which does not include isNew field.
     // In this case, the broker receiving the request cannot determine whether it is safe to create a partition if a log directory has failed.
     // Thus, we choose to halt the broker on any log directory failure if IBP < 1.0
-    val haltBrokerOnFailure = config.interBrokerProtocolVersion < KAFKA_1_0_IV0
+    val haltBrokerOnFailure = metadataCache.metadataVersion().isLessThan(IBP_1_0_IV0)
     logDirFailureHandler = new LogDirFailureHandler("LogDirFailureHandler", haltBrokerOnFailure)
     logDirFailureHandler.start()
   }
@@ -711,7 +715,7 @@ class ReplicaManager(val config: KafkaConfig,
           /* If the topic name is exceptionally long, we can't support altering the log directory.
            * See KAFKA-4893 for details.
            * TODO: fix this by implementing topic IDs. */
-          if (UnifiedLog.logFutureDirName(topicPartition).size > 255)
+          if (UnifiedLog.logFutureDirName(topicPartition).length > 255)
             throw new InvalidTopicException("The topic name is too long.")
           if (!logManager.isLogDirOnline(destinationDir))
             throw new KafkaStorageException(s"Log directory $destinationDir is offline")
@@ -787,11 +791,15 @@ class ReplicaManager(val config: KafkaConfig,
     val logsByDir = logManager.allLogs.groupBy(log => log.parentDir)
 
     config.logDirs.toSet.map { logDir: String =>
-      val absolutePath = new File(logDir).getAbsolutePath
+      val file = Paths.get(logDir)
+      val absolutePath = file.toAbsolutePath.toString
       try {
         if (!logManager.isLogDirOnline(absolutePath))
           throw new KafkaStorageException(s"Log directory $absolutePath is offline")
 
+        val fileStore = Files.getFileStore(file)
+        val totalBytes = adjustForLargeFileSystems(fileStore.getTotalSpace)
+        val usableBytes = adjustForLargeFileSystems(fileStore.getUsableSpace)
         logsByDir.get(absolutePath) match {
           case Some(logs) =>
             val topicInfos = logs.groupBy(_.topicPartition.topic).map{case (topic, logs) =>
@@ -809,9 +817,11 @@ class ReplicaManager(val config: KafkaConfig,
 
             new DescribeLogDirsResponseData.DescribeLogDirsResult().setLogDir(absolutePath)
               .setErrorCode(Errors.NONE.code).setTopics(topicInfos)
+              .setTotalBytes(totalBytes).setUsableBytes(usableBytes)
           case None =>
             new DescribeLogDirsResponseData.DescribeLogDirsResult().setLogDir(absolutePath)
               .setErrorCode(Errors.NONE.code)
+              .setTotalBytes(totalBytes).setUsableBytes(usableBytes)
         }
 
       } catch {
@@ -829,6 +839,13 @@ class ReplicaManager(val config: KafkaConfig,
     }.toList
   }
 
+  // See: https://bugs.openjdk.java.net/browse/JDK-8162520
+  def adjustForLargeFileSystems(space: Long): Long = {
+    if (space < 0)
+      return Long.MaxValue
+    space
+  }
+
   def getLogEndOffsetLag(topicPartition: TopicPartition, logEndOffset: Long, isFuture: Boolean): Long = {
     localLog(topicPartition) match {
       case Some(log) =>
@@ -987,56 +1004,29 @@ class ReplicaManager(val config: KafkaConfig,
    * the callback function will be triggered either when timeout or required fetch info is satisfied.
    * Consumers may fetch from any replica, but followers can only fetch from the leader.
    */
-  def fetchMessages(timeout: Long,
-                    replicaId: Int,
-                    fetchMinBytes: Int,
-                    fetchMaxBytes: Int,
-                    hardMaxBytesLimit: Boolean,
-                    fetchInfos: Seq[(TopicIdPartition, PartitionData)],
-                    quota: ReplicaQuota,
-                    responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit,
-                    isolationLevel: IsolationLevel,
-                    clientMetadata: Option[ClientMetadata]): Unit = {
-    val isFromFollower = Request.isValidBrokerId(replicaId)
-    val isFromConsumer = !(isFromFollower || replicaId == Request.FutureLocalReplicaId)
-    val fetchIsolation = if (!isFromConsumer)
-      FetchLogEnd
-    else if (isolationLevel == IsolationLevel.READ_COMMITTED)
-      FetchTxnCommitted
-    else
-      FetchHighWatermark
-
-    // Restrict fetching to leader if request is from follower or from a client with older version (no ClientMetadata)
-    val fetchOnlyFromLeader = isFromFollower || (isFromConsumer && clientMetadata.isEmpty)
-    def readFromLog(): Seq[(TopicIdPartition, LogReadResult)] = {
-      val result = readFromLocalLog(
-        replicaId = replicaId,
-        fetchOnlyFromLeader = fetchOnlyFromLeader,
-        fetchIsolation = fetchIsolation,
-        fetchMaxBytes = fetchMaxBytes,
-        hardMaxBytesLimit = hardMaxBytesLimit,
-        readPartitionInfo = fetchInfos,
-        quota = quota,
-        clientMetadata = clientMetadata)
-      if (isFromFollower) updateFollowerFetchState(replicaId, result)
-      else result
-    }
-
-    val logReadResults = readFromLog()
-
+  def fetchMessages(
+    params: FetchParams,
+    fetchInfos: Seq[(TopicIdPartition, PartitionData)],
+    quota: ReplicaQuota,
+    responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit
+  ): Unit = {
     // check if this fetch request can be satisfied right away
+    val logReadResults = readFromLocalLog(params, fetchInfos, quota, readFromPurgatory = false)
     var bytesReadable: Long = 0
     var errorReadingData = false
     var hasDivergingEpoch = false
+    var hasPreferredReadReplica = false
     val logReadResultMap = new mutable.HashMap[TopicIdPartition, LogReadResult]
+
     logReadResults.foreach { case (topicIdPartition, logReadResult) =>
       brokerTopicStats.topicStats(topicIdPartition.topicPartition.topic).totalFetchRequestRate.mark()
       brokerTopicStats.allTopicsStats.totalFetchRequestRate.mark()
-
       if (logReadResult.error != Errors.NONE)
         errorReadingData = true
       if (logReadResult.divergingEpoch.nonEmpty)
         hasDivergingEpoch = true
+      if (logReadResult.preferredReadReplica.nonEmpty)
+        hasPreferredReadReplica = true
       bytesReadable = bytesReadable + logReadResult.info.records.sizeInBytes
       logReadResultMap.put(topicIdPartition, logReadResult)
     }
@@ -1046,9 +1036,11 @@ class ReplicaManager(val config: KafkaConfig,
     //                        3) has enough data to respond
     //                        4) some error happens while reading data
     //                        5) we found a diverging epoch
-    if (timeout <= 0 || fetchInfos.isEmpty || bytesReadable >= fetchMinBytes || errorReadingData || hasDivergingEpoch) {
+    //                        6) has a preferred read replica
+    if (params.maxWaitMs <= 0 || fetchInfos.isEmpty || bytesReadable >= params.minBytes || errorReadingData ||
+      hasDivergingEpoch || hasPreferredReadReplica) {
       val fetchPartitionData = logReadResults.map { case (tp, result) =>
-        val isReassignmentFetch = isFromFollower && isAddingReplica(tp.topicPartition, replicaId)
+        val isReassignmentFetch = params.isFromFollower && isAddingReplica(tp.topicPartition, params.replicaId)
         tp -> result.toFetchPartitionData(isReassignmentFetch)
       }
       responseCallback(fetchPartitionData)
@@ -1061,10 +1053,13 @@ class ReplicaManager(val config: KafkaConfig,
           fetchPartitionStatus += (topicIdPartition -> FetchPartitionStatus(logOffsetMetadata, partitionData))
         })
       }
-      val fetchMetadata: SFetchMetadata = SFetchMetadata(fetchMinBytes, fetchMaxBytes, hardMaxBytesLimit,
-        fetchOnlyFromLeader, fetchIsolation, isFromFollower, replicaId, fetchPartitionStatus)
-      val delayedFetch = new DelayedFetch(timeout, fetchMetadata, this, quota, clientMetadata,
-        responseCallback)
+      val delayedFetch = new DelayedFetch(
+        params = params,
+        fetchPartitionStatus = fetchPartitionStatus,
+        replicaManager = this,
+        quota = quota,
+        responseCallback = responseCallback
+      )
 
       // create a list of (topic, partition) pairs to use as keys for this delayed fetch operation
       val delayedFetchKeys = fetchPartitionStatus.map { case (tp, _) => TopicPartitionOperationKey(tp) }
@@ -1079,14 +1074,12 @@ class ReplicaManager(val config: KafkaConfig,
   /**
    * Read from multiple topic partitions at the given offset up to maxSize bytes
    */
-  def readFromLocalLog(replicaId: Int,
-                       fetchOnlyFromLeader: Boolean,
-                       fetchIsolation: FetchIsolation,
-                       fetchMaxBytes: Int,
-                       hardMaxBytesLimit: Boolean,
-                       readPartitionInfo: Seq[(TopicIdPartition, PartitionData)],
-                       quota: ReplicaQuota,
-                       clientMetadata: Option[ClientMetadata]): Seq[(TopicIdPartition, LogReadResult)] = {
+  def readFromLocalLog(
+    params: FetchParams,
+    readPartitionInfo: Seq[(TopicIdPartition, PartitionData)],
+    quota: ReplicaQuota,
+    readFromPurgatory: Boolean
+  ): Seq[(TopicIdPartition, LogReadResult)] = {
     val traceEnabled = isTraceEnabled
 
     def read(tp: TopicIdPartition, fetchInfo: PartitionData, limitBytes: Int, minOneMessage: Boolean): LogReadResult = {
@@ -1110,13 +1103,13 @@ class ReplicaManager(val config: KafkaConfig,
           throw new InconsistentTopicIdException("Topic ID in the fetch session did not match the topic ID in the log.")
 
         // If we are the leader, determine the preferred read-replica
-        val preferredReadReplica = clientMetadata.flatMap(
-          metadata => findPreferredReadReplica(partition, metadata, replicaId, fetchInfo.fetchOffset, fetchTimeMs))
+        val preferredReadReplica = params.clientMetadata.flatMap(
+          metadata => findPreferredReadReplica(partition, metadata, params.replicaId, fetchInfo.fetchOffset, fetchTimeMs))
 
         if (preferredReadReplica.isDefined) {
           replicaSelectorOpt.foreach { selector =>
             debug(s"Replica selector ${selector.getClass.getSimpleName} returned preferred replica " +
-              s"${preferredReadReplica.get} for $clientMetadata")
+              s"${preferredReadReplica.get} for ${params.clientMetadata}")
           }
           // If a preferred read-replica is set, skip the read
           val offsetSnapshot = partition.fetchOffsetSnapshot(fetchInfo.currentLeaderEpoch, fetchOnlyFromLeader = false)
@@ -1132,20 +1125,19 @@ class ReplicaManager(val config: KafkaConfig,
             exception = None)
         } else {
           // Try the read first, this tells us whether we need all of adjustedFetchSize for this partition
-          val readInfo: LogReadInfo = partition.readRecords(
-            lastFetchedEpoch = fetchInfo.lastFetchedEpoch,
-            fetchOffset = fetchInfo.fetchOffset,
-            currentLeaderEpoch = fetchInfo.currentLeaderEpoch,
+          val readInfo: LogReadInfo = partition.fetchRecords(
+            fetchParams = params,
+            fetchPartitionData = fetchInfo,
+            fetchTimeMs = fetchTimeMs,
             maxBytes = adjustedMaxBytes,
-            fetchIsolation = fetchIsolation,
-            fetchOnlyFromLeader = fetchOnlyFromLeader,
-            minOneMessage = minOneMessage)
-          val isFromFollower = Request.isValidBrokerId(replicaId)
+            minOneMessage = minOneMessage,
+            updateFetchState = !readFromPurgatory
+          )
 
-          val fetchDataInfo = if (isFromFollower && shouldLeaderThrottle(quota, partition, replicaId)) {
+          val fetchDataInfo = if (params.isFromFollower && shouldLeaderThrottle(quota, partition, params.replicaId)) {
             // If the partition is being throttled, simply return an empty set.
             FetchDataInfo(readInfo.fetchedData.fetchOffsetMetadata, MemoryRecords.EMPTY)
-          } else if (!hardMaxBytesLimit && readInfo.fetchedData.firstEntryIncomplete) {
+          } else if (!params.hardMaxBytesLimit && readInfo.fetchedData.firstEntryIncomplete) {
             // For FetchRequest version 3, we replace incomplete message sets with an empty one as consumers can make
             // progress in such cases and don't need to report a `RecordTooLargeException`
             FetchDataInfo(readInfo.fetchedData.fetchOffsetMetadata, MemoryRecords.EMPTY)
@@ -1162,7 +1154,8 @@ class ReplicaManager(val config: KafkaConfig,
             fetchTimeMs = fetchTimeMs,
             lastStableOffset = Some(readInfo.lastStableOffset),
             preferredReadReplica = preferredReadReplica,
-            exception = None)
+            exception = None
+          )
         }
       } catch {
         // NOTE: Failed fetch requests metric is not incremented for known exceptions since it
@@ -1188,7 +1181,7 @@ class ReplicaManager(val config: KafkaConfig,
           brokerTopicStats.topicStats(tp.topic).failedFetchRequestRate.mark()
           brokerTopicStats.allTopicsStats.failedFetchRequestRate.mark()
 
-          val fetchSource = Request.describeReplicaId(replicaId)
+          val fetchSource = Request.describeReplicaId(params.replicaId)
           error(s"Error processing fetch with max size $adjustedMaxBytes from $fetchSource " +
             s"on partition $tp: $fetchInfo", e)
 
@@ -1200,13 +1193,14 @@ class ReplicaManager(val config: KafkaConfig,
             followerLogStartOffset = UnifiedLog.UnknownOffset,
             fetchTimeMs = -1L,
             lastStableOffset = None,
-            exception = Some(e))
+            exception = Some(e)
+          )
       }
     }
 
-    var limitBytes = fetchMaxBytes
+    var limitBytes = params.maxBytes
     val result = new mutable.ArrayBuffer[(TopicIdPartition, LogReadResult)]
-    var minOneMessage = !hardMaxBytesLimit
+    var minOneMessage = !params.hardMaxBytesLimit
     readPartitionInfo.foreach { case (tp, fetchInfo) =>
       val readResult = read(tp, fetchInfo, limitBytes, minOneMessage)
       val recordBatchSize = readResult.info.records.sizeInBytes
@@ -1229,7 +1223,7 @@ class ReplicaManager(val config: KafkaConfig,
                                replicaId: Int,
                                fetchOffset: Long,
                                currentTimeMs: Long): Option[Int] = {
-    partition.leaderReplicaIdOpt.flatMap { leaderReplicaId =>
+    partition.leaderIdIfLocal.flatMap { leaderReplicaId =>
       // Don't look up preferred for follower fetches via normal replication
       if (Request.isValidBrokerId(replicaId))
         None
@@ -1237,18 +1231,26 @@ class ReplicaManager(val config: KafkaConfig,
         replicaSelectorOpt.flatMap { replicaSelector =>
           val replicaEndpoints = metadataCache.getPartitionReplicaEndpoints(partition.topicPartition,
             new ListenerName(clientMetadata.listenerName))
-          val replicaInfos = partition.remoteReplicas
+          val replicaInfoSet = mutable.Set[ReplicaView]()
+
+          partition.remoteReplicas.foreach { replica =>
+            val replicaState = replica.stateSnapshot
             // Exclude replicas that don't have the requested offset (whether or not if they're in the ISR)
-            .filter(replica => replica.logEndOffset >= fetchOffset && replica.logStartOffset <= fetchOffset)
-            .map(replica => new DefaultReplicaView(
-              replicaEndpoints.getOrElse(replica.brokerId, Node.noNode()),
-              replica.logEndOffset,
-              currentTimeMs - replica.lastCaughtUpTimeMs))
+            if (replicaState.logEndOffset >= fetchOffset && replicaState.logStartOffset <= fetchOffset) {
+              replicaInfoSet.add(new DefaultReplicaView(
+                replicaEndpoints.getOrElse(replica.brokerId, Node.noNode()),
+                replicaState.logEndOffset,
+                currentTimeMs - replicaState.lastCaughtUpTimeMs
+              ))
+            }
+          }
 
           val leaderReplica = new DefaultReplicaView(
             replicaEndpoints.getOrElse(leaderReplicaId, Node.noNode()),
-            partition.localLogOrException.logEndOffset, 0L)
-          val replicaInfoSet = mutable.Set[ReplicaView]() ++= replicaInfos += leaderReplica
+            partition.localLogOrException.logEndOffset,
+            0L
+          )
+          replicaInfoSet.add(leaderReplica)
 
           val partitionInfo = new DefaultPartitionView(replicaInfoSet.asJava, leaderReplica)
           replicaSelector.select(partition.topicPartition, clientMetadata, partitionInfo).asScala.collect {
@@ -1585,13 +1587,9 @@ class ReplicaManager(val config: KafkaConfig,
       // Update the partition information to be the leader
       partitionStates.forKeyValue { (partition, partitionState) =>
         try {
-          if (partition.makeLeader(partitionState, highWatermarkCheckpoints, topicIds(partitionState.topicName)))
+          if (partition.makeLeader(partitionState, highWatermarkCheckpoints, topicIds(partitionState.topicName))) {
             partitionsToMakeLeaders += partition
-          else
-            stateChangeLogger.info(s"Skipped the become-leader state change after marking its " +
-              s"partition as leader with correlation id $correlationId from controller $controllerId epoch $controllerEpoch for " +
-              s"partition ${partition.topicPartition} (last update controller epoch ${partitionState.controllerEpoch}) " +
-              s"since it is already the leader for the partition.")
+          }
         } catch {
           case e: KafkaStorageException =>
             stateChangeLogger.error(s"Skipped the become-leader state change with " +
@@ -1662,20 +1660,14 @@ class ReplicaManager(val config: KafkaConfig,
 
     val partitionsToMakeFollower: mutable.Set[Partition] = mutable.Set()
     try {
-      // TODO: Delete leaders from LeaderAndIsrRequest
       partitionStates.forKeyValue { (partition, partitionState) =>
         val newLeaderBrokerId = partitionState.leader
         try {
           if (metadataCache.hasAliveBroker(newLeaderBrokerId)) {
             // Only change partition state when the leader is available
-            if (partition.makeFollower(partitionState, highWatermarkCheckpoints, topicIds(partitionState.topicName)))
+            if (partition.makeFollower(partitionState, highWatermarkCheckpoints, topicIds(partitionState.topicName))) {
               partitionsToMakeFollower += partition
-            else
-              stateChangeLogger.info(s"Skipped the become-follower state change after marking its partition as " +
-                s"follower with correlation id $correlationId from controller $controllerId epoch $controllerEpoch " +
-                s"for partition ${partition.topicPartition} (last update " +
-                s"controller epoch ${partitionState.controllerEpoch}) " +
-                s"since the new leader $newLeaderBrokerId is the same as the old leader")
+            }
           } else {
             // The leader broker should always be present in the metadata cache.
             // If not, we should record the error message and abort the transition process for this partition
@@ -1795,7 +1787,7 @@ class ReplicaManager(val config: KafkaConfig,
    * OffsetForLeaderEpoch request.
    */
   protected def initialFetchOffset(log: UnifiedLog): Long = {
-    if (ApiVersion.isTruncationOnFetchSupported(config.interBrokerProtocolVersion) && log.latestEpoch.nonEmpty)
+    if (metadataCache.metadataVersion().isTruncationOnFetchSupported && log.latestEpoch.nonEmpty)
       log.logEndOffset
     else
       log.highWatermark
@@ -1810,52 +1802,6 @@ class ReplicaManager(val config: KafkaConfig,
     }
   }
 
-  /**
-   * Update the follower's fetch state on the leader based on the last fetch request and update `readResult`.
-   * If the follower replica is not recognized to be one of the assigned replicas, do not update
-   * `readResult` so that log start/end offset and high watermark is consistent with
-   * records in fetch response. Log start/end offset and high watermark may change not only due to
-   * this fetch request, e.g., rolling new log segment and removing old log segment may move log
-   * start offset further than the last offset in the fetched records. The followers will get the
-   * updated leader's state in the next fetch response. If follower has a diverging epoch or if read
-   * fails with any error, follower fetch state is not updated.
-   */
-  private def updateFollowerFetchState(followerId: Int,
-                                       readResults: Seq[(TopicIdPartition, LogReadResult)]): Seq[(TopicIdPartition, LogReadResult)] = {
-    readResults.map { case (topicIdPartition, readResult) =>
-      val updatedReadResult = if (readResult.error != Errors.NONE) {
-        debug(s"Skipping update of fetch state for follower $followerId since the " +
-          s"log read returned error ${readResult.error}")
-        readResult
-      } else if (readResult.divergingEpoch.nonEmpty) {
-        debug(s"Skipping update of fetch state for follower $followerId since the " +
-          s"log read returned diverging epoch ${readResult.divergingEpoch}")
-        readResult
-      } else {
-        onlinePartition(topicIdPartition.topicPartition) match {
-          case Some(partition) =>
-            if (partition.updateFollowerFetchState(followerId,
-              followerFetchOffsetMetadata = readResult.info.fetchOffsetMetadata,
-              followerStartOffset = readResult.followerLogStartOffset,
-              followerFetchTimeMs = readResult.fetchTimeMs,
-              leaderEndOffset = readResult.leaderLogEndOffset)) {
-              readResult
-            } else {
-              warn(s"Leader $localBrokerId failed to record follower $followerId's position " +
-                s"${readResult.info.fetchOffsetMetadata.messageOffset}, and last sent HW since the replica " +
-                s"is not recognized to be one of the assigned replicas ${partition.assignmentState.replicas.mkString(",")} " +
-                s"for partition $topicIdPartition. Empty records will be returned for this partition.")
-              readResult.withEmptyFetchInfo
-            }
-          case None =>
-            warn(s"While recording the replica LEO, the partition $topicIdPartition hasn't been created.")
-            readResult
-        }
-      }
-      topicIdPartition -> updatedReadResult
-    }
-  }
-
   private def leaderPartitionsIterator: Iterator[Partition] =
     onlinePartitionsIterator.filter(_.leaderLogIfLocal.isDefined)
 
@@ -1948,6 +1894,10 @@ class ReplicaManager(val config: KafkaConfig,
     removeMetric("PartitionsWithLateTransactionsCount")
   }
 
+  def beginControlledShutdown(): Unit = {
+    isInControlledShutdown = true
+  }
+
   // High watermark do not need to be checkpointed only when under unit tests
   def shutdown(checkpointHW: Boolean = true): Unit = {
     info("Shutting down")
@@ -1967,7 +1917,7 @@ class ReplicaManager(val config: KafkaConfig,
   }
 
   protected def createReplicaFetcherManager(metrics: Metrics, time: Time, threadNamePrefix: Option[String], quotaManager: ReplicationQuotaManager) = {
-    new ReplicaFetcherManager(config, this, metrics, time, threadNamePrefix, quotaManager)
+    new ReplicaFetcherManager(config, this, metrics, time, threadNamePrefix, quotaManager, () => metadataCache.metadataVersion())
   }
 
   protected def createReplicaAlterLogDirsManager(quotaManager: ReplicationQuotaManager, brokerTopicStats: BrokerTopicStats) = {
@@ -2079,28 +2029,27 @@ class ReplicaManager(val config: KafkaConfig,
                                           topicId: Uuid): Option[(Partition, Boolean)] = {
     getPartition(tp) match {
       case HostedPartition.Offline =>
-        stateChangeLogger.warn(s"Unable to bring up new local leader ${tp} " +
-          s"with topic id ${topicId} because it resides in an offline log " +
+        stateChangeLogger.warn(s"Unable to bring up new local leader $tp " +
+          s"with topic id $topicId because it resides in an offline log " +
           "directory.")
         None
 
-      case HostedPartition.Online(partition) => {
+      case HostedPartition.Online(partition) =>
         if (partition.topicId.exists(_ != topicId)) {
           // Note: Partition#topicId will be None here if the Log object for this partition
           // has not been created.
-          throw new IllegalStateException(s"Topic ${tp} exists, but its ID is " +
-            s"${partition.topicId.get}, not ${topicId} as expected")
+          throw new IllegalStateException(s"Topic $tp exists, but its ID is " +
+            s"${partition.topicId.get}, not $topicId as expected")
         }
         Some(partition, false)
-      }
 
       case HostedPartition.None =>
         if (delta.image().topicsById().containsKey(topicId)) {
-          stateChangeLogger.error(s"Expected partition ${tp} with topic id " +
-            s"${topicId} to exist, but it was missing. Creating...")
+          stateChangeLogger.error(s"Expected partition $tp with topic id " +
+            s"$topicId to exist, but it was missing. Creating...")
         } else {
-          stateChangeLogger.info(s"Creating new partition ${tp} with topic id " +
-            s"${topicId}.")
+          stateChangeLogger.info(s"Creating new partition $tp with topic id " +
+            s"$topicId.")
         }
         // it's a partition that we don't know about yet, so create it and mark it online
         val partition = Partition(tp, time, this)
@@ -2127,10 +2076,10 @@ class ReplicaManager(val config: KafkaConfig,
         stateChangeLogger.info(s"Deleting ${deletes.size} partition(s).")
         stopPartitions(deletes).forKeyValue { (topicPartition, e) =>
           if (e.isInstanceOf[KafkaStorageException]) {
-            stateChangeLogger.error(s"Unable to delete replica ${topicPartition} because " +
+            stateChangeLogger.error(s"Unable to delete replica $topicPartition because " +
               "the local replica for the partition is in an offline log directory")
           } else {
-            stateChangeLogger.error(s"Unable to delete replica ${topicPartition} because " +
+            stateChangeLogger.error(s"Unable to delete replica $topicPartition because " +
               s"we got an unexpected ${e.getClass.getName} exception: ${e.getMessage}")
           }
         }
@@ -2159,20 +2108,16 @@ class ReplicaManager(val config: KafkaConfig,
     changedPartitions: mutable.Set[Partition],
     delta: TopicsDelta,
     offsetCheckpoints: OffsetCheckpoints,
-    newLocalLeaders: mutable.Map[TopicPartition, LocalReplicaChanges.PartitionInfo]
+    localLeaders: mutable.Map[TopicPartition, LocalReplicaChanges.PartitionInfo]
   ): Unit = {
-    stateChangeLogger.info(s"Transitioning ${newLocalLeaders.size} partition(s) to " +
+    stateChangeLogger.info(s"Transitioning ${localLeaders.size} partition(s) to " +
       "local leaders.")
-    replicaFetcherManager.removeFetcherForPartitions(newLocalLeaders.keySet)
-    newLocalLeaders.forKeyValue { (tp, info) =>
+    replicaFetcherManager.removeFetcherForPartitions(localLeaders.keySet)
+    localLeaders.forKeyValue { (tp, info) =>
       getOrCreatePartition(tp, delta, info.topicId).foreach { case (partition, isNew) =>
         try {
           val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-          if (!partition.makeLeader(state, offsetCheckpoints, Some(info.topicId))) {
-            stateChangeLogger.info("Skipped the become-leader state change for " +
-              s"$tp with topic id ${info.topicId} because this partition is " +
-              "already a local leader.")
-          }
+          partition.makeLeader(state, offsetCheckpoints, Some(info.topicId))
           changedPartitions.add(partition)
         } catch {
           case e: KafkaStorageException =>
@@ -2193,39 +2138,39 @@ class ReplicaManager(val config: KafkaConfig,
     newImage: MetadataImage,
     delta: TopicsDelta,
     offsetCheckpoints: OffsetCheckpoints,
-    newLocalFollowers: mutable.Map[TopicPartition, LocalReplicaChanges.PartitionInfo]
+    localFollowers: mutable.Map[TopicPartition, LocalReplicaChanges.PartitionInfo]
   ): Unit = {
-    stateChangeLogger.info(s"Transitioning ${newLocalFollowers.size} partition(s) to " +
+    stateChangeLogger.info(s"Transitioning ${localFollowers.size} partition(s) to " +
       "local followers.")
     val shuttingDown = isShuttingDown.get()
-    val partitionsToMakeFollower = new mutable.HashMap[TopicPartition, Partition]
-    val newFollowerTopicSet = new mutable.HashSet[String]
-    newLocalFollowers.forKeyValue { (tp, info) =>
+    val partitionsToStartFetching = new mutable.HashMap[TopicPartition, Partition]
+    val partitionsToStopFetching = new mutable.HashMap[TopicPartition, Boolean]
+    val followerTopicSet = new mutable.HashSet[String]
+    localFollowers.forKeyValue { (tp, info) =>
       getOrCreatePartition(tp, delta, info.topicId).foreach { case (partition, isNew) =>
         try {
-          newFollowerTopicSet.add(tp.topic)
+          followerTopicSet.add(tp.topic)
 
           if (shuttingDown) {
             stateChangeLogger.trace(s"Unable to start fetching $tp with topic " +
               s"ID ${info.topicId} because the replica manager is shutting down.")
           } else {
-            val leader = info.partition.leader
-            if (newImage.cluster.broker(leader) == null) {
-              stateChangeLogger.trace(s"Unable to start fetching $tp with topic ID ${info.topicId} " +
-                s"from leader $leader because it is not alive.")
-
-              // Create the local replica even if the leader is unavailable. This is required
-              // to ensure that we include the partition's high watermark in the checkpoint
-              // file (see KAFKA-1647).
-              partition.createLogIfNotExists(isNew, false, offsetCheckpoints, Some(info.topicId))
-            } else {
-              val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-              if (partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))) {
-                partitionsToMakeFollower.put(tp, partition)
-              } else {
-                stateChangeLogger.info("Skipped the become-follower state change after marking its " +
-                  s"partition as follower for partition $tp with id ${info.topicId} and partition state $state.")
-              }
+            // We always update the follower state.
+            // - This ensure that a replica with no leader can step down;
+            // - This also ensures that the local replica is created even if the leader
+            //   is unavailable. This is required to ensure that we include the partition's
+            //   high watermark in the checkpoint file (see KAFKA-1647).
+            val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
+            val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
+
+            if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
+                !info.partition.isr.contains(config.brokerId))) {
+              // During controlled shutdown, replica with no leaders and replica
+              // where this broker is not in the ISR are stopped.
+              partitionsToStopFetching.put(tp, false)
+            } else if (isNewLeaderEpoch) {
+              // Otherwise, fetcher is restarted if the leader epoch has changed.
+              partitionsToStartFetching.put(tp, partition)
             }
           }
           changedPartitions.add(partition)
@@ -2248,33 +2193,47 @@ class ReplicaManager(val config: KafkaConfig,
       }
     }
 
-    // Stopping the fetchers must be done first in order to initialize the fetch
-    // position correctly.
-    replicaFetcherManager.removeFetcherForPartitions(partitionsToMakeFollower.keySet)
-    stateChangeLogger.info(s"Stopped fetchers as part of become-follower for ${partitionsToMakeFollower.size} partitions")
-
-    val listenerName = config.interBrokerListenerName.value
-    val partitionAndOffsets = new mutable.HashMap[TopicPartition, InitialFetchState]
-    partitionsToMakeFollower.forKeyValue { (topicPartition, partition) =>
-      val node = partition.leaderReplicaIdOpt
-        .flatMap(leaderId => Option(newImage.cluster.broker(leaderId)))
-        .flatMap(_.node(listenerName).asScala)
-        .getOrElse(Node.noNode)
-      val log = partition.localLogOrException
-      partitionAndOffsets.put(topicPartition, InitialFetchState(
-        log.topicId,
-        new BrokerEndPoint(node.id, node.host, node.port),
-        partition.getLeaderEpoch,
-        initialFetchOffset(log)
-      ))
-    }
+    if (partitionsToStartFetching.nonEmpty) {
+      // Stopping the fetchers must be done first in order to initialize the fetch
+      // position correctly.
+      replicaFetcherManager.removeFetcherForPartitions(partitionsToStartFetching.keySet)
+      stateChangeLogger.info(s"Stopped fetchers as part of become-follower for ${partitionsToStartFetching.size} partitions")
+
+      val listenerName = config.interBrokerListenerName.value
+      val partitionAndOffsets = new mutable.HashMap[TopicPartition, InitialFetchState]
+
+      partitionsToStartFetching.forKeyValue { (topicPartition, partition) =>
+        val nodeOpt = partition.leaderReplicaIdOpt
+          .flatMap(leaderId => Option(newImage.cluster.broker(leaderId)))
+          .flatMap(_.node(listenerName).asScala)
+
+        nodeOpt match {
+          case Some(node) =>
+            val log = partition.localLogOrException
+            partitionAndOffsets.put(topicPartition, InitialFetchState(
+              log.topicId,
+              new BrokerEndPoint(node.id, node.host, node.port),
+              partition.getLeaderEpoch,
+              initialFetchOffset(log)
+            ))
+          case None =>
+            stateChangeLogger.trace(s"Unable to start fetching $topicPartition with topic ID ${partition.topicId} " +
+              s"from leader ${partition.leaderReplicaIdOpt} because it is not alive.")
+        }
+      }
 
-    replicaFetcherManager.addFetcherForPartitions(partitionAndOffsets)
-    stateChangeLogger.info(s"Started fetchers as part of become-follower for ${partitionsToMakeFollower.size} partitions")
+      replicaFetcherManager.addFetcherForPartitions(partitionAndOffsets)
+      stateChangeLogger.info(s"Started fetchers as part of become-follower for ${partitionsToStartFetching.size} partitions")
 
-    partitionsToMakeFollower.keySet.foreach(completeDelayedFetchOrProduceRequests)
+      partitionsToStartFetching.keySet.foreach(completeDelayedFetchOrProduceRequests)
 
-    updateLeaderAndFollowerMetrics(newFollowerTopicSet)
+      updateLeaderAndFollowerMetrics(followerTopicSet)
+    }
+
+    if (partitionsToStopFetching.nonEmpty) {
+      stopPartitions(partitionsToStopFetching)
+      stateChangeLogger.info(s"Stopped fetchers as part of controlled shutdown for ${partitionsToStopFetching.size} partitions")
+    }
   }
 
   def deleteStrayReplicas(topicPartitions: Iterable[TopicPartition]): Unit = {
diff --git a/core/src/main/scala/kafka/server/ReplicationQuotaManager.scala b/core/src/main/scala/kafka/server/ReplicationQuotaManager.scala
index 3035cb1371858..c02936973d35d 100644
--- a/core/src/main/scala/kafka/server/ReplicationQuotaManager.scala
+++ b/core/src/main/scala/kafka/server/ReplicationQuotaManager.scala
@@ -79,7 +79,7 @@ class ReplicationQuotaManager(val config: ReplicationQuotaManagerConfig,
   private var quota: Quota = null
   private val sensorAccess = new SensorAccess(lock, metrics)
   private val rateMetricName = metrics.metricName("byte-rate", replicationType.toString,
-    s"Tracking byte-rate for ${replicationType}")
+    s"Tracking byte-rate for $replicationType")
 
   /**
     * Update the quota
diff --git a/core/src/main/scala/kafka/server/Server.scala b/core/src/main/scala/kafka/server/Server.scala
index c395df47e63f7..5d902c583129d 100644
--- a/core/src/main/scala/kafka/server/Server.scala
+++ b/core/src/main/scala/kafka/server/Server.scala
@@ -16,15 +16,12 @@
  */
 package kafka.server
 
-import java.util.Collections
 import java.util.concurrent.TimeUnit
 
 import org.apache.kafka.clients.CommonClientConfigs
 import org.apache.kafka.common.metrics.{JmxReporter, KafkaMetricsContext, MetricConfig, Metrics, MetricsReporter, Sensor}
 import org.apache.kafka.common.utils.Time
-import org.apache.kafka.metadata.VersionRange
 
-import scala.jdk.CollectionConverters._
 
 trait Server {
   def startup(): Unit
@@ -99,7 +96,4 @@ object Server {
   case object STARTING extends ProcessStatus
   case object STARTED extends ProcessStatus
   case object SHUTTING_DOWN extends ProcessStatus
-
-  val SUPPORTED_FEATURES = Collections.
-    unmodifiableMap[String, VersionRange](Map[String, VersionRange]().asJava)
 }
diff --git a/core/src/main/scala/kafka/server/ZkAdminManager.scala b/core/src/main/scala/kafka/server/ZkAdminManager.scala
index 2852cd141febd..f65367606da18 100644
--- a/core/src/main/scala/kafka/server/ZkAdminManager.scala
+++ b/core/src/main/scala/kafka/server/ZkAdminManager.scala
@@ -158,7 +158,7 @@ class ZkAdminManager(val config: KafkaConfig,
 
         val nullConfigs = topic.configs.asScala.filter(_.value == null).map(_.name)
         if (nullConfigs.nonEmpty)
-          throw new InvalidRequestException(s"Null value not supported for topic configs : ${nullConfigs.mkString(",")}")
+          throw new InvalidConfigurationException(s"Null value not supported for topic configs: ${nullConfigs.mkString(",")}")
 
         if ((topic.numPartitions != NO_NUM_PARTITIONS || topic.replicationFactor != NO_REPLICATION_FACTOR)
             && !topic.assignments().isEmpty) {
@@ -407,7 +407,7 @@ class ZkAdminManager(val config: KafkaConfig,
         case e @ (_: ConfigException | _: IllegalArgumentException) =>
           val message = s"Invalid config value for resource $resource: ${e.getMessage}"
           info(message)
-          resource -> ApiError.fromThrowable(new InvalidRequestException(message, e))
+          resource -> ApiError.fromThrowable(new InvalidConfigurationException(message, e))
         case e: Throwable =>
           val configProps = new Properties
           config.entries.asScala.filter(_.value != null).foreach { configEntry =>
@@ -427,6 +427,10 @@ class ZkAdminManager(val config: KafkaConfig,
   private def alterTopicConfigs(resource: ConfigResource, validateOnly: Boolean,
                                 configProps: Properties, configEntriesMap: Map[String, String]): (ConfigResource, ApiError) = {
     val topic = resource.name
+    if (topic.isEmpty()) {
+      throw new InvalidRequestException("Default topic resources are not allowed.")
+    }
+
     if (!metadataCache.contains(topic))
       throw new UnknownTopicOrPartitionException(s"The topic '$topic' does not exist.")
 
@@ -489,6 +493,9 @@ class ZkAdminManager(val config: KafkaConfig,
 
         resource.`type` match {
           case ConfigResource.Type.TOPIC =>
+            if (resource.name.isEmpty()) {
+              throw new InvalidRequestException("Default topic resources are not allowed.")
+            }
             val configProps = adminZkClient.fetchEntityConfig(ConfigType.Topic, resource.name)
             prepareIncrementalConfigs(alterConfigOps, configProps, LogConfig.configKeys)
             alterTopicConfigs(resource, validateOnly, configProps, configEntriesMap)
@@ -511,7 +518,7 @@ class ZkAdminManager(val config: KafkaConfig,
         case e @ (_: ConfigException | _: IllegalArgumentException) =>
           val message = s"Invalid config value for resource $resource: ${e.getMessage}"
           info(message)
-          resource -> ApiError.fromThrowable(new InvalidRequestException(message, e))
+          resource -> ApiError.fromThrowable(new InvalidConfigurationException(message, e))
         case e: Throwable =>
           // Log client errors at a lower level than unexpected exceptions
           val message = s"Error processing alter configs request for resource $resource, config $alterConfigOps"
diff --git a/core/src/main/scala/kafka/server/ZkIsrManager.scala b/core/src/main/scala/kafka/server/ZkAlterPartitionManager.scala
similarity index 85%
rename from core/src/main/scala/kafka/server/ZkIsrManager.scala
rename to core/src/main/scala/kafka/server/ZkAlterPartitionManager.scala
index 65e8c147d7b8e..c3d842b796389 100644
--- a/core/src/main/scala/kafka/server/ZkIsrManager.scala
+++ b/core/src/main/scala/kafka/server/ZkAlterPartitionManager.scala
@@ -19,10 +19,11 @@ package kafka.server
 import kafka.utils.{Logging, ReplicationUtils, Scheduler}
 import kafka.zk.KafkaZkClient
 import org.apache.kafka.common.TopicPartition
+
 import java.util.concurrent.atomic.AtomicLong
 import java.util.concurrent.{CompletableFuture, TimeUnit}
-
 import kafka.api.LeaderAndIsr
+import org.apache.kafka.common.TopicIdPartition
 import org.apache.kafka.common.errors.InvalidUpdateVersionException
 import org.apache.kafka.common.utils.Time
 
@@ -35,7 +36,7 @@ import scala.collection.mutable
  */
 case class IsrChangePropagationConfig(checkIntervalMs: Long, maxDelayMs: Long, lingerMs: Long)
 
-object ZkIsrManager {
+object ZkAlterPartitionManager {
   // This field is mutable to allow overriding change notification behavior in test cases
   @volatile var DefaultIsrPropagationConfig: IsrChangePropagationConfig = IsrChangePropagationConfig(
     checkIntervalMs = 2500,
@@ -44,9 +45,9 @@ object ZkIsrManager {
   )
 }
 
-class ZkIsrManager(scheduler: Scheduler, time: Time, zkClient: KafkaZkClient) extends AlterIsrManager with Logging {
+class ZkAlterPartitionManager(scheduler: Scheduler, time: Time, zkClient: KafkaZkClient) extends AlterPartitionManager with Logging {
 
-  private val isrChangeNotificationConfig = ZkIsrManager.DefaultIsrPropagationConfig
+  private val isrChangeNotificationConfig = ZkAlterPartitionManager.DefaultIsrPropagationConfig
   // Visible for testing
   private[server] val isrChangeSet: mutable.Set[TopicPartition] = new mutable.HashSet[TopicPartition]()
   private val lastIsrChangeMs = new AtomicLong(time.milliseconds())
@@ -58,30 +59,30 @@ class ZkIsrManager(scheduler: Scheduler, time: Time, zkClient: KafkaZkClient) ex
   }
 
   override def submit(
-    topicPartition: TopicPartition,
+    topicIdPartition: TopicIdPartition,
     leaderAndIsr: LeaderAndIsr,
     controllerEpoch: Int
   ): CompletableFuture[LeaderAndIsr]= {
     debug(s"Writing new ISR ${leaderAndIsr.isr} to ZooKeeper with version " +
-      s"${leaderAndIsr.zkVersion} for partition $topicPartition")
+      s"${leaderAndIsr.partitionEpoch} for partition $topicIdPartition")
 
-    val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topicPartition,
+    val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topicIdPartition.topicPartition,
       leaderAndIsr, controllerEpoch)
 
     val future = new CompletableFuture[LeaderAndIsr]()
     if (updateSucceeded) {
       // Track which partitions need to be propagated to the controller
       isrChangeSet synchronized {
-        isrChangeSet += topicPartition
+        isrChangeSet += topicIdPartition.topicPartition
         lastIsrChangeMs.set(time.milliseconds())
       }
 
       // We rely on Partition#isrState being properly set to the pending ISR at this point since we are synchronously
       // applying the callback
-      future.complete(leaderAndIsr.withZkVersion(newVersion))
+      future.complete(leaderAndIsr.withPartitionEpoch(newVersion))
     } else {
       future.completeExceptionally(new InvalidUpdateVersionException(
-        s"ISR update $leaderAndIsr for partition $topicPartition with controller epoch $controllerEpoch " +
+        s"ISR update $leaderAndIsr for partition $topicIdPartition with controller epoch $controllerEpoch " +
           "failed with an invalid version error"))
     }
     future
diff --git a/core/src/main/scala/kafka/server/metadata/BrokerMetadataListener.scala b/core/src/main/scala/kafka/server/metadata/BrokerMetadataListener.scala
index 5b118220071a1..3984f467eddab 100644
--- a/core/src/main/scala/kafka/server/metadata/BrokerMetadataListener.scala
+++ b/core/src/main/scala/kafka/server/metadata/BrokerMetadataListener.scala
@@ -19,13 +19,13 @@ package kafka.server.metadata
 import java.util
 import java.util.concurrent.{CompletableFuture, TimeUnit}
 import java.util.function.Consumer
-
 import kafka.metrics.KafkaMetricsGroup
 import org.apache.kafka.image.{MetadataDelta, MetadataImage}
 import org.apache.kafka.common.utils.{LogContext, Time}
 import org.apache.kafka.queue.{EventQueue, KafkaEventQueue}
 import org.apache.kafka.raft.{Batch, BatchReader, LeaderAndEpoch, RaftClient}
 import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.server.fault.FaultHandler
 import org.apache.kafka.snapshot.SnapshotReader
 
 
@@ -39,9 +39,11 @@ class BrokerMetadataListener(
   time: Time,
   threadNamePrefix: Option[String],
   val maxBytesBetweenSnapshots: Long,
-  val snapshotter: Option[MetadataSnapshotter]
+  val snapshotter: Option[MetadataSnapshotter],
+  brokerMetrics: BrokerServerMetrics,
+  metadataLoadingFaultHandler: FaultHandler
 ) extends RaftClient.Listener[ApiMessageAndVersion] with KafkaMetricsGroup {
-  private val logContext = new LogContext(s"[BrokerMetadataListener id=${brokerId}] ")
+  private val logContext = new LogContext(s"[BrokerMetadataListener id=$brokerId] ")
   private val log = logContext.logger(classOf[BrokerMetadataListener])
   logIdent = logContext.logPrefix()
 
@@ -108,29 +110,47 @@ class BrokerMetadataListener(
       extends EventQueue.FailureLoggingEvent(log) {
     override def run(): Unit = {
       val results = try {
-        val loadResults = loadBatches(_delta, reader, None, None, None)
+        val loadResults = loadBatches(_delta, reader, None, None, None, None)
         if (isDebugEnabled) {
-          debug(s"Loaded new commits: ${loadResults}")
+          debug(s"Loaded new commits: $loadResults")
         }
         loadResults
+      } catch {
+        case e: Throwable =>
+          metadataLoadingFaultHandler.handleFault(s"Unable to load metadata commits " +
+            s"from the BatchReader starting at base offset ${reader.baseOffset()}", e)
+          return
       } finally {
         reader.close()
       }
-      _publisher.foreach(publish)
 
-      snapshotter.foreach { snapshotter =>
-        _bytesSinceLastSnapshot = _bytesSinceLastSnapshot + results.numBytes
-        if (shouldSnapshot()) {
-          if (snapshotter.maybeStartSnapshot(_highestTimestamp, _delta.apply())) {
-            _bytesSinceLastSnapshot = 0L
-          }
-        }
+      _bytesSinceLastSnapshot = _bytesSinceLastSnapshot + results.numBytes
+      if (shouldSnapshot()) {
+        maybeStartSnapshot()
       }
+
+      _publisher.foreach(publish)
     }
   }
 
   private def shouldSnapshot(): Boolean = {
-    _bytesSinceLastSnapshot >= maxBytesBetweenSnapshots
+    (_bytesSinceLastSnapshot >= maxBytesBetweenSnapshots) || metadataVersionChanged()
+  }
+
+  private def metadataVersionChanged(): Boolean = {
+    // The _publisher is empty before starting publishing, and we won't compute feature delta
+    // until we starting publishing
+    _publisher.nonEmpty && Option(_delta.featuresDelta()).exists { featuresDelta =>
+      featuresDelta.metadataVersionChange().isPresent
+    }
+  }
+
+  private def maybeStartSnapshot(): Unit = {
+    snapshotter.foreach { snapshotter =>
+      if (snapshotter.maybeStartSnapshot(_highestTimestamp, _delta.apply())) {
+        _bytesSinceLastSnapshot = 0L
+      }
+    }
   }
 
   /**
@@ -142,19 +162,26 @@ class BrokerMetadataListener(
   class HandleSnapshotEvent(reader: SnapshotReader[ApiMessageAndVersion])
     extends EventQueue.FailureLoggingEvent(log) {
     override def run(): Unit = {
+      val snapshotName = s"${reader.snapshotId().offset}-${reader.snapshotId().epoch}"
       try {
-        info(s"Loading snapshot ${reader.snapshotId().offset}-${reader.snapshotId().epoch}.")
+        info(s"Loading snapshot ${snapshotName}")
         _delta = new MetadataDelta(_image) // Discard any previous deltas.
-        val loadResults = loadBatches(
-          _delta,
+        val loadResults = loadBatches(_delta,
           reader,
           Some(reader.lastContainedLogTimestamp),
           Some(reader.lastContainedLogOffset),
-          Some(reader.lastContainedLogEpoch)
-        )
-        _delta.finishSnapshot()
-        info(s"Loaded snapshot ${reader.snapshotId().offset}-${reader.snapshotId().epoch}: " +
-          s"${loadResults}")
+          Some(reader.lastContainedLogEpoch),
+          Some(snapshotName))
+        try {
+          _delta.finishSnapshot()
+        } catch {
+          case e: Throwable => metadataLoadingFaultHandler.handleFault(
+              s"Error finishing snapshot ${snapshotName}", e)
+        }
+        info(s"Loaded snapshot ${snapshotName}: ${loadResults}")
+      } catch {
+        case t: Throwable => metadataLoadingFaultHandler.handleFault("Uncaught exception while " +
+          s"loading broker metadata from Metadata snapshot ${snapshotName}", t)
       } finally {
         reader.close()
       }
@@ -163,9 +190,9 @@ class BrokerMetadataListener(
   }
 
   case class BatchLoadResults(numBatches: Int, numRecords: Int, elapsedUs: Long, numBytes: Long) {
-    override def toString(): String = {
-      s"${numBatches} batch(es) with ${numRecords} record(s) in ${numBytes} bytes " +
-        s"ending at offset ${highestMetadataOffset} in ${elapsedUs} microseconds"
+    override def toString: String = {
+      s"$numBatches batch(es) with $numRecords record(s) in $numBytes bytes " +
+        s"ending at offset $highestMetadataOffset in $elapsedUs microseconds"
     }
   }
 
@@ -187,14 +214,15 @@ class BrokerMetadataListener(
     iterator: util.Iterator[Batch[ApiMessageAndVersion]],
     lastAppendTimestamp: Option[Long],
     lastCommittedOffset: Option[Long],
-    lastCommittedEpoch: Option[Int]
+    lastCommittedEpoch: Option[Int],
+    snapshotName: Option[String]
   ): BatchLoadResults = {
     val startTimeNs = time.nanoseconds()
     var numBatches = 0
     var numRecords = 0
     var numBytes = 0L
 
-    while (iterator.hasNext()) {
+    while (iterator.hasNext) {
       val batch = iterator.next()
 
       val epoch = lastCommittedEpoch.getOrElse(batch.epoch())
@@ -206,12 +234,20 @@ class BrokerMetadataListener(
           trace(s"Metadata batch ${batch.lastOffset}: processing [${index + 1}/${batch.records.size}]:" +
             s" ${messageAndVersion.message}")
         }
-
-        _highestOffset  = lastCommittedOffset.getOrElse(batch.baseOffset() + index)
-
-        delta.replay(highestMetadataOffset, epoch, messageAndVersion.message())
-        numRecords += 1
-        index += 1
+        _highestOffset = lastCommittedOffset.getOrElse(batch.baseOffset() + index)
+        try {
+          delta.replay(highestMetadataOffset, epoch, messageAndVersion.message())
+        } catch {
+          case e: Throwable => snapshotName match {
+            case None => metadataLoadingFaultHandler.handleFault(
+              s"Error replaying metadata log record at offset ${_highestOffset}", e)
+            case Some(name) => metadataLoadingFaultHandler.handleFault(
+              s"Error replaying record ${index} from snapshot ${name} at offset ${_highestOffset}", e)
+          }
+        } finally {
+          numRecords += 1
+          index += 1
+        }
       }
       numBytes = numBytes + batch.sizeInBytes()
       metadataBatchSizeHist.update(batch.records().size())
@@ -236,8 +272,11 @@ class BrokerMetadataListener(
 
     override def run(): Unit = {
       _publisher = Some(publisher)
-      log.info(s"Starting to publish metadata events at offset ${highestMetadataOffset}.")
+      log.info(s"Starting to publish metadata events at offset $highestMetadataOffset.")
       try {
+        if (metadataVersionChanged()) {
+          maybeStartSnapshot()
+        }
         publish(publisher)
         future.complete(null)
       } catch {
@@ -248,14 +287,36 @@ class BrokerMetadataListener(
     }
   }
 
+  // This is used in tests to alter the publisher that is in use by the broker.
+  def alterPublisher(publisher: MetadataPublisher): CompletableFuture[Void] = {
+    val event = new AlterPublisherEvent(publisher)
+    eventQueue.append(event)
+    event.future
+  }
+
+  class AlterPublisherEvent(publisher: MetadataPublisher)
+    extends EventQueue.FailureLoggingEvent(log) {
+    val future = new CompletableFuture[Void]()
+
+    override def run(): Unit = {
+      _publisher = Some(publisher)
+      log.info(s"Set publisher to ${publisher}")
+      future.complete(null)
+    }
+  }
+
   private def publish(publisher: MetadataPublisher): Unit = {
     val delta = _delta
     _image = _delta.apply()
     _delta = new MetadataDelta(_image)
     if (isDebugEnabled) {
-      debug(s"Publishing new metadata delta ${delta} at offset ${_image.highestOffsetAndEpoch().offset}.")
+      debug(s"Publishing new metadata delta $delta at offset ${_image.highestOffsetAndEpoch().offset}.")
     }
     publisher.publish(delta, _image)
+
+    // Update the metrics since the publisher handled the lastest image
+    brokerMetrics.lastAppliedRecordOffset.set(_highestOffset)
+    brokerMetrics.lastAppliedRecordTimestamp.set(_highestTimestamp)
   }
 
   override def handleLeaderChange(leaderAndEpoch: LeaderAndEpoch): Unit = {
@@ -266,8 +327,9 @@ class BrokerMetadataListener(
     eventQueue.beginShutdown("beginShutdown", new ShutdownEvent())
   }
 
-  class ShutdownEvent() extends EventQueue.FailureLoggingEvent(log) {
+  class ShutdownEvent extends EventQueue.FailureLoggingEvent(log) {
     override def run(): Unit = {
+      brokerMetrics.close()
       removeMetric(BrokerMetadataListener.MetadataBatchProcessingTimeUs)
       removeMetric(BrokerMetadataListener.MetadataBatchSizes)
     }
diff --git a/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala b/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala
index 74c5348afc7a7..0192bb4afcfd7 100644
--- a/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala
+++ b/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala
@@ -17,11 +17,13 @@
 
 package kafka.server.metadata
 
+import java.util.Properties
+import java.util.concurrent.atomic.AtomicLong
 import kafka.coordinator.group.GroupCoordinator
 import kafka.coordinator.transaction.TransactionCoordinator
 import kafka.log.{LogManager, UnifiedLog}
 import kafka.server.ConfigAdminManager.toLoggableProps
-import kafka.server.{ConfigEntityName, ConfigHandler, ConfigType, FinalizedFeatureCache, KafkaConfig, ReplicaManager, RequestLocal}
+import kafka.server.{ConfigEntityName, ConfigHandler, ConfigType, KafkaConfig, ReplicaManager, RequestLocal}
 import kafka.utils.Logging
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.config.ConfigResource.Type.{BROKER, TOPIC}
@@ -29,6 +31,7 @@ import org.apache.kafka.common.internals.Topic
 import org.apache.kafka.image.{MetadataDelta, MetadataImage, TopicDelta, TopicsImage}
 import org.apache.kafka.metadata.authorizer.ClusterMetadataAuthorizer
 import org.apache.kafka.server.authorizer.Authorizer
+import org.apache.kafka.server.fault.FaultHandler
 
 import scala.collection.mutable
 
@@ -92,16 +95,19 @@ object BrokerMetadataPublisher extends Logging {
   }
 }
 
-class BrokerMetadataPublisher(conf: KafkaConfig,
-                              metadataCache: KRaftMetadataCache,
-                              logManager: LogManager,
-                              replicaManager: ReplicaManager,
-                              groupCoordinator: GroupCoordinator,
-                              txnCoordinator: TransactionCoordinator,
-                              clientQuotaMetadataManager: ClientQuotaMetadataManager,
-                              featureCache: FinalizedFeatureCache,
-                              dynamicConfigHandlers: Map[String, ConfigHandler],
-                              private val _authorizer: Option[Authorizer]) extends MetadataPublisher with Logging {
+class BrokerMetadataPublisher(
+  conf: KafkaConfig,
+  metadataCache: KRaftMetadataCache,
+  logManager: LogManager,
+  replicaManager: ReplicaManager,
+  groupCoordinator: GroupCoordinator,
+  txnCoordinator: TransactionCoordinator,
+  clientQuotaMetadataManager: ClientQuotaMetadataManager,
+  dynamicConfigHandlers: Map[String, ConfigHandler],
+  private val _authorizer: Option[Authorizer],
+  fatalFaultHandler: FaultHandler,
+  metadataPublishingFaultHandler: FaultHandler
+) extends MetadataPublisher with Logging {
   logIdent = s"[BrokerMetadataPublisher id=${conf.nodeId}] "
 
   import BrokerMetadataPublisher._
@@ -109,70 +115,98 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
   /**
    * The broker ID.
    */
-  val brokerId = conf.nodeId
+  val brokerId: Int = conf.nodeId
 
   /**
    * True if this is the first time we have published metadata.
    */
   var _firstPublish = true
 
+  /**
+   * This is updated after all components (e.g. LogManager) has finished publishing the new metadata delta
+   */
+  val publishedOffsetAtomic = new AtomicLong(-1)
+
   override def publish(delta: MetadataDelta, newImage: MetadataImage): Unit = {
     val highestOffsetAndEpoch = newImage.highestOffsetAndEpoch()
 
+    val deltaName = if (_firstPublish) {
+      s"initial MetadataDelta up to ${highestOffsetAndEpoch.offset}"
+    } else {
+      s"MetadataDelta up to ${highestOffsetAndEpoch.offset}"
+    }
     try {
-      trace(s"Publishing delta $delta with highest offset $highestOffsetAndEpoch")
+      if (isTraceEnabled) {
+        trace(s"Publishing delta $delta with highest offset $highestOffsetAndEpoch")
+      }
 
       // Publish the new metadata image to the metadata cache.
       metadataCache.setImage(newImage)
 
+      val metadataVersionLogMsg = s"metadata.version ${newImage.features().metadataVersion()}"
+
       if (_firstPublish) {
-        info(s"Publishing initial metadata at offset $highestOffsetAndEpoch.")
+        info(s"Publishing initial metadata at offset $highestOffsetAndEpoch with $metadataVersionLogMsg.")
 
         // If this is the first metadata update we are applying, initialize the managers
         // first (but after setting up the metadata cache).
         initializeManagers()
       } else if (isDebugEnabled) {
-        debug(s"Publishing metadata at offset $highestOffsetAndEpoch.")
+        debug(s"Publishing metadata at offset $highestOffsetAndEpoch with $metadataVersionLogMsg.")
       }
 
-      // Apply feature deltas.
       Option(delta.featuresDelta()).foreach { featuresDelta =>
-        featureCache.update(featuresDelta, highestOffsetAndEpoch.offset)
+        featuresDelta.metadataVersionChange().ifPresent{ metadataVersion =>
+          info(s"Updating metadata.version to ${metadataVersion.featureLevel()} at offset $highestOffsetAndEpoch.")
+        }
       }
 
       // Apply topic deltas.
       Option(delta.topicsDelta()).foreach { topicsDelta =>
-        // Notify the replica manager about changes to topics.
-        replicaManager.applyDelta(topicsDelta, newImage)
-
-        // Update the group coordinator of local changes
-        updateCoordinator(
-          newImage,
-          delta,
-          Topic.GROUP_METADATA_TOPIC_NAME,
-          groupCoordinator.onElection,
-          groupCoordinator.onResignation
-        )
-
-        // Update the transaction coordinator of local changes
-        updateCoordinator(
-          newImage,
-          delta,
-          Topic.TRANSACTION_STATE_TOPIC_NAME,
-          txnCoordinator.onElection,
-          txnCoordinator.onResignation
-        )
-
-        // Notify the group coordinator about deleted topics.
-        val deletedTopicPartitions = new mutable.ArrayBuffer[TopicPartition]()
-        topicsDelta.deletedTopicIds().forEach { id =>
-          val topicImage = topicsDelta.image().getTopic(id)
-          topicImage.partitions().keySet().forEach {
-            id => deletedTopicPartitions += new TopicPartition(topicImage.name(), id)
-          }
+        try {
+          // Notify the replica manager about changes to topics.
+          replicaManager.applyDelta(topicsDelta, newImage)
+        } catch {
+          case t: Throwable => metadataPublishingFaultHandler.handleFault("Error applying topics " +
+            s"delta in ${deltaName}", t)
+        }
+        try {
+          // Update the group coordinator of local changes
+          updateCoordinator(newImage,
+            delta,
+            Topic.GROUP_METADATA_TOPIC_NAME,
+            groupCoordinator.onElection,
+            groupCoordinator.onResignation)
+        } catch {
+          case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating group " +
+            s"coordinator with local changes in ${deltaName}", t)
         }
-        if (deletedTopicPartitions.nonEmpty) {
-          groupCoordinator.handleDeletedPartitions(deletedTopicPartitions, RequestLocal.NoCaching)
+        try {
+          // Update the transaction coordinator of local changes
+          updateCoordinator(newImage,
+            delta,
+            Topic.TRANSACTION_STATE_TOPIC_NAME,
+            txnCoordinator.onElection,
+            txnCoordinator.onResignation)
+        } catch {
+          case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating txn " +
+            s"coordinator with local changes in ${deltaName}", t)
+        }
+        try {
+          // Notify the group coordinator about deleted topics.
+          val deletedTopicPartitions = new mutable.ArrayBuffer[TopicPartition]()
+          topicsDelta.deletedTopicIds().forEach { id =>
+            val topicImage = topicsDelta.image().getTopic(id)
+            topicImage.partitions().keySet().forEach {
+              id => deletedTopicPartitions += new TopicPartition(topicImage.name(), id)
+            }
+          }
+          if (deletedTopicPartitions.nonEmpty) {
+            groupCoordinator.handleDeletedPartitions(deletedTopicPartitions, RequestLocal.NoCaching)
+          }
+        } catch {
+          case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating group " +
+            s"coordinator with deleted partitions in ${deltaName}", t)
         }
       }
 
@@ -182,34 +216,62 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
           val props = newImage.configs().configProperties(resource)
           resource.`type`() match {
             case TOPIC =>
-              // Apply changes to a topic's dynamic configuration.
-              info(s"Updating topic ${resource.name()} with new configuration : " +
-                toLoggableProps(resource, props).mkString(","))
-              dynamicConfigHandlers(ConfigType.Topic).
-                processConfigChanges(resource.name(), props)
-              conf.dynamicConfig.reloadUpdatedFilesWithoutConfigChange(props)
-            case BROKER => if (resource.name().isEmpty) {
-              // Apply changes to "cluster configs" (also known as default BROKER configs).
-              // These are stored in KRaft with an empty name field.
-              info(s"Updating cluster configuration : " +
-                toLoggableProps(resource, props).mkString(","))
-              dynamicConfigHandlers(ConfigType.Broker).
-                processConfigChanges(ConfigEntityName.Default, props)
-            } else if (resource.name().equals(brokerId.toString)) {
-              // Apply changes to this broker's dynamic configuration.
-              info(s"Updating broker ${brokerId} with new configuration : " +
-                toLoggableProps(resource, props).mkString(","))
-              dynamicConfigHandlers(ConfigType.Broker).
-                processConfigChanges(resource.name(), props)
-            }
+              try {
+                // Apply changes to a topic's dynamic configuration.
+                info(s"Updating topic ${resource.name()} with new configuration : " +
+                  toLoggableProps(resource, props).mkString(","))
+                dynamicConfigHandlers(ConfigType.Topic).
+                  processConfigChanges(resource.name(), props)
+              } catch {
+                case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating topic " +
+                  s"${resource.name()} with new configuration: ${toLoggableProps(resource, props).mkString(",")} " +
+                  s"in ${deltaName}", t)
+              }
+            case BROKER =>
+              if (resource.name().isEmpty) {
+                try {
+                  // Apply changes to "cluster configs" (also known as default BROKER configs).
+                  // These are stored in KRaft with an empty name field.
+                  info("Updating cluster configuration : " +
+                    toLoggableProps(resource, props).mkString(","))
+                  dynamicConfigHandlers(ConfigType.Broker).
+                    processConfigChanges(ConfigEntityName.Default, props)
+                } catch {
+                  case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating " +
+                    s"cluster with new configuration: ${toLoggableProps(resource, props).mkString(",")} " +
+                    s"in ${deltaName}", t)
+                }
+              } else if (resource.name() == brokerId.toString) {
+                try {
+                  // Apply changes to this broker's dynamic configuration.
+                  info(s"Updating broker $brokerId with new configuration : " +
+                    toLoggableProps(resource, props).mkString(","))
+                  dynamicConfigHandlers(ConfigType.Broker).
+                    processConfigChanges(resource.name(), props)
+                  // When applying a per broker config (not a cluster config), we also
+                  // reload any associated file. For example, if the ssl.keystore is still
+                  // set to /tmp/foo, we still want to reload /tmp/foo in case its contents
+                  // have changed. This doesn't apply to topic configs or cluster configs.
+                  reloadUpdatedFilesWithoutConfigChange(props)
+                } catch {
+                  case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating " +
+                    s"broker with new configuration: ${toLoggableProps(resource, props).mkString(",")} " +
+                    s"in ${deltaName}", t)
+                }
+              }
             case _ => // nothing to do
           }
         }
       }
 
-      // Apply client quotas delta.
-      Option(delta.clientQuotasDelta()).foreach { clientQuotasDelta =>
-        clientQuotaMetadataManager.update(clientQuotasDelta)
+      try {
+        // Apply client quotas delta.
+        Option(delta.clientQuotasDelta()).foreach { clientQuotasDelta =>
+          clientQuotaMetadataManager.update(clientQuotasDelta)
+        }
+      } catch {
+        case t: Throwable => metadataPublishingFaultHandler.handleFault("Error updating client " +
+          s"quotas in ${deltaName}", t)
       }
 
       // Apply changes to ACLs. This needs to be handled carefully because while we are
@@ -220,21 +282,31 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
       // there could be a window during which incorrect authorization results are returned.
       Option(delta.aclsDelta()).foreach( aclsDelta =>
         _authorizer match {
-          case Some(authorizer: ClusterMetadataAuthorizer) => if (aclsDelta.isSnapshotDelta()) {
-            // If the delta resulted from a snapshot load, we want to apply the new changes
-            // all at once using ClusterMetadataAuthorizer#loadSnapshot. If this is the
-            // first snapshot load, it will also complete the futures returned by
-           // Authorizer#start (which we wait for before processing RPCs).
-            authorizer.loadSnapshot(newImage.acls().acls())
+          case Some(authorizer: ClusterMetadataAuthorizer) => if (aclsDelta.isSnapshotDelta) {
+            try {
+              // If the delta resulted from a snapshot load, we want to apply the new changes
+              // all at once using ClusterMetadataAuthorizer#loadSnapshot. If this is the
+              // first snapshot load, it will also complete the futures returned by
+              // Authorizer#start (which we wait for before processing RPCs).
+              authorizer.loadSnapshot(newImage.acls().acls())
+            } catch {
+              case t: Throwable => metadataPublishingFaultHandler.handleFault("Error loading " +
+                s"authorizer snapshot in ${deltaName}", t)
+            }
           } else {
-            // Because the changes map is a LinkedHashMap, the deltas will be returned in
-            // the order they were performed.
-            aclsDelta.changes().entrySet().forEach(e =>
-              if (e.getValue().isPresent()) {
-                authorizer.addAcl(e.getKey(), e.getValue().get())
-              } else {
-                authorizer.removeAcl(e.getKey())
-              })
+            try {
+              // Because the changes map is a LinkedHashMap, the deltas will be returned in
+              // the order they were performed.
+              aclsDelta.changes().entrySet().forEach(e =>
+                if (e.getValue.isPresent) {
+                  authorizer.addAcl(e.getKey, e.getValue.get())
+                } else {
+                  authorizer.removeAcl(e.getKey)
+                })
+            } catch {
+              case t: Throwable => metadataPublishingFaultHandler.handleFault("Error loading " +
+                s"authorizer changes in ${deltaName}", t)
+            }
           }
           case _ => // No ClusterMetadataAuthorizer is configured. There is nothing to do.
         })
@@ -242,14 +314,21 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
       if (_firstPublish) {
         finishInitializingReplicaManager(newImage)
       }
+      publishedOffsetAtomic.set(newImage.highestOffsetAndEpoch().offset)
     } catch {
-      case t: Throwable => error(s"Error publishing broker metadata at $highestOffsetAndEpoch", t)
-        throw t
+      case t: Throwable => metadataPublishingFaultHandler.handleFault("Uncaught exception while " +
+        s"publishing broker metadata from ${deltaName}", t)
     } finally {
       _firstPublish = false
     }
   }
 
+  override def publishedOffset: Long = publishedOffsetAtomic.get()
+
+  def reloadUpdatedFilesWithoutConfigChange(props: Properties): Unit = {
+    conf.dynamicConfig.reloadUpdatedFilesWithoutConfigChange(props)
+  }
+
   /**
    * Update the coordinator of local replica changes: election and resignation.
    *
@@ -261,7 +340,7 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
    * @param resignation function to call on resignation; the first parameter is the partition id;
    *                    the second parameter is the leader epoch
    */
-  private def updateCoordinator(
+  def updateCoordinator(
     image: MetadataImage,
     delta: MetadataDelta,
     topicName: String,
@@ -296,38 +375,60 @@ class BrokerMetadataPublisher(conf: KafkaConfig,
   }
 
   private def initializeManagers(): Unit = {
-    // Start log manager, which will perform (potentially lengthy)
-    // recovery-from-unclean-shutdown if required.
-    logManager.startup(metadataCache.getAllTopics())
-
-    // Make the LogCleaner available for reconfiguration. We can't do this prior to this
-    // point because LogManager#startup creates the LogCleaner object, if
-    // log.cleaner.enable is true. TODO: improve this (see KAFKA-13610)
-    Option(logManager.cleaner).foreach(conf.dynamicConfig.addBrokerReconfigurable)
-
-    // Start the replica manager.
-    replicaManager.startup()
-
-    // Start the group coordinator.
-    groupCoordinator.startup(() => metadataCache.numPartitions(
-      Topic.GROUP_METADATA_TOPIC_NAME).getOrElse(conf.offsetsTopicPartitions))
-
-    // Start the transaction coordinator.
-    txnCoordinator.startup(() => metadataCache.numPartitions(
-      Topic.TRANSACTION_STATE_TOPIC_NAME).getOrElse(conf.transactionTopicPartitions))
+    try {
+      // Start log manager, which will perform (potentially lengthy)
+      // recovery-from-unclean-shutdown if required.
+      logManager.startup(metadataCache.getAllTopics())
+
+      // Make the LogCleaner available for reconfiguration. We can't do this prior to this
+      // point because LogManager#startup creates the LogCleaner object, if
+      // log.cleaner.enable is true. TODO: improve this (see KAFKA-13610)
+      Option(logManager.cleaner).foreach(conf.dynamicConfig.addBrokerReconfigurable)
+    } catch {
+      case t: Throwable => fatalFaultHandler.handleFault("Error starting LogManager", t)
+    }
+    try {
+      // Start the replica manager.
+      replicaManager.startup()
+    } catch {
+      case t: Throwable => fatalFaultHandler.handleFault("Error starting ReplicaManager", t)
+    }
+    try {
+      // Start the group coordinator.
+      groupCoordinator.startup(() => metadataCache.numPartitions(
+        Topic.GROUP_METADATA_TOPIC_NAME).getOrElse(conf.offsetsTopicPartitions))
+    } catch {
+      case t: Throwable => fatalFaultHandler.handleFault("Error starting GroupCoordinator", t)
+    }
+    try {
+      // Start the transaction coordinator.
+      txnCoordinator.startup(() => metadataCache.numPartitions(
+        Topic.TRANSACTION_STATE_TOPIC_NAME).getOrElse(conf.transactionTopicPartitions))
+    } catch {
+      case t: Throwable => fatalFaultHandler.handleFault("Error starting TransactionCoordinator", t)
+    }
   }
 
   private def finishInitializingReplicaManager(newImage: MetadataImage): Unit = {
-    // Delete log directories which we're not supposed to have, according to the
-    // latest metadata. This is only necessary to do when we're first starting up. If
-    // we have to load a snapshot later, these topics will appear in deletedTopicIds.
-    val strayPartitions = findStrayPartitions(brokerId, newImage.topics, logManager.allLogs)
-    if (strayPartitions.nonEmpty) {
-      replicaManager.deleteStrayReplicas(strayPartitions)
+    try {
+      // Delete log directories which we're not supposed to have, according to the
+      // latest metadata. This is only necessary to do when we're first starting up. If
+      // we have to load a snapshot later, these topics will appear in deletedTopicIds.
+      val strayPartitions = findStrayPartitions(brokerId, newImage.topics, logManager.allLogs)
+      if (strayPartitions.nonEmpty) {
+        replicaManager.deleteStrayReplicas(strayPartitions)
+      }
+    } catch {
+      case t: Throwable => metadataPublishingFaultHandler.handleFault("Error deleting stray " +
+        "partitions during startup", t)
     }
-
-    // Make sure that the high water mark checkpoint thread is running for the replica
-    // manager.
-    replicaManager.startHighWatermarkCheckPointThread()
-  }
+    try {
+      // Make sure that the high water mark checkpoint thread is running for the replica
+      // manager.
+      replicaManager.startHighWatermarkCheckPointThread()
+    } catch {
+      case t: Throwable => metadataPublishingFaultHandler.handleFault("Error starting high " +
+        "watermark checkpoint thread during startup", t)
+    }
+}
 }
diff --git a/core/src/main/scala/kafka/server/metadata/BrokerMetadataSnapshotter.scala b/core/src/main/scala/kafka/server/metadata/BrokerMetadataSnapshotter.scala
index fb5bfbbd81cd8..2a236ca749706 100644
--- a/core/src/main/scala/kafka/server/metadata/BrokerMetadataSnapshotter.scala
+++ b/core/src/main/scala/kafka/server/metadata/BrokerMetadataSnapshotter.scala
@@ -17,7 +17,6 @@
 package kafka.server.metadata
 
 import java.util.concurrent.RejectedExecutionException
-
 import kafka.utils.Logging
 import org.apache.kafka.image.MetadataImage
 import org.apache.kafka.common.utils.{LogContext, Time}
@@ -25,11 +24,32 @@ import org.apache.kafka.queue.{EventQueue, KafkaEventQueue}
 import org.apache.kafka.server.common.ApiMessageAndVersion
 import org.apache.kafka.snapshot.SnapshotWriter
 
+import java.util.function.Consumer
 
 trait SnapshotWriterBuilder {
   def build(committedOffset: Long,
             committedEpoch: Int,
-            lastContainedLogTime: Long): SnapshotWriter[ApiMessageAndVersion]
+            lastContainedLogTime: Long): Option[SnapshotWriter[ApiMessageAndVersion]]
+}
+
+/**
+ * The RecordListConsumer takes as input a potentially long list of records, and feeds the
+ * SnapshotWriter a series of smaller lists of records.
+ *
+ * Note: from the perspective of Kafka, the snapshot file is really just a list of records,
+ * and we don't care about batches. Batching is irrelevant to the meaning of the snapshot.
+ */
+class RecordListConsumer(
+  val maxRecordsInBatch: Int,
+  val writer: SnapshotWriter[ApiMessageAndVersion]
+) extends Consumer[java.util.List[ApiMessageAndVersion]] {
+  override def accept(messages: java.util.List[ApiMessageAndVersion]): Unit = {
+    var i = 0
+    while (i < messages.size()) {
+      writer.append(messages.subList(i, Math.min(i + maxRecordsInBatch, messages.size())));
+      i += maxRecordsInBatch
+    }
+  }
 }
 
 class BrokerMetadataSnapshotter(
@@ -38,7 +58,17 @@ class BrokerMetadataSnapshotter(
   threadNamePrefix: Option[String],
   writerBuilder: SnapshotWriterBuilder
 ) extends Logging with MetadataSnapshotter {
-  private val logContext = new LogContext(s"[BrokerMetadataSnapshotter id=${brokerId}] ")
+  /**
+   * The maximum number of records we will put in each batch.
+   *
+   * From the perspective of the Raft layer, the limit on batch size is specified in terms of
+   * bytes, not number of records. @See {@link KafkaRaftClient#MAX_BATCH_SIZE_BYTES} for details.
+   * However, it's more convenient to limit the batch size here in terms of number of records.
+   * So we chose a low number that will not cause problems.
+   */
+  private val maxRecordsInBatch = 1024
+
+  private val logContext = new LogContext(s"[BrokerMetadataSnapshotter id=$brokerId] ")
   logIdent = logContext.logPrefix()
 
   /**
@@ -53,29 +83,37 @@ class BrokerMetadataSnapshotter(
   val eventQueue = new KafkaEventQueue(time, logContext, threadNamePrefix.getOrElse(""))
 
   override def maybeStartSnapshot(lastContainedLogTime: Long, image: MetadataImage): Boolean = synchronized {
-    if (_currentSnapshotOffset == -1L) {
+    if (_currentSnapshotOffset != -1) {
+      info(s"Declining to create a new snapshot at ${image.highestOffsetAndEpoch()} because " +
+        s"there is already a snapshot in progress at offset ${_currentSnapshotOffset}")
+      false
+    } else {
       val writer = writerBuilder.build(
         image.highestOffsetAndEpoch().offset,
         image.highestOffsetAndEpoch().epoch,
         lastContainedLogTime
       )
-      _currentSnapshotOffset = image.highestOffsetAndEpoch().offset
-      info(s"Creating a new snapshot at offset ${_currentSnapshotOffset}...")
-      eventQueue.append(new CreateSnapshotEvent(image, writer))
-      true
-    } else {
-      warn(s"Declining to create a new snapshot at ${image.highestOffsetAndEpoch()} because " +
-           s"there is already a snapshot in progress at offset ${_currentSnapshotOffset}")
-      false
+      if (writer.nonEmpty) {
+        _currentSnapshotOffset = image.highestOffsetAndEpoch().offset
+        info(s"Creating a new snapshot at offset ${_currentSnapshotOffset}...")
+        eventQueue.append(new CreateSnapshotEvent(image, writer.get))
+        true
+      } else {
+        info(s"Declining to create a new snapshot at ${image.highestOffsetAndEpoch()} because " +
+          s"there is already a snapshot at offset ${image.highestOffsetAndEpoch().offset}")
+        false
+      }
     }
   }
 
   class CreateSnapshotEvent(image: MetadataImage,
                             writer: SnapshotWriter[ApiMessageAndVersion])
         extends EventQueue.Event {
+
     override def run(): Unit = {
       try {
-        image.write(writer.append(_))
+        val consumer = new RecordListConsumer(maxRecordsInBatch, writer)
+        image.write(consumer)
         writer.freeze()
       } finally {
         try {
diff --git a/core/src/main/scala/kafka/server/metadata/BrokerServerMetrics.scala b/core/src/main/scala/kafka/server/metadata/BrokerServerMetrics.scala
new file mode 100644
index 0000000000000..3e68ae85f9232
--- /dev/null
+++ b/core/src/main/scala/kafka/server/metadata/BrokerServerMetrics.scala
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server.metadata
+
+import java.util.concurrent.atomic.AtomicLong
+import org.apache.kafka.common.MetricName
+import org.apache.kafka.common.metrics.Gauge
+import org.apache.kafka.common.metrics.Metrics
+import org.apache.kafka.common.metrics.MetricConfig
+
+final class BrokerServerMetrics private (metrics: Metrics) extends AutoCloseable {
+  import BrokerServerMetrics._
+
+  val lastAppliedRecordOffset: AtomicLong = new AtomicLong(0)
+  val lastAppliedRecordTimestamp: AtomicLong = new AtomicLong(0)
+  val metadataLoadErrorCount: AtomicLong = new AtomicLong(0)
+  val metadataApplyErrorCount: AtomicLong = new AtomicLong(0)
+
+  val lastAppliedRecordOffsetName = metrics.metricName(
+    "last-applied-record-offset",
+    metricGroupName,
+    "The offset of the last record from the cluster metadata partition that was applied by the broker"
+  )
+
+  val lastAppliedRecordTimestampName = metrics.metricName(
+    "last-applied-record-timestamp",
+    metricGroupName,
+    "The timestamp of the last record from the cluster metadata partition that was applied by the broker"
+  )
+
+  val lastAppliedRecordLagMsName = metrics.metricName(
+    "last-applied-record-lag-ms",
+    metricGroupName,
+    "The difference between now and the timestamp of the last record from the cluster metadata partition that was applied by the broker"
+  )
+
+  val metadataLoadErrorCountName = metrics.metricName(
+    "metadata-load-error-count",
+    metricGroupName,
+    "The number of errors encountered by the BrokerMetadataListener while loading the metadata log and generating a new MetadataDelta based on it."
+  )
+
+  val metadataApplyErrorCountName = metrics.metricName(
+    "metadata-apply-error-count",
+    metricGroupName,
+    "The number of errors encountered by the BrokerMetadataPublisher while applying a new MetadataImage based on the latest MetadataDelta."
+  )
+
+  addMetric(metrics, lastAppliedRecordOffsetName) { _ =>
+    lastAppliedRecordOffset.get
+  }
+
+  addMetric(metrics, lastAppliedRecordTimestampName) { _ =>
+    lastAppliedRecordTimestamp.get
+  }
+
+  addMetric(metrics, lastAppliedRecordLagMsName) { now =>
+    now - lastAppliedRecordTimestamp.get
+  }
+
+  addMetric(metrics, metadataLoadErrorCountName) { _ =>
+    metadataLoadErrorCount.get
+  }
+
+  addMetric(metrics, metadataApplyErrorCountName) { _ =>
+    metadataApplyErrorCount.get
+  }
+
+  override def close(): Unit = {
+    List(
+      lastAppliedRecordOffsetName,
+      lastAppliedRecordTimestampName,
+      lastAppliedRecordLagMsName,
+      metadataLoadErrorCountName,
+      metadataApplyErrorCountName
+    ).foreach(metrics.removeMetric)
+  }
+}
+
+
+final object BrokerServerMetrics {
+  private val metricGroupName = "broker-metadata-metrics"
+
+  private def addMetric[T](metrics: Metrics, name: MetricName)(func: Long => T): Unit = {
+    metrics.addMetric(name, new FuncGauge(func))
+  }
+
+  private final class FuncGauge[T](func: Long => T) extends Gauge[T] {
+    override def value(config: MetricConfig, now: Long): T = {
+      func(now)
+    }
+  }
+
+  def apply(metrics: Metrics): BrokerServerMetrics = {
+    new BrokerServerMetrics(metrics)
+  }
+}
diff --git a/core/src/main/scala/kafka/server/metadata/ClientQuotaMetadataManager.scala b/core/src/main/scala/kafka/server/metadata/ClientQuotaMetadataManager.scala
index 6ada6b258c5cc..3f4b136fb990c 100644
--- a/core/src/main/scala/kafka/server/metadata/ClientQuotaMetadataManager.scala
+++ b/core/src/main/scala/kafka/server/metadata/ClientQuotaMetadataManager.scala
@@ -98,10 +98,10 @@ class ClientQuotaMetadataManager(private[metadata] val quotaManagers: QuotaManag
         }
       }
       quotaDelta.changes().entrySet().forEach { e =>
-        handleUserClientQuotaChange(userClientEntity, e.getKey(), e.getValue().asScala.map(_.toDouble))
+        handleUserClientQuotaChange(userClientEntity, e.getKey, e.getValue.asScala.map(_.toDouble))
       }
     } else {
-      warn(s"Ignoring unsupported quota entity ${entity}.")
+      warn(s"Ignoring unsupported quota entity $entity.")
     }
   }
 
@@ -119,10 +119,10 @@ class ClientQuotaMetadataManager(private[metadata] val quotaManagers: QuotaManag
 
     quotaDelta.changes().entrySet().forEach { e =>
       // The connection quota only understands the connection rate limit
-      val quotaName = e.getKey()
-      val quotaValue = e.getValue()
+      val quotaName = e.getKey
+      val quotaValue = e.getValue
       if (!quotaName.equals(QuotaConfigs.IP_CONNECTION_RATE_OVERRIDE_CONFIG)) {
-        warn(s"Ignoring unexpected quota key ${quotaName} for entity $ipEntity")
+        warn(s"Ignoring unexpected quota key $quotaName for entity $ipEntity")
       } else {
         try {
           connectionQuotas.updateIpConnectionRateQuota(inetAddress, quotaValue.asScala.map(_.toInt))
@@ -140,7 +140,7 @@ class ClientQuotaMetadataManager(private[metadata] val quotaManagers: QuotaManag
       case QuotaConfigs.REQUEST_PERCENTAGE_OVERRIDE_CONFIG => quotaManagers.request
       case QuotaConfigs.CONTROLLER_MUTATION_RATE_OVERRIDE_CONFIG => quotaManagers.controllerMutation
       case _ =>
-        warn(s"Ignoring unexpected quota key ${key} for entity $quotaEntity")
+        warn(s"Ignoring unexpected quota key $key for entity $quotaEntity")
         return
     }
 
diff --git a/core/src/main/scala/kafka/server/metadata/KRaftMetadataCache.scala b/core/src/main/scala/kafka/server/metadata/KRaftMetadataCache.scala
index 1ff7a8076c3c9..525772115037e 100644
--- a/core/src/main/scala/kafka/server/metadata/KRaftMetadataCache.scala
+++ b/core/src/main/scala/kafka/server/metadata/KRaftMetadataCache.scala
@@ -18,7 +18,7 @@
 package kafka.server.metadata
 
 import kafka.controller.StateChangeLogger
-import kafka.server.MetadataCache
+import kafka.server.{FinalizedFeaturesAndEpoch, MetadataCache}
 import kafka.utils.Logging
 import org.apache.kafka.common.internals.Topic
 import org.apache.kafka.common.message.MetadataResponseData.{MetadataResponsePartition, MetadataResponseTopic}
@@ -28,14 +28,15 @@ import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.MetadataResponse
 import org.apache.kafka.image.MetadataImage
+
 import java.util
 import java.util.{Collections, Properties}
 import java.util.concurrent.ThreadLocalRandom
-
 import kafka.admin.BrokerMetadata
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.message.{DescribeClientQuotasRequestData, DescribeClientQuotasResponseData}
 import org.apache.kafka.metadata.{PartitionRegistration, Replicas}
+import org.apache.kafka.server.common.MetadataVersion
 
 import scala.collection.{Seq, Set, mutable}
 import scala.jdk.CollectionConverters._
@@ -97,10 +98,10 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
         maybeLeader match {
           case None =>
             val error = if (!image.cluster().brokers.containsKey(partition.leader)) {
-              debug(s"Error while fetching metadata for ${topicName}-${partitionId}: leader not available")
+              debug(s"Error while fetching metadata for $topicName-$partitionId: leader not available")
               Errors.LEADER_NOT_AVAILABLE
             } else {
-              debug(s"Error while fetching metadata for ${topicName}-${partitionId}: listener $listenerName " +
+              debug(s"Error while fetching metadata for $topicName-$partitionId: listener $listenerName " +
                 s"not found on leader ${partition.leader}")
               if (errorUnavailableListeners) Errors.LISTENER_NOT_FOUND else Errors.LEADER_NOT_AVAILABLE
             }
@@ -113,12 +114,12 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
               .setIsrNodes(filteredIsr)
               .setOfflineReplicas(offlineReplicas)
           case Some(leader) =>
-            val error = if (filteredReplicas.size < partition.replicas.size) {
-              debug(s"Error while fetching metadata for ${topicName}-${partitionId}: replica information not available for " +
+            val error = if (filteredReplicas.size < partition.replicas.length) {
+              debug(s"Error while fetching metadata for $topicName-$partitionId: replica information not available for " +
                 s"following brokers ${partition.replicas.filterNot(filteredReplicas.contains).mkString(",")}")
               Errors.REPLICA_NOT_AVAILABLE
-            } else if (filteredIsr.size < partition.isr.size) {
-              debug(s"Error while fetching metadata for ${topicName}-${partitionId}: in sync replica information not available for " +
+            } else if (filteredIsr.size < partition.isr.length) {
+              debug(s"Error while fetching metadata for $topicName-$partitionId: in sync replica information not available for " +
                 s"following brokers ${partition.isr.filterNot(filteredIsr.contains).mkString(",")}")
               Errors.REPLICA_NOT_AVAILABLE
             } else {
@@ -198,7 +199,15 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
   override def getTopicName(topicId: Uuid): Option[String] = _currentImage.topics().topicsById.asScala.get(topicId).map(_.name())
 
   override def hasAliveBroker(brokerId: Int): Boolean = {
-    Option(_currentImage.cluster().broker(brokerId)).count(!_.fenced()) == 1
+    Option(_currentImage.cluster.broker(brokerId)).count(!_.fenced()) == 1
+  }
+
+  def isBrokerFenced(brokerId: Int): Boolean = {
+    Option(_currentImage.cluster.broker(brokerId)).count(_.fenced) == 1
+  }
+
+  def isBrokerShuttingDown(brokerId: Int): Boolean = {
+    Option(_currentImage.cluster.broker(brokerId)).count(_.inControlledShutdown) == 1
   }
 
   override def getAliveBrokers(): Iterable[BrokerMetadata] = getAliveBrokers(_currentImage)
@@ -220,7 +229,7 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
 
   override def getPartitionInfo(topicName: String, partitionId: Int): Option[UpdateMetadataPartitionState] = {
     Option(_currentImage.topics().getTopic(topicName)).
-      flatMap(topic => Some(topic.partitions().get(partitionId))).
+      flatMap(topic => Option(topic.partitions().get(partitionId))).
       flatMap(partition => Some(new UpdateMetadataPartitionState().
         setTopicName(topicName).
         setPartitionIndex(partitionId).
@@ -266,8 +275,8 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
     val image = _currentImage
     val result = new mutable.HashMap[Int, Node]()
     Option(image.topics().getTopic(tp.topic())).foreach { topic =>
-      topic.partitions().values().forEach { case partition =>
-        partition.replicas.map { case replicaId =>
+      topic.partitions().values().forEach { partition =>
+        partition.replicas.map { replicaId =>
           result.put(replicaId, Option(image.cluster().broker(replicaId)) match {
             case None => Node.noNode()
             case Some(broker) => broker.node(listenerName.value()).asScala.getOrElse(Node.noNode())
@@ -288,7 +297,7 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
    */
   private def getRandomAliveBroker(image: MetadataImage): Option[Int] = {
     val aliveBrokers = getAliveBrokers(image).toList
-    if (aliveBrokers.size == 0) {
+    if (aliveBrokers.isEmpty) {
       None
     } else {
       Some(aliveBrokers(ThreadLocalRandom.current().nextInt(aliveBrokers.size)).id)
@@ -315,8 +324,8 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
 
     image.topics().topicsByName().values().forEach { topic =>
       topic.partitions().entrySet().forEach { entry =>
-        val partitionId = entry.getKey()
-        val partition = entry.getValue()
+        val partitionId = entry.getKey
+        val partition = entry.getValue
         partitionInfos.add(new PartitionInfo(topic.name(),
           partitionId,
           node(partition.leader),
@@ -364,4 +373,18 @@ class KRaftMetadataCache(val brokerId: Int) extends MetadataCache with Logging w
   def describeClientQuotas(request: DescribeClientQuotasRequestData): DescribeClientQuotasResponseData = {
     _currentImage.clientQuotas().describe(request)
   }
+
+  override def metadataVersion(): MetadataVersion = _currentImage.features().metadataVersion()
+
+  override def features(): FinalizedFeaturesAndEpoch = {
+    val image = _currentImage
+    val features = image.features().finalizedVersions().asScala.map {
+      case (name: String, level: java.lang.Short) => name -> Short2short(level)
+    }
+    features.put(MetadataVersion.FEATURE_NAME, image.features().metadataVersion().featureLevel())
+
+    FinalizedFeaturesAndEpoch(
+      features.toMap,
+      image.highestOffsetAndEpoch().offset)
+  }
 }
diff --git a/core/src/main/scala/kafka/server/metadata/MetadataPublisher.scala b/core/src/main/scala/kafka/server/metadata/MetadataPublisher.scala
index 104d164d9c50d..b63a2c056c05f 100644
--- a/core/src/main/scala/kafka/server/metadata/MetadataPublisher.scala
+++ b/core/src/main/scala/kafka/server/metadata/MetadataPublisher.scala
@@ -30,4 +30,9 @@ trait MetadataPublisher {
    *                               delta to the previous image.
    */
   def publish(delta: MetadataDelta, newImage: MetadataImage): Unit
+
+  /**
+   * The highest offset of metadata topic which has been published
+   */
+  def publishedOffset: Long
 }
diff --git a/core/src/main/scala/kafka/server/metadata/ZkConfigRepository.scala b/core/src/main/scala/kafka/server/metadata/ZkConfigRepository.scala
index 8f8dfcd1a044a..16842bcd11ffe 100644
--- a/core/src/main/scala/kafka/server/metadata/ZkConfigRepository.scala
+++ b/core/src/main/scala/kafka/server/metadata/ZkConfigRepository.scala
@@ -39,7 +39,7 @@ class ZkConfigRepository(adminZkClient: AdminZkClient) extends ConfigRepository
     }
     // ZK stores cluster configs under "<default>".
     val effectiveName = if (configResource.`type`.equals(Type.BROKER) &&
-        configResource.name.isEmpty()) {
+        configResource.name.isEmpty) {
       ConfigEntityName.Default
     } else {
       configResource.name
diff --git a/core/src/main/scala/kafka/server/metadata/ZkMetadataCache.scala b/core/src/main/scala/kafka/server/metadata/ZkMetadataCache.scala
index 03568732ba96a..d69785f90f666 100755
--- a/core/src/main/scala/kafka/server/metadata/ZkMetadataCache.scala
+++ b/core/src/main/scala/kafka/server/metadata/ZkMetadataCache.scala
@@ -19,7 +19,7 @@ package kafka.server.metadata
 
 import java.util
 import java.util.Collections
-import java.util.concurrent.locks.ReentrantReadWriteLock
+import java.util.concurrent.locks.{ReentrantLock, ReentrantReadWriteLock}
 import kafka.admin.BrokerMetadata
 
 import scala.collection.{Seq, Set, mutable}
@@ -27,7 +27,7 @@ import scala.jdk.CollectionConverters._
 import kafka.cluster.{Broker, EndPoint}
 import kafka.api._
 import kafka.controller.StateChangeLogger
-import kafka.server.MetadataCache
+import kafka.server.{BrokerFeatures, FinalizedFeaturesAndEpoch, MetadataCache}
 import kafka.utils.CoreUtils._
 import kafka.utils.Logging
 import kafka.utils.Implicits._
@@ -38,14 +38,30 @@ import org.apache.kafka.common.message.MetadataResponseData.MetadataResponseTopi
 import org.apache.kafka.common.message.MetadataResponseData.MetadataResponsePartition
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.Errors
-import org.apache.kafka.common.requests.{MetadataResponse, UpdateMetadataRequest}
+import org.apache.kafka.common.requests.{ApiVersionsResponse, MetadataResponse, UpdateMetadataRequest}
 import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.server.common.MetadataVersion
+
+import java.util.concurrent.TimeUnit
+import scala.concurrent.TimeoutException
+import scala.math.max
+
+// Raised whenever there was an error in updating the FinalizedFeatureCache with features.
+class FeatureCacheUpdateException(message: String) extends RuntimeException(message) {
+}
+
+trait ZkFinalizedFeatureCache {
+  def waitUntilFeatureEpochOrThrow(minExpectedEpoch: Long, timeoutMs: Long): Unit
+
+  def getFeatureOption: Option[FinalizedFeaturesAndEpoch]
+}
 
 /**
  *  A cache for the state (e.g., current leader) of each partition. This cache is updated through
  *  UpdateMetadataRequest from the controller. Every broker maintains the same cache, asynchronously.
  */
-class ZkMetadataCache(brokerId: Int) extends MetadataCache with Logging {
+class ZkMetadataCache(brokerId: Int, metadataVersion: MetadataVersion, brokerFeatures: BrokerFeatures)
+  extends MetadataCache with ZkFinalizedFeatureCache with Logging {
 
   private val partitionMetadataLock = new ReentrantReadWriteLock()
   //this is the cache state. every MetadataSnapshot instance is immutable, and updates (performed under a lock)
@@ -58,6 +74,11 @@ class ZkMetadataCache(brokerId: Int) extends MetadataCache with Logging {
   this.logIdent = s"[MetadataCache brokerId=$brokerId] "
   private val stateChangeLogger = new StateChangeLogger(brokerId, inControllerContext = false, None)
 
+  // Features are updated via ZK notification (see FinalizedFeatureChangeListener)
+  @volatile private var featuresAndEpoch: Option[FinalizedFeaturesAndEpoch] = Option.empty
+  private val featureLock = new ReentrantLock()
+  private val featureCond = featureLock.newCondition()
+
   // This method is the main hotspot when it comes to the performance of metadata requests,
   // we should be careful about adding additional logic here. Relatedly, `brokers` is
   // `List[Integer]` instead of `List[Int]` to avoid a collection copy.
@@ -430,4 +451,103 @@ class ZkMetadataCache(brokerId: Int) extends MetadataCache with Logging {
                               aliveNodes: mutable.LongMap[collection.Map[ListenerName, Node]]) {
     val topicNames: Map[Uuid, String] = topicIds.map { case (topicName, topicId) => (topicId, topicName) }
   }
+
+  override def metadataVersion(): MetadataVersion = metadataVersion
+
+  override def features(): FinalizedFeaturesAndEpoch = {
+    featuresAndEpoch match {
+      case Some(features) => features
+      case None => FinalizedFeaturesAndEpoch(Map.empty, ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH)
+    }
+  }
+
+  /**
+   * Updates the cache to the latestFeatures, and updates the existing epoch to latestEpoch.
+   * Expects that the latestEpoch should be always greater than the existing epoch (when the
+   * existing epoch is defined).
+   *
+   * @param latestFeatures   the latest finalized features to be set in the cache
+   * @param latestEpoch      the latest epoch value to be set in the cache
+   *
+   * @throws                 FeatureCacheUpdateException if the cache update operation fails
+   *                         due to invalid parameters or incompatibilities with the broker's
+   *                         supported features. In such a case, the existing cache contents are
+   *                         not modified.
+   */
+  def updateFeaturesOrThrow(latestFeatures: Map[String, Short], latestEpoch: Long): Unit = {
+    val latest = FinalizedFeaturesAndEpoch(latestFeatures, latestEpoch)
+    val existing = featuresAndEpoch.map(item => item.toString()).getOrElse("<empty>")
+    if (featuresAndEpoch.isDefined && featuresAndEpoch.get.epoch > latest.epoch) {
+      val errorMsg = s"FinalizedFeatureCache update failed due to invalid epoch in new $latest." +
+        s" The existing cache contents are $existing."
+      throw new FeatureCacheUpdateException(errorMsg)
+    } else {
+      val incompatibleFeatures = brokerFeatures.incompatibleFeatures(latest.features)
+      if (incompatibleFeatures.nonEmpty) {
+        val errorMsg = "FinalizedFeatureCache update failed since feature compatibility" +
+          s" checks failed! Supported ${brokerFeatures.supportedFeatures} has incompatibilities" +
+          s" with the latest $latest."
+        throw new FeatureCacheUpdateException(errorMsg)
+      } else {
+        val logMsg = s"Updated cache from existing $existing to latest $latest."
+        inLock(featureLock) {
+          featuresAndEpoch = Some(latest)
+          featureCond.signalAll()
+        }
+        info(logMsg)
+      }
+    }
+  }
+
+
+  /**
+   * Clears all existing finalized features and epoch from the cache.
+   */
+  def clearFeatures(): Unit = {
+    inLock(featureLock) {
+      featuresAndEpoch = None
+      featureCond.signalAll()
+    }
+  }
+
+  /**
+   * Waits no more than timeoutMs for the cache's feature epoch to reach an epoch >= minExpectedEpoch.
+   *
+   * @param minExpectedEpoch   the minimum expected epoch to be reached by the cache
+   *                           (should be >= 0)
+   * @param timeoutMs          the timeout (in milli seconds)
+   *
+   * @throws                   TimeoutException if the cache's epoch has not reached at least
+   *                           minExpectedEpoch within timeoutMs.
+   */
+  def waitUntilFeatureEpochOrThrow(minExpectedEpoch: Long, timeoutMs: Long): Unit = {
+    if(minExpectedEpoch < 0L) {
+      throw new IllegalArgumentException(
+        s"Expected minExpectedEpoch >= 0, but $minExpectedEpoch was provided.")
+    }
+
+    if(timeoutMs < 0L) {
+      throw new IllegalArgumentException(s"Expected timeoutMs >= 0, but $timeoutMs was provided.")
+    }
+    val waitEndTimeNanos = System.nanoTime() + (timeoutMs * 1000000)
+    inLock(featureLock) {
+      while (!(featuresAndEpoch.isDefined && featuresAndEpoch.get.epoch >= minExpectedEpoch)) {
+        val nowNanos = System.nanoTime()
+        if (nowNanos > waitEndTimeNanos) {
+          throw new TimeoutException(
+            s"Timed out after waiting for ${timeoutMs}ms for required condition to be met." +
+              s" Current epoch: ${featuresAndEpoch.map(fe => fe.epoch).getOrElse("<none>")}.")
+        }
+        val sleepTimeMs = max(1L, (waitEndTimeNanos - nowNanos) / 1000000)
+        featureCond.await(sleepTimeMs, TimeUnit.MILLISECONDS)
+      }
+    }
+  }
+
+  /**
+   * @return   the latest known FinalizedFeaturesAndEpoch or empty if not defined in the cache.
+   */
+  def getFeatureOption: Option[FinalizedFeaturesAndEpoch] = {
+    featuresAndEpoch
+  }
 }
diff --git a/core/src/main/scala/kafka/tools/ClusterTool.scala b/core/src/main/scala/kafka/tools/ClusterTool.scala
index b868f72fc7f90..ed82eeba0ed56 100644
--- a/core/src/main/scala/kafka/tools/ClusterTool.scala
+++ b/core/src/main/scala/kafka/tools/ClusterTool.scala
@@ -52,6 +52,7 @@ object ClusterTool extends Logging {
       unregisterParser.addArgument("--id", "-i").
         `type`(classOf[Integer]).
         action(store()).
+        required(true).
         help("The ID of the broker to unregister.")
 
       val namespace = parser.parseArgsOrFail(args)
diff --git a/core/src/main/scala/kafka/tools/ConsoleProducer.scala b/core/src/main/scala/kafka/tools/ConsoleProducer.scala
index c99f6aed16ef6..6afd9a923eac3 100644
--- a/core/src/main/scala/kafka/tools/ConsoleProducer.scala
+++ b/core/src/main/scala/kafka/tools/ConsoleProducer.scala
@@ -110,6 +110,11 @@ object ConsoleProducer {
       props, ProducerConfig.SEND_BUFFER_CONFIG, config.options, config.socketBufferSizeOpt)
     CommandLineUtils.maybeMergeOptions(
       props, ProducerConfig.BUFFER_MEMORY_CONFIG, config.options, config.maxMemoryBytesOpt)
+    // We currently have 2 options to set the batch.size value. We'll deprecate/remove one of them in KIP-717.
+    CommandLineUtils.maybeMergeOptions(
+      props, ProducerConfig.BATCH_SIZE_CONFIG, config.options, config.batchSizeOpt)
+    CommandLineUtils.maybeMergeOptions(
+      props, ProducerConfig.BATCH_SIZE_CONFIG, config.options, config.maxPartitionMemoryBytesOpt)
     CommandLineUtils.maybeMergeOptions(
       props, ProducerConfig.METADATA_MAX_AGE_CONFIG, config.options, config.metadataExpiryMsOpt)
     CommandLineUtils.maybeMergeOptions(
@@ -138,6 +143,12 @@ object ConsoleProducer {
                                     .withOptionalArg()
                                     .describedAs("compression-codec")
                                     .ofType(classOf[String])
+    val batchSizeOpt = parser.accepts("batch-size", "Number of messages to send in a single batch if they are not being sent synchronously. "+
+       "please note that this option will be replaced if max-partition-memory-bytes is also set")
+      .withRequiredArg
+      .describedAs("size")
+      .ofType(classOf[java.lang.Integer])
+      .defaultsTo(16 * 1024)
     val messageSendMaxRetriesOpt = parser.accepts("message-send-max-retries", "Brokers can fail receiving the message for multiple reasons, " +
       "and being unavailable transiently is just one of them. This property specifies the number of retries before the producer give up and drop this message. " +
       "This is the option to control `retries` in producer configs.")
diff --git a/core/src/main/scala/kafka/tools/DumpLogSegments.scala b/core/src/main/scala/kafka/tools/DumpLogSegments.scala
index 88e59d51ad80e..b57342ff29baf 100755
--- a/core/src/main/scala/kafka/tools/DumpLogSegments.scala
+++ b/core/src/main/scala/kafka/tools/DumpLogSegments.scala
@@ -18,7 +18,6 @@
 package kafka.tools
 
 import java.io._
-
 import com.fasterxml.jackson.databind.node.{IntNode, JsonNodeFactory, ObjectNode, TextNode}
 import kafka.coordinator.group.GroupMetadataManager
 import kafka.coordinator.transaction.TransactionLog
@@ -26,11 +25,13 @@ import kafka.log._
 import kafka.serializer.Decoder
 import kafka.utils._
 import kafka.utils.Implicits._
+import org.apache.kafka.common.message.{SnapshotFooterRecordJsonConverter, SnapshotHeaderRecordJsonConverter}
 import org.apache.kafka.common.metadata.{MetadataJsonConverters, MetadataRecordType}
 import org.apache.kafka.common.protocol.ByteBufferAccessor
 import org.apache.kafka.common.record._
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.metadata.MetadataRecordSerde
+import org.apache.kafka.snapshot.Snapshots
 
 import scala.jdk.CollectionConverters._
 import scala.collection.mutable
@@ -57,9 +58,9 @@ object DumpLogSegments {
       val filename = file.getName
       val suffix = filename.substring(filename.lastIndexOf("."))
       suffix match {
-        case UnifiedLog.LogFileSuffix =>
+        case UnifiedLog.LogFileSuffix | Snapshots.SUFFIX =>
           dumpLog(file, opts.shouldPrintDataLog, nonConsecutivePairsForLogFilesMap, opts.isDeepIteration,
-            opts.messageParser, opts.skipRecordMetadata)
+            opts.messageParser, opts.skipRecordMetadata, opts.maxBytes)
         case UnifiedLog.IndexFileSuffix =>
           dumpIndex(file, opts.indexSanityOnly, opts.verifyOnly, misMatchesForIndexFilesMap, opts.maxMessageSize)
         case UnifiedLog.TimeIndexFileSuffix =>
@@ -246,10 +247,16 @@ object DumpLogSegments {
                       nonConsecutivePairsForLogFilesMap: mutable.Map[String, List[(Long, Long)]],
                       isDeepIteration: Boolean,
                       parser: MessageParser[_, _],
-                      skipRecordMetadata: Boolean): Unit = {
-    val startOffset = file.getName.split("\\.")(0).toLong
-    println("Starting offset: " + startOffset)
-    val fileRecords = FileRecords.open(file, false)
+                      skipRecordMetadata: Boolean,
+                      maxBytes: Int): Unit = {
+    if (file.getName.endsWith(UnifiedLog.LogFileSuffix)) {
+      val startOffset = file.getName.split("\\.")(0).toLong
+      println(s"Log starting offset: $startOffset")
+    } else if (file.getName.endsWith(Snapshots.SUFFIX)) {
+      val path = Snapshots.parse(file.toPath).get()
+      println(s"Snapshot end offset: ${path.snapshotId.offset}, epoch: ${path.snapshotId.epoch}")
+    }
+    val fileRecords = FileRecords.open(file, false).slice(0, maxBytes)
     try {
       var validBytes = 0L
       var lastOffset = -1L
@@ -287,6 +294,12 @@ object DumpLogSegments {
                   case ControlRecordType.ABORT | ControlRecordType.COMMIT =>
                     val endTxnMarker = EndTransactionMarker.deserialize(record)
                     print(s" endTxnMarker: ${endTxnMarker.controlType} coordinatorEpoch: ${endTxnMarker.coordinatorEpoch}")
+                  case ControlRecordType.SNAPSHOT_HEADER =>
+                    val header = ControlRecordUtils.deserializedSnapshotHeaderRecord(record)
+                    print(s" SnapshotHeader ${SnapshotHeaderRecordJsonConverter.write(header, header.version())}")
+                  case ControlRecordType.SNAPSHOT_FOOTER =>
+                    val footer = ControlRecordUtils.deserializedSnapshotFooterRecord(record)
+                    print(s" SnapshotFooter ${SnapshotFooterRecordJsonConverter.write(footer, footer.version())}")
                   case controlType =>
                     print(s" controlType: $controlType($controlTypeId)")
                 }
@@ -306,7 +319,7 @@ object DumpLogSegments {
         validBytes += batch.sizeInBytes
       }
       val trailingBytes = fileRecords.sizeInBytes - validBytes
-      if (trailingBytes > 0)
+      if ( (trailingBytes > 0) && (maxBytes == Integer.MAX_VALUE) )
         println(s"Found $trailingBytes invalid bytes at the end of ${file.getName}")
     } finally fileRecords.closeHandlers()
   }
@@ -430,6 +443,11 @@ object DumpLogSegments {
       .describedAs("size")
       .ofType(classOf[java.lang.Integer])
       .defaultsTo(5 * 1024 * 1024)
+    val maxBytesOpt = parser.accepts("max-bytes", "Limit the amount of total batches read in bytes avoiding reading the whole .log file(s).")
+       .withRequiredArg
+       .describedAs("size")
+       .ofType(classOf[java.lang.Integer])
+       .defaultsTo(Integer.MAX_VALUE)
     val deepIterationOpt = parser.accepts("deep-iteration", "if set, uses deep instead of shallow iteration. Automatically set if print-data-log is enabled.")
     val valueDecoderOpt = parser.accepts("value-decoder-class", "if set, used to deserialize the messages. This class should implement kafka.serializer.Decoder trait. Custom jar should be available in kafka/libs directory.")
       .withOptionalArg()
@@ -473,6 +491,7 @@ object DumpLogSegments {
     lazy val indexSanityOnly: Boolean = options.has(indexSanityOpt)
     lazy val files = options.valueOf(filesOpt).split(",")
     lazy val maxMessageSize = options.valueOf(maxMessageSizeOpt).intValue()
+    lazy val maxBytes = options.valueOf(maxBytesOpt).intValue()
 
     def checkArgs(): Unit = CommandLineUtils.checkRequiredArgs(parser, options, filesOpt)
 
diff --git a/core/src/main/scala/kafka/tools/GetOffsetShell.scala b/core/src/main/scala/kafka/tools/GetOffsetShell.scala
index dfd5a227689ec..03f9c819260b2 100644
--- a/core/src/main/scala/kafka/tools/GetOffsetShell.scala
+++ b/core/src/main/scala/kafka/tools/GetOffsetShell.scala
@@ -18,18 +18,18 @@
  */
 package kafka.tools
 
-import java.util.Properties
 import joptsimple._
 import kafka.utils.{CommandLineUtils, Exit, IncludeList, ToolsUtils}
-import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
-import org.apache.kafka.common.requests.ListOffsetsRequest
-import org.apache.kafka.common.{PartitionInfo, TopicPartition}
-import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, ListTopicsOptions, OffsetSpec}
+import org.apache.kafka.common.{KafkaException, TopicPartition}
+import org.apache.kafka.common.requests.{ListOffsetsRequest, ListOffsetsResponse}
 import org.apache.kafka.common.utils.Utils
 
+import java.util.Properties
+import java.util.concurrent.ExecutionException
 import java.util.regex.Pattern
-import scala.jdk.CollectionConverters._
 import scala.collection.Seq
+import scala.jdk.CollectionConverters._
 import scala.math.Ordering.Implicits.infixOrderingOps
 
 object GetOffsetShell {
@@ -45,7 +45,7 @@ object GetOffsetShell {
     }
   }
 
-  private def fetchOffsets(args: Array[String]): Unit = {
+  private[tools] def fetchOffsets(args: Array[String]): Unit = {
     val parser = new OptionParser(false)
     val brokerListOpt = parser.accepts("broker-list", "DEPRECATED, use --bootstrap-server instead; ignored if --bootstrap-server is specified. The server(s) to connect to in the form HOST1:PORT1,HOST2:PORT2.")
                            .withRequiredArg
@@ -72,10 +72,10 @@ object GetOffsetShell {
                            .ofType(classOf[String])
     val timeOpt = parser.accepts("time", "timestamp of the offsets before that. [Note: No offset is returned, if the timestamp greater than recently committed record timestamp is given.]")
                            .withRequiredArg
-                           .describedAs("timestamp/-1(latest)/-2(earliest)")
-                           .ofType(classOf[java.lang.Long])
-                           .defaultsTo(-1L)
-    val commandConfigOpt = parser.accepts("command-config", s"Property file containing configs to be passed to Consumer Client.")
+                           .describedAs("<timestamp> / -1 or latest / -2 or earliest / -3 or max-timestamp")
+                           .ofType(classOf[String])
+                           .defaultsTo("latest")
+    val commandConfigOpt = parser.accepts("command-config", s"Property file containing configs to be passed to Admin Client.")
                            .withRequiredArg
                            .describedAs("config file")
                            .ofType(classOf[String])
@@ -103,17 +103,14 @@ object GetOffsetShell {
       throw new IllegalArgumentException("--topic-partitions cannot be used with --topic or --partitions")
     }
 
-    val listOffsetsTimestamp = options.valueOf(timeOpt).longValue
+    val offsetSpec = parseOffsetSpec(options.valueOf(timeOpt))
 
     val topicPartitionFilter = if (options.has(topicPartitionsOpt)) {
-      createTopicPartitionFilterWithPatternList(options.valueOf(topicPartitionsOpt), excludeInternalTopics)
+      createTopicPartitionFilterWithPatternList(options.valueOf(topicPartitionsOpt))
     } else {
-      val partitionIdsRequested = createPartitionSet(options.valueOf(partitionsOpt))
-
       createTopicPartitionFilterWithTopicAndPartitionPattern(
         if (options.has(topicOpt)) Some(options.valueOf(topicOpt)) else None,
-        excludeInternalTopics,
-        partitionIdsRequested
+        options.valueOf(partitionsOpt)
       )
     }
 
@@ -121,41 +118,65 @@ object GetOffsetShell {
       Utils.loadProps(options.valueOf(commandConfigOpt))
     else
       new Properties
-    config.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
-    config.setProperty(ConsumerConfig.CLIENT_ID_CONFIG, clientId)
-    val consumer = new KafkaConsumer(config, new ByteArrayDeserializer, new ByteArrayDeserializer)
+    config.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList)
+    config.setProperty(AdminClientConfig.CLIENT_ID_CONFIG, clientId)
+    val adminClient = Admin.create(config)
 
     try {
-      val partitionInfos = listPartitionInfos(consumer, topicPartitionFilter)
+      val partitionInfos = listPartitionInfos(adminClient, topicPartitionFilter, excludeInternalTopics)
 
       if (partitionInfos.isEmpty) {
         throw new IllegalArgumentException("Could not match any topic-partitions with the specified filters")
       }
 
-      val topicPartitions = partitionInfos.flatMap { p =>
-        if (p.leader == null) {
-          System.err.println(s"Error: topic-partition ${p.topic}:${p.partition} does not have a leader. Skip getting offsets")
-          None
-        } else
-          Some(new TopicPartition(p.topic, p.partition))
-      }
+      val timestampsToSearch = partitionInfos.map(tp => tp -> offsetSpec).toMap.asJava
 
-      /* Note that the value of the map can be null */
-      val partitionOffsets: collection.Map[TopicPartition, java.lang.Long] = listOffsetsTimestamp match {
-        case ListOffsetsRequest.EARLIEST_TIMESTAMP => consumer.beginningOffsets(topicPartitions.asJava).asScala
-        case ListOffsetsRequest.LATEST_TIMESTAMP => consumer.endOffsets(topicPartitions.asJava).asScala
-        case _ =>
-          val timestampsToSearch = topicPartitions.map(tp => tp -> (listOffsetsTimestamp: java.lang.Long)).toMap.asJava
-          consumer.offsetsForTimes(timestampsToSearch).asScala.map { case (k, x) =>
-            if (x == null) (k, null) else (k, x.offset: java.lang.Long)
+      val listOffsetsResult = adminClient.listOffsets(timestampsToSearch)
+      val partitionOffsets = partitionInfos.flatMap { tp =>
+        try {
+          val partitionInfo = listOffsetsResult.partitionResult(tp).get
+          if (partitionInfo.offset != ListOffsetsResponse.UNKNOWN_OFFSET) {
+            Some((tp, partitionInfo.offset))
+          } else {
+            None
           }
+        } catch {
+          case e: ExecutionException =>
+            e.getCause match {
+              case cause: KafkaException =>
+                System.err.println(s"Skip getting offsets for topic-partition ${tp.topic}:${tp.partition} due to error: ${cause.getMessage}")
+              case _ =>
+                throw e
+            }
+            None
+        }
       }
 
-      partitionOffsets.toSeq.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
+      partitionOffsets.sortWith((tp1, tp2) => compareTopicPartitions(tp1._1, tp2._1)).foreach {
         case (tp, offset) => println(s"${tp.topic}:${tp.partition}:${Option(offset).getOrElse("")}")
       }
     } finally {
-      consumer.close()
+      adminClient.close()
+    }
+  }
+
+  private def parseOffsetSpec(listOffsetsTimestamp: String): OffsetSpec = {
+    listOffsetsTimestamp match {
+      case "earliest" => OffsetSpec.earliest()
+      case "latest" => OffsetSpec.latest()
+      case "max-timestamp" => OffsetSpec.maxTimestamp()
+      case _ =>
+        try {
+          listOffsetsTimestamp.toLong match {
+            case ListOffsetsRequest.EARLIEST_TIMESTAMP => OffsetSpec.earliest()
+            case ListOffsetsRequest.LATEST_TIMESTAMP => OffsetSpec.latest()
+            case ListOffsetsRequest.MAX_TIMESTAMP => OffsetSpec.maxTimestamp()
+            case value => OffsetSpec.forTimestamp(value)
+          }
+        } catch {
+          case e: NumberFormatException =>
+            throw new IllegalArgumentException(s"Malformed time argument $listOffsetsTimestamp, please use -1 or latest / -2 or earliest / -3 or max-timestamp, or a specified long format timestamp", e)
+        }
     }
   }
 
@@ -171,13 +192,15 @@ object GetOffsetShell {
    * TopicPattern: REGEX
    * PartitionPattern: NUMBER | NUMBER-(NUMBER)? | -NUMBER
    */
-  def createTopicPartitionFilterWithPatternList(topicPartitions: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+  def createTopicPartitionFilterWithPatternList(
+    topicPartitions: String
+  ): TopicPartitionFilter = {
     val ruleSpecs = topicPartitions.split(",")
-    val rules = ruleSpecs.map(ruleSpec => parseRuleSpec(ruleSpec, excludeInternalTopics))
-    tp => rules.exists { rule => rule.apply(tp) }
+    val rules = ruleSpecs.map(ruleSpec => parseRuleSpec(ruleSpec))
+    CompositeTopicPartitionFilter(rules)
   }
 
-  def parseRuleSpec(ruleSpec: String, excludeInternalTopics: Boolean): PartitionInfo => Boolean = {
+  def parseRuleSpec(ruleSpec: String): TopicPartitionFilter = {
     val matcher = TopicPartitionPattern.matcher(ruleSpec)
     if (!matcher.matches())
       throw new IllegalArgumentException(s"Invalid rule specification: $ruleSpec")
@@ -189,22 +212,29 @@ object GetOffsetShell {
     val topicFilter = IncludeList(group(1).getOrElse(".*"))
     val partitionFilter = group(2).map(_.toInt) match {
       case Some(partition) =>
-        (p: Int) => p == partition
+        UniquePartitionFilter(partition)
       case None =>
         val lowerRange = group(3).map(_.toInt).getOrElse(0)
         val upperRange = group(4).map(_.toInt).getOrElse(Int.MaxValue)
-        (p: Int) => p >= lowerRange && p < upperRange
+        PartitionRangeFilter(lowerRange, upperRange)
     }
-
-    tp => topicFilter.isTopicAllowed(tp.topic, excludeInternalTopics) && partitionFilter(tp.partition)
+    TopicFilterAndPartitionFilter(
+      topicFilter,
+      partitionFilter
+    )
   }
 
   /**
    * Creates a topic-partition filter based on a topic pattern and a set of partition ids.
    */
-  def createTopicPartitionFilterWithTopicAndPartitionPattern(topicOpt: Option[String], excludeInternalTopics: Boolean, partitionIds: Set[Int]): PartitionInfo => Boolean = {
-    val topicsFilter = IncludeList(topicOpt.getOrElse(".*"))
-    t => topicsFilter.isTopicAllowed(t.topic, excludeInternalTopics) && (partitionIds.isEmpty || partitionIds.contains(t.partition))
+  def createTopicPartitionFilterWithTopicAndPartitionPattern(
+    topicOpt: Option[String],
+    partitionIds: String
+  ): TopicFilterAndPartitionFilter = {
+    TopicFilterAndPartitionFilter(
+      IncludeList(topicOpt.getOrElse(".*")),
+      PartitionsSetFilter(createPartitionSet(partitionIds))
+    )
   }
 
   def createPartitionSet(partitionsString: String): Set[Int] = {
@@ -224,9 +254,82 @@ object GetOffsetShell {
   /**
    * Return the partition infos. Filter them with topicPartitionFilter.
    */
-  private def listPartitionInfos(consumer: KafkaConsumer[_, _], topicPartitionFilter: PartitionInfo => Boolean): Seq[PartitionInfo] = {
-    consumer.listTopics.asScala.values.flatMap { partitions =>
-      partitions.asScala.filter(topicPartitionFilter)
+  private def listPartitionInfos(
+    client: Admin,
+    topicPartitionFilter: TopicPartitionFilter,
+    excludeInternalTopics: Boolean
+  ): Seq[TopicPartition] = {
+    val listTopicsOptions = new ListTopicsOptions().listInternal(!excludeInternalTopics)
+    val topics = client.listTopics(listTopicsOptions).names.get
+    val filteredTopics = topics.asScala.filter(topicPartitionFilter.isTopicAllowed)
+
+    client.describeTopics(filteredTopics.asJava).allTopicNames.get.asScala.flatMap { case (topic, description) =>
+      description
+        .partitions
+        .asScala
+        .map(tp => new TopicPartition(topic, tp.partition))
+        .filter(topicPartitionFilter.isTopicPartitionAllowed)
     }.toBuffer
   }
 }
+
+trait PartitionFilter {
+
+  /**
+   * Used to filter partitions based on a certain criteria, for example, a set of partition ids.
+   */
+  def isPartitionAllowed(partition: Int): Boolean
+}
+
+case class PartitionsSetFilter(partitionIds: Set[Int]) extends PartitionFilter {
+  override def isPartitionAllowed(partition: Int): Boolean = partitionIds.isEmpty || partitionIds.contains(partition)
+}
+
+case class UniquePartitionFilter(partition: Int) extends PartitionFilter {
+  override def isPartitionAllowed(partition: Int): Boolean = partition == this.partition
+}
+
+case class PartitionRangeFilter(lowerRange: Int, upperRange: Int) extends PartitionFilter {
+  override def isPartitionAllowed(partition: Int): Boolean = partition >= lowerRange && partition < upperRange
+}
+
+trait TopicPartitionFilter {
+
+  /**
+   * Used to filter topics based on a certain criteria, for example, a set of topic names or a regular expression.
+   */
+  def isTopicAllowed(topic: String): Boolean
+
+  /**
+   * Used to filter topic-partitions based on a certain criteria, for example, a topic pattern and a set of partition ids.
+   */
+  def isTopicPartitionAllowed(partition: TopicPartition): Boolean
+}
+
+/**
+ * Creates a topic-partition filter based on a topic filter and a partition filter
+ */
+case class TopicFilterAndPartitionFilter(
+  topicFilter: IncludeList,
+  partitionFilter: PartitionFilter
+) extends TopicPartitionFilter {
+
+  override def isTopicPartitionAllowed(partition: TopicPartition): Boolean = {
+    isTopicAllowed(partition.topic) && partitionFilter.isPartitionAllowed(partition.partition)
+  }
+
+  override def isTopicAllowed(topic: String): Boolean = {
+    topicFilter.isTopicAllowed(topic, false)
+  }
+}
+
+case class CompositeTopicPartitionFilter(filters: Array[TopicPartitionFilter]) extends TopicPartitionFilter {
+
+  override def isTopicAllowed(topic: String): Boolean = {
+    filters.exists(_.isTopicAllowed(topic))
+  }
+
+  override def isTopicPartitionAllowed(tp: TopicPartition): Boolean = {
+    filters.exists(_.isTopicPartitionAllowed(tp))
+  }
+}
diff --git a/core/src/main/scala/kafka/tools/StorageTool.scala b/core/src/main/scala/kafka/tools/StorageTool.scala
index 28377d297cbc5..a96275cc27cd1 100644
--- a/core/src/main/scala/kafka/tools/StorageTool.scala
+++ b/core/src/main/scala/kafka/tools/StorageTool.scala
@@ -19,48 +19,25 @@ package kafka.tools
 
 import java.io.PrintStream
 import java.nio.file.{Files, Paths}
-
 import kafka.server.{BrokerMetadataCheckpoint, KafkaConfig, MetaProperties, RawMetaProperties}
 import kafka.utils.{Exit, Logging}
 import net.sourceforge.argparse4j.ArgumentParsers
 import net.sourceforge.argparse4j.impl.Arguments.{store, storeTrue}
+import net.sourceforge.argparse4j.inf.Namespace
 import org.apache.kafka.common.Uuid
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.controller.BootstrapMetadata
+import org.apache.kafka.server.common.MetadataVersion
 
 import scala.collection.mutable
 
 object StorageTool extends Logging {
   def main(args: Array[String]): Unit = {
     try {
-      val parser = ArgumentParsers.
-        newArgumentParser("kafka-storage").
-        defaultHelp(true).
-        description("The Kafka storage tool.")
-      val subparsers = parser.addSubparsers().dest("command")
-
-      val infoParser = subparsers.addParser("info").
-        help("Get information about the Kafka log directories on this node.")
-      val formatParser = subparsers.addParser("format").
-        help("Format the Kafka log directories on this node.")
-      subparsers.addParser("random-uuid").help("Print a random UUID.")
-      List(infoParser, formatParser).foreach(parser => {
-        parser.addArgument("--config", "-c").
-          action(store()).
-          required(true).
-          help("The Kafka configuration file to use.")
-      })
-      formatParser.addArgument("--cluster-id", "-t").
-        action(store()).
-        required(true).
-        help("The cluster ID to use.")
-      formatParser.addArgument("--ignore-formatted", "-g").
-        action(storeTrue())
-
-      val namespace = parser.parseArgsOrFail(args)
+      val namespace = parseArguments(args)
       val command = namespace.getString("command")
       val config = Option(namespace.getString("config")).flatMap(
         p => Some(new KafkaConfig(Utils.loadProps(p))))
-
       command match {
         case "info" =>
           val directories = configToLogDirectories(config.get)
@@ -70,13 +47,17 @@ object StorageTool extends Logging {
         case "format" =>
           val directories = configToLogDirectories(config.get)
           val clusterId = namespace.getString("cluster_id")
+          val metadataVersion = getMetadataVersion(namespace)
+          if (!metadataVersion.isKRaftSupported) {
+            throw new TerseFailure(s"Must specify a valid KRaft metadata version of at least 3.0.")
+          }
           val metaProperties = buildMetadataProperties(clusterId, config.get)
           val ignoreFormatted = namespace.getBoolean("ignore_formatted")
           if (!configToSelfManagedMode(config.get)) {
             throw new TerseFailure("The kafka configuration file appears to be for " +
               "a legacy cluster. Formatting is only supported for clusters in KRaft mode.")
           }
-          Exit.exit(formatCommand(System.out, directories, metaProperties, ignoreFormatted ))
+          Exit.exit(formatCommand(System.out, directories, metaProperties, metadataVersion, ignoreFormatted))
 
         case "random-uuid" =>
           System.out.println(Uuid.randomUuid)
@@ -92,6 +73,37 @@ object StorageTool extends Logging {
     }
   }
 
+  def parseArguments(args: Array[String]): Namespace = {
+    val parser = ArgumentParsers.
+      newArgumentParser("kafka-storage").
+      defaultHelp(true).
+      description("The Kafka storage tool.")
+    val subparsers = parser.addSubparsers().dest("command")
+
+    val infoParser = subparsers.addParser("info").
+      help("Get information about the Kafka log directories on this node.")
+    val formatParser = subparsers.addParser("format").
+      help("Format the Kafka log directories on this node.")
+    subparsers.addParser("random-uuid").help("Print a random UUID.")
+    List(infoParser, formatParser).foreach(parser => {
+      parser.addArgument("--config", "-c").
+        action(store()).
+        required(true).
+        help("The Kafka configuration file to use.")
+    })
+    formatParser.addArgument("--cluster-id", "-t").
+      action(store()).
+      required(true).
+      help("The cluster ID to use.")
+    formatParser.addArgument("--ignore-formatted", "-g").
+      action(storeTrue())
+    formatParser.addArgument("--release-version", "-r").
+      action(store()).
+      help(s"A KRaft release version to use for the initial metadata version. The minimum is 3.0, the default is ${MetadataVersion.latest().version()}")
+
+    parser.parseArgsOrFail(args)
+  }
+
   def configToLogDirectories(config: KafkaConfig): Seq[String] = {
     val directories = new mutable.TreeSet[String]
     directories ++= config.logDirs
@@ -101,6 +113,12 @@ object StorageTool extends Logging {
 
   def configToSelfManagedMode(config: KafkaConfig): Boolean = config.processRoles.nonEmpty
 
+  def getMetadataVersion(namespace: Namespace): MetadataVersion = {
+    Option(namespace.getString("release_version"))
+      .map(ver => MetadataVersion.fromVersionString(ver))
+      .getOrElse(MetadataVersion.latest())
+  }
+
   def infoCommand(stream: PrintStream, selfManagedMode: Boolean, directories: Seq[String]): Int = {
     val problems = new mutable.ArrayBuffer[String]
     val foundDirectories = new mutable.ArrayBuffer[String]
@@ -197,13 +215,16 @@ object StorageTool extends Logging {
       case e: Throwable => throw new TerseFailure(s"Cluster ID string $clusterIdStr " +
         s"does not appear to be a valid UUID: ${e.getMessage}")
     }
-    require(config.nodeId >= 0, s"The node.id must be set to a non-negative integer.")
+    if (config.nodeId < 0) {
+      throw new TerseFailure(s"The node.id must be set to a non-negative integer. We saw ${config.nodeId}")
+    }
     new MetaProperties(effectiveClusterId.toString, config.nodeId)
   }
 
   def formatCommand(stream: PrintStream,
                     directories: Seq[String],
                     metaProperties: MetaProperties,
+                    metadataVersion: MetadataVersion,
                     ignoreFormatted: Boolean): Int = {
     if (directories.isEmpty) {
       throw new TerseFailure("No log directories found in the configuration.")
@@ -231,7 +252,11 @@ object StorageTool extends Logging {
       val metaPropertiesPath = Paths.get(directory, "meta.properties")
       val checkpoint = new BrokerMetadataCheckpoint(metaPropertiesPath.toFile)
       checkpoint.write(metaProperties.toProperties)
-      stream.println(s"Formatting ${directory}")
+
+      val bootstrapMetadata = BootstrapMetadata.create(metadataVersion)
+      BootstrapMetadata.write(bootstrapMetadata, Paths.get(directory))
+
+      stream.println(s"Formatting ${directory} with metadata.version ${metadataVersion}.")
     })
     0
   }
diff --git a/core/src/main/scala/kafka/tools/TestRaftServer.scala b/core/src/main/scala/kafka/tools/TestRaftServer.scala
index 0b27f7fcb5249..a72784c469ad6 100644
--- a/core/src/main/scala/kafka/tools/TestRaftServer.scala
+++ b/core/src/main/scala/kafka/tools/TestRaftServer.scala
@@ -19,6 +19,7 @@ package kafka.tools
 
 import java.util.concurrent.atomic.{AtomicInteger, AtomicLong}
 import java.util.concurrent.{CompletableFuture, CountDownLatch, LinkedBlockingDeque, TimeUnit}
+
 import joptsimple.OptionException
 import kafka.network.{DataPlaneAcceptor, SocketServer}
 import kafka.raft.{KafkaRaftManager, RaftManager}
@@ -74,7 +75,6 @@ class TestRaftServer(
 
     val apiVersionManager = new SimpleApiVersionManager(ListenerType.CONTROLLER)
     socketServer = new SocketServer(config, metrics, time, credentialProvider, apiVersionManager)
-    socketServer.startup(startProcessingRequests = false)
 
     val metaProperties = MetaProperties(
       clusterId = Uuid.ZERO_UUID.toString,
@@ -90,7 +90,7 @@ class TestRaftServer(
       time,
       metrics,
       Some(threadNamePrefix),
-      CompletableFuture.completedFuture(RaftConfig.parseVoterConnections(config.quorumVoters))
+      CompletableFuture.completedFuture(RaftConfig.parseVoterConnections(config.quorumVoters)),
     )
 
     workloadGenerator = new RaftWorkloadGenerator(
@@ -119,7 +119,7 @@ class TestRaftServer(
 
     workloadGenerator.start()
     raftManager.startup()
-    socketServer.startProcessingRequests(Map.empty)
+    socketServer.enableRequestProcessing(Map.empty)
   }
 
   def shutdown(): Unit = {
diff --git a/core/src/main/scala/kafka/utils/CommandLineUtils.scala b/core/src/main/scala/kafka/utils/CommandLineUtils.scala
index 80726ce06b599..1eaee48416864 100644
--- a/core/src/main/scala/kafka/utils/CommandLineUtils.scala
+++ b/core/src/main/scala/kafka/utils/CommandLineUtils.scala
@@ -52,7 +52,7 @@ object CommandLineUtils extends Logging {
     * @param commandOpts Acceptable options for a command
     * @param message     Message to display on successful check
     */
-  def printHelpAndExitIfNeeded(commandOpts: CommandDefaultOptions, message: String) = {
+  def printHelpAndExitIfNeeded(commandOpts: CommandDefaultOptions, message: String): Unit = {
     if (isPrintHelpNeeded(commandOpts))
       printUsageAndDie(commandOpts.parser, message)
     if (isPrintVersionNeeded(commandOpts))
@@ -117,7 +117,7 @@ object CommandLineUtils extends Logging {
 
     val props = new Properties
     for (a <- splits) {
-      if (a.length == 1 || (a.length == 2 && a(1).isEmpty())) {
+      if (a.length == 1 || (a.length == 2 && a(1).isEmpty)) {
         if (acceptMissingValue) props.put(a(0), "")
         else throw new IllegalArgumentException(s"Missing value for key ${a(0)}")
       }
diff --git a/core/src/main/scala/kafka/utils/Exit.scala b/core/src/main/scala/kafka/utils/Exit.scala
index ad17237571e56..eddd929af5547 100644
--- a/core/src/main/scala/kafka/utils/Exit.scala
+++ b/core/src/main/scala/kafka/utils/Exit.scala
@@ -45,7 +45,7 @@ object Exit {
     JExit.setHaltProcedure(functionToProcedure(haltProcedure))
 
   def setShutdownHookAdder(shutdownHookAdder: (String, => Unit) => Unit): Unit = {
-    JExit.setShutdownHookAdder((name, runnable) => shutdownHookAdder(name, runnable.run))
+    JExit.setShutdownHookAdder((name, runnable) => shutdownHookAdder(name, runnable.run()))
   }
 
   def resetExitProcedure(): Unit =
diff --git a/core/src/main/scala/kafka/utils/FileLock.scala b/core/src/main/scala/kafka/utils/FileLock.scala
index c635f76dff3a2..2de16386c954a 100644
--- a/core/src/main/scala/kafka/utils/FileLock.scala
+++ b/core/src/main/scala/kafka/utils/FileLock.scala
@@ -73,7 +73,7 @@ class FileLock(val file: File) extends Logging {
   /**
    * Destroy this lock, closing the associated FileChannel
    */
-  def destroy() = {
+  def destroy(): Unit = {
     this synchronized {
       unlock()
       channel.close()
diff --git a/core/src/main/scala/kafka/utils/KafkaScheduler.scala b/core/src/main/scala/kafka/utils/KafkaScheduler.scala
index bec511b3f7fed..354652ee6fbe7 100755
--- a/core/src/main/scala/kafka/utils/KafkaScheduler.scala
+++ b/core/src/main/scala/kafka/utils/KafkaScheduler.scala
@@ -135,7 +135,7 @@ class KafkaScheduler(val threads: Int,
    * Package private for testing.
    */
   private[kafka] def taskRunning(task: ScheduledFuture[_]): Boolean = {
-    executor.getQueue().contains(task)
+    executor.getQueue.contains(task)
   }
 
   def resizeThreadPool(newSize: Int): Unit = {
diff --git a/core/src/main/scala/kafka/utils/PasswordEncoder.scala b/core/src/main/scala/kafka/utils/PasswordEncoder.scala
index f748a455c62bf..3373223e36f1c 100644
--- a/core/src/main/scala/kafka/utils/PasswordEncoder.scala
+++ b/core/src/main/scala/kafka/utils/PasswordEncoder.scala
@@ -38,6 +38,33 @@ object PasswordEncoder {
   val IterationsProp = "iterations"
   val EncyrptedPasswordProp = "encryptedPassword"
   val PasswordLengthProp = "passwordLength"
+
+  def encrypting(secret: Password,
+                 keyFactoryAlgorithm: Option[String],
+                 cipherAlgorithm: String,
+                 keyLength: Int,
+                 iterations: Int): EncryptingPasswordEncoder = {
+    new EncryptingPasswordEncoder(secret, keyFactoryAlgorithm, cipherAlgorithm, keyLength, iterations)
+  }
+
+  def noop(): NoOpPasswordEncoder = {
+    new NoOpPasswordEncoder()
+  }
+}
+
+trait PasswordEncoder {
+  def encode(password: Password): String
+  def decode(encodedPassword: String): Password
+
+  private[utils] def base64Decode(encoded: String): Array[Byte] = Base64.getDecoder.decode(encoded)
+}
+
+/**
+ * A password encoder that does not modify the given password. This is used in KRaft mode only.
+ */
+class NoOpPasswordEncoder extends PasswordEncoder {
+  override def encode(password: Password): String = password.value()
+  override def decode(encodedPassword: String): Password = new Password(encodedPassword)
 }
 
 /**
@@ -55,16 +82,18 @@ object PasswordEncoder {
   * The values used for encoding are stored along with the encoded password and the stored values are used for decoding.
   *
   */
-class PasswordEncoder(secret: Password,
-                      keyFactoryAlgorithm: Option[String],
-                      cipherAlgorithm: String,
-                      keyLength: Int,
-                      iterations: Int) extends Logging {
+class EncryptingPasswordEncoder(
+  secret: Password,
+  keyFactoryAlgorithm: Option[String],
+  cipherAlgorithm: String,
+  keyLength: Int,
+  iterations: Int
+) extends PasswordEncoder with Logging {
 
   private val secureRandom = new SecureRandom
   private val cipherParamsEncoder = cipherParamsInstance(cipherAlgorithm)
 
-  def encode(password: Password): String = {
+  override def encode(password: Password): String = {
     val salt = new Array[Byte](256)
     secureRandom.nextBytes(salt)
     val cipher = Cipher.getInstance(cipherAlgorithm)
@@ -84,7 +113,7 @@ class PasswordEncoder(secret: Password,
     encryptedMap.map { case (k, v) => s"$k:$v" }.mkString(",")
   }
 
-  def decode(encodedPassword: String): Password = {
+  override def decode(encodedPassword: String): Password = {
     val params = CoreUtils.parseCsvMap(encodedPassword)
     val keyFactoryAlg = params(KeyFactoryAlgorithmProp)
     val cipherAlg = params(CipherAlgorithmProp)
@@ -131,8 +160,6 @@ class PasswordEncoder(secret: Password,
 
   private def base64Encode(bytes: Array[Byte]): String = Base64.getEncoder.encodeToString(bytes)
 
-  private[utils] def base64Decode(encoded: String): Array[Byte] = Base64.getDecoder.decode(encoded)
-
   private def cipherParamsInstance(cipherAlgorithm: String): CipherParamsEncoder = {
     val aesPattern = "AES/(.*)/.*".r
     cipherAlgorithm match {
diff --git a/core/src/main/scala/kafka/utils/ReplicationUtils.scala b/core/src/main/scala/kafka/utils/ReplicationUtils.scala
index e2733b8936fd9..781c5c90214ec 100644
--- a/core/src/main/scala/kafka/utils/ReplicationUtils.scala
+++ b/core/src/main/scala/kafka/utils/ReplicationUtils.scala
@@ -31,7 +31,7 @@ object ReplicationUtils extends Logging {
     val newLeaderData = TopicPartitionStateZNode.encode(LeaderIsrAndControllerEpoch(newLeaderAndIsr, controllerEpoch))
     // use the epoch of the controller that made the leadership decision, instead of the current controller epoch
     val updatePersistentPath: (Boolean, Int) = zkClient.conditionalUpdatePath(path, newLeaderData,
-      newLeaderAndIsr.zkVersion, Some(checkLeaderAndIsrZkData))
+      newLeaderAndIsr.partitionEpoch, Some(checkLeaderAndIsrZkData))
     updatePersistentPath
   }
 
diff --git a/core/src/main/scala/kafka/utils/Throttler.scala b/core/src/main/scala/kafka/utils/Throttler.scala
index cce6270cf02e8..a431db5f00692 100644
--- a/core/src/main/scala/kafka/utils/Throttler.scala
+++ b/core/src/main/scala/kafka/utils/Throttler.scala
@@ -36,7 +36,7 @@ import scala.math._
  * @param time: The time implementation to use
  */
 @threadsafe
-class Throttler(desiredRatePerSec: Double,
+class Throttler(@volatile var desiredRatePerSec: Double,
                 checkIntervalMs: Long = 100L,
                 throttleDown: Boolean = true,
                 metricName: String = "throttler",
@@ -52,6 +52,7 @@ class Throttler(desiredRatePerSec: Double,
   def maybeThrottle(observed: Double): Unit = {
     val msPerSec = TimeUnit.SECONDS.toMillis(1)
     val nsPerSec = TimeUnit.SECONDS.toNanos(1)
+    val currentDesiredRatePerSec = desiredRatePerSec;
 
     meter.mark(observed.toLong)
     lock synchronized {
@@ -62,14 +63,14 @@ class Throttler(desiredRatePerSec: Double,
       // we should take a little nap
       if (elapsedNs > checkIntervalNs && observedSoFar > 0) {
         val rateInSecs = (observedSoFar * nsPerSec) / elapsedNs
-        val needAdjustment = !(throttleDown ^ (rateInSecs > desiredRatePerSec))
+        val needAdjustment = !(throttleDown ^ (rateInSecs > currentDesiredRatePerSec))
         if (needAdjustment) {
           // solve for the amount of time to sleep to make us hit the desired rate
-          val desiredRateMs = desiredRatePerSec / msPerSec.toDouble
+          val desiredRateMs = currentDesiredRatePerSec / msPerSec.toDouble
           val elapsedMs = TimeUnit.NANOSECONDS.toMillis(elapsedNs)
           val sleepTime = round(observedSoFar / desiredRateMs - elapsedMs)
           if (sleepTime > 0) {
-            trace("Natural rate is %f per second but desired rate is %f, sleeping for %d ms to compensate.".format(rateInSecs, desiredRatePerSec, sleepTime))
+            trace("Natural rate is %f per second but desired rate is %f, sleeping for %d ms to compensate.".format(rateInSecs, currentDesiredRatePerSec, sleepTime))
             time.sleep(sleepTime)
           }
         }
@@ -79,6 +80,9 @@ class Throttler(desiredRatePerSec: Double,
     }
   }
 
+  def updateDesiredRatePerSec(updatedDesiredRatePerSec: Double): Unit = {
+    desiredRatePerSec = updatedDesiredRatePerSec;
+  }
 }
 
 object Throttler {
diff --git a/core/src/main/scala/kafka/utils/ToolsUtils.scala b/core/src/main/scala/kafka/utils/ToolsUtils.scala
index 0f3de767fd806..056545cb03115 100644
--- a/core/src/main/scala/kafka/utils/ToolsUtils.scala
+++ b/core/src/main/scala/kafka/utils/ToolsUtils.scala
@@ -23,7 +23,7 @@ import scala.collection.mutable
 
 object ToolsUtils {
 
-  def validatePortOrDie(parser: OptionParser, hostPort: String) = {
+  def validatePortOrDie(parser: OptionParser, hostPort: String): Unit = {
     val hostPorts: Array[String] = if(hostPort.contains(','))
       hostPort.split(",")
     else
diff --git a/core/src/main/scala/kafka/utils/VersionInfo.scala b/core/src/main/scala/kafka/utils/VersionInfo.scala
index 9d3130e6685d3..203488a64b5c2 100644
--- a/core/src/main/scala/kafka/utils/VersionInfo.scala
+++ b/core/src/main/scala/kafka/utils/VersionInfo.scala
@@ -35,6 +35,6 @@ object VersionInfo {
   }
 
   def getVersionString: String = {
-    s"${getVersion} (Commit:${getCommit})"
+    s"$getVersion (Commit:$getCommit)"
   }
 }
diff --git a/core/src/main/scala/kafka/zk/KafkaZkClient.scala b/core/src/main/scala/kafka/zk/KafkaZkClient.scala
index 6c32acf4d86aa..fa7ce00882aee 100644
--- a/core/src/main/scala/kafka/zk/KafkaZkClient.scala
+++ b/core/src/main/scala/kafka/zk/KafkaZkClient.scala
@@ -199,7 +199,7 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
     val setDataRequests = leaderIsrAndControllerEpochs.map { case (partition, leaderIsrAndControllerEpoch) =>
       val path = TopicPartitionStateZNode.path(partition)
       val data = TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch)
-      SetDataRequest(path, data, leaderIsrAndControllerEpoch.leaderAndIsr.zkVersion, Some(partition))
+      SetDataRequest(path, data, leaderIsrAndControllerEpoch.leaderAndIsr.partitionEpoch, Some(partition))
     }
     retryRequestsUntilConnected(setDataRequests.toSeq, expectedControllerEpochZkVersion)
   }
@@ -271,7 +271,7 @@ class KafkaZkClient private[zk] (zooKeeperClient: ZooKeeperClient, isSecure: Boo
       val partition = setDataResponse.ctx.get.asInstanceOf[TopicPartition]
       setDataResponse.resultCode match {
         case Code.OK =>
-          val updatedLeaderAndIsr = leaderAndIsrs(partition).withZkVersion(setDataResponse.stat.getVersion)
+          val updatedLeaderAndIsr = leaderAndIsrs(partition).withPartitionEpoch(setDataResponse.stat.getVersion)
           Some(partition -> Right(updatedLeaderAndIsr))
         case Code.BADVERSION =>
           // Update the buffer for partitions to retry
diff --git a/core/src/main/scala/kafka/zk/ZkData.scala b/core/src/main/scala/kafka/zk/ZkData.scala
index baed563094f05..7006a21f94bfb 100644
--- a/core/src/main/scala/kafka/zk/ZkData.scala
+++ b/core/src/main/scala/kafka/zk/ZkData.scala
@@ -19,9 +19,10 @@ package kafka.zk
 import java.nio.charset.StandardCharsets.UTF_8
 import java.util
 import java.util.Properties
+
 import com.fasterxml.jackson.annotation.JsonProperty
 import com.fasterxml.jackson.core.JsonProcessingException
-import kafka.api.{ApiVersion, KAFKA_0_10_0_IV1, KAFKA_2_7_IV0, LeaderAndIsr}
+import kafka.api.LeaderAndIsr
 import kafka.cluster.{Broker, EndPoint}
 import kafka.common.{NotificationHandler, ZkNodeChangeNotificationListener}
 import kafka.controller.{IsrChangeNotificationHandler, LeaderIsrAndControllerEpoch, ReplicaAssignment}
@@ -30,23 +31,25 @@ import kafka.security.authorizer.AclEntry
 import kafka.server.{ConfigType, DelegationTokenManager}
 import kafka.utils.Json
 import kafka.utils.json.JsonObject
-import org.apache.kafka.common.{KafkaException, TopicPartition, Uuid}
 import org.apache.kafka.common.errors.UnsupportedVersionException
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
 import org.apache.kafka.common.feature.Features._
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourceType}
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.security.token.delegation.{DelegationToken, TokenInformation}
 import org.apache.kafka.common.utils.{SecurityUtils, Time}
-import org.apache.kafka.server.common.ProducerIdsBlock
+import org.apache.kafka.common.{KafkaException, TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.{MetadataVersion, ProducerIdsBlock}
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_10_0_IV1, IBP_2_7_IV0}
 import org.apache.zookeeper.ZooDefs
 import org.apache.zookeeper.data.{ACL, Stat}
 
 import scala.beans.BeanProperty
-import scala.jdk.CollectionConverters._
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, immutable, mutable}
+import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success, Try}
 
 // This file contains objects for encoding/decoding data stored in ZooKeeper nodes (znodes).
@@ -83,9 +86,9 @@ object BrokerIdsZNode {
 object BrokerInfo {
 
   /**
-   * - Create a broker info with v5 json format if the apiVersion is 2.7.x or above.
+   * - Create a broker info with v5 json format if the metadataVersion is 2.7.x or above.
    * - Create a broker info with v4 json format (which includes multiple endpoints and rack) if
-   *   the apiVersion is 0.10.0.X or above but lesser than 2.7.x.
+   *   the metadataVersion is 0.10.0.X or above but lesser than 2.7.x.
    * - Register the broker with v2 json format otherwise.
    *
    * Due to KAFKA-3100, 0.9.0.0 broker and old clients will break if JSON version is above 2.
@@ -94,11 +97,11 @@ object BrokerInfo {
    * without having to upgrade to 0.9.0.1 first (clients have to be upgraded to 0.9.0.1 in
    * any case).
    */
-  def apply(broker: Broker, apiVersion: ApiVersion, jmxPort: Int): BrokerInfo = {
+  def apply(broker: Broker, metadataVersion: MetadataVersion, jmxPort: Int): BrokerInfo = {
     val version = {
-      if (apiVersion >= KAFKA_2_7_IV0)
+      if (metadataVersion.isAtLeast(IBP_2_7_IV0))
         5
-      else if (apiVersion >= KAFKA_0_10_0_IV1)
+      else if (metadataVersion.isAtLeast(IBP_0_10_0_IV1))
         4
       else
         2
@@ -349,21 +352,39 @@ object TopicPartitionZNode {
 
 object TopicPartitionStateZNode {
   def path(partition: TopicPartition) = s"${TopicPartitionZNode.path(partition)}/state"
+
   def encode(leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch): Array[Byte] = {
     val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
     val controllerEpoch = leaderIsrAndControllerEpoch.controllerEpoch
-    Json.encodeAsBytes(Map("version" -> 1, "leader" -> leaderAndIsr.leader, "leader_epoch" -> leaderAndIsr.leaderEpoch,
-      "controller_epoch" -> controllerEpoch, "isr" -> leaderAndIsr.isr.asJava).asJava)
+    var partitionState = Map(
+      "version" -> 1,
+      "leader" -> leaderAndIsr.leader,
+      "leader_epoch" -> leaderAndIsr.leaderEpoch,
+      "controller_epoch" -> controllerEpoch,
+      "isr" -> leaderAndIsr.isr.asJava
+    )
+
+    if (leaderAndIsr.leaderRecoveryState != LeaderRecoveryState.RECOVERED) {
+      partitionState = partitionState ++ Seq("leader_recovery_state" -> leaderAndIsr.leaderRecoveryState.value.toInt)
+    }
+
+    Json.encodeAsBytes(partitionState.asJava)
   }
+
   def decode(bytes: Array[Byte], stat: Stat): Option[LeaderIsrAndControllerEpoch] = {
     Json.parseBytes(bytes).map { js =>
       val leaderIsrAndEpochInfo = js.asJsonObject
       val leader = leaderIsrAndEpochInfo("leader").to[Int]
       val epoch = leaderIsrAndEpochInfo("leader_epoch").to[Int]
       val isr = leaderIsrAndEpochInfo("isr").to[List[Int]]
+      val recovery = leaderIsrAndEpochInfo
+        .get("leader_recovery_state")
+        .map(jsonValue => LeaderRecoveryState.of(jsonValue.to[Int].toByte))
+        .getOrElse(LeaderRecoveryState.RECOVERED)
       val controllerEpoch = leaderIsrAndEpochInfo("controller_epoch").to[Int]
+
       val zkPathVersion = stat.getVersion
-      LeaderIsrAndControllerEpoch(LeaderAndIsr(leader, epoch, isr, zkPathVersion), controllerEpoch)
+      LeaderIsrAndControllerEpoch(LeaderAndIsr(leader, epoch, isr, recovery, zkPathVersion), controllerEpoch)
     }
   }
 }
@@ -827,12 +848,12 @@ object DelegationTokenInfoZNode {
  * Enabled  -> This status means the feature versioning system (KIP-584) is enabled, and, the
  *             finalized features stored in the FeatureZNode are active. This status is written by
  *             the controller to the FeatureZNode only when the broker IBP config is greater than
- *             or equal to KAFKA_2_7_IV0.
+ *             or equal to IBP_2_7_IV0.
  *
  * Disabled -> This status means the feature versioning system (KIP-584) is disabled, and, the
  *             the finalized features stored in the FeatureZNode is not relevant. This status is
  *             written by the controller to the FeatureZNode only when the broker IBP config
- *             is less than KAFKA_2_7_IV0.
+ *             is less than IBP_2_7_IV0.
  */
 sealed trait FeatureZNodeStatus {
   def id: Int
@@ -859,20 +880,37 @@ object FeatureZNodeStatus {
 /**
  * Represents the contents of the ZK node containing finalized feature information.
  *
+ * @param version    the version of ZK node, we removed min_version_level in version 2
  * @param status     the status of the ZK node
  * @param features   the cluster-wide finalized features
  */
-case class FeatureZNode(status: FeatureZNodeStatus, features: Features[FinalizedVersionRange]) {
+case class FeatureZNode(version: Int, status: FeatureZNodeStatus, features: Map[String, Short]) {
 }
 
 object FeatureZNode {
   private val VersionKey = "version"
   private val StatusKey = "status"
   private val FeaturesKey = "features"
+  private val V1MinVersionKey = "min_version_level"
+  private val V1MaxVersionKey = "max_version_level"
 
   // V1 contains 'version', 'status' and 'features' keys.
   val V1 = 1
-  val CurrentVersion = V1
+  // V2 removes min_version_level
+  val V2 = 2
+
+  /**
+   * - Create a feature info with v1 json format if if the metadataVersion is before 3.2.0
+   * - Create a feature znode with v2 json format if the metadataVersion is 3.2.1 or above.
+   */
+  def apply(metadataVersion: MetadataVersion, status: FeatureZNodeStatus, features: Map[String, Short]): FeatureZNode = {
+    val version = if (metadataVersion.isAtLeast(MetadataVersion.IBP_3_3_IV0)) {
+      V2
+    } else {
+      V1
+    }
+    FeatureZNode(version, status, features)
+  }
 
   def path = "/feature"
 
@@ -893,10 +931,19 @@ object FeatureZNode {
    * @return               JSON representation of the FeatureZNode, as an Array[Byte]
    */
   def encode(featureZNode: FeatureZNode): Array[Byte] = {
+    val features = if (featureZNode.version == V1) {
+      asJavaMap(featureZNode.features.map{
+        case (feature, version) => feature -> Map(V1MaxVersionKey -> version, V1MinVersionKey -> version)
+      })
+    } else {
+      asJavaMap(featureZNode.features.map{
+        case (feature, version) => feature -> Map(V1MaxVersionKey -> version)
+      })
+    }
     val jsonMap = collection.mutable.Map(
-      VersionKey -> CurrentVersion,
+      VersionKey -> featureZNode.version,
       StatusKey -> featureZNode.status.id,
-      FeaturesKey -> featureZNode.features.toMap)
+      FeaturesKey -> features)
     Json.encodeAsBytes(jsonMap.asJava)
   }
 
@@ -914,27 +961,11 @@ object FeatureZNode {
       case Right(js) =>
         val featureInfo = js.asJsonObject
         val version = featureInfo(VersionKey).to[Int]
-        if (version < V1) {
+        if (version < V1 || version > V2) {
           throw new IllegalArgumentException(s"Unsupported version: $version of feature information: " +
             s"${new String(jsonBytes, UTF_8)}")
         }
 
-        val featuresMap = featureInfo
-          .get(FeaturesKey)
-          .flatMap(_.to[Option[Map[String, Map[String, Int]]]])
-
-        if (featuresMap.isEmpty) {
-          throw new IllegalArgumentException("Features map can not be absent in: " +
-            s"${new String(jsonBytes, UTF_8)}")
-        }
-        val features = asJavaMap(
-          featuresMap
-            .map(theMap => theMap.map {
-              case (featureName, versionInfo) => featureName -> versionInfo.map {
-                case (label, version) => label -> version.asInstanceOf[Short]
-              }
-            }).getOrElse(Map[String, Map[String, Short]]()))
-
         val statusInt = featureInfo
           .get(StatusKey)
           .flatMap(_.to[Option[Int]])
@@ -948,19 +979,44 @@ object FeatureZNode {
             s"Malformed status: $statusInt found in feature information: ${new String(jsonBytes, UTF_8)}")
         }
 
-        var finalizedFeatures: Features[FinalizedVersionRange] = null
-        try {
-          finalizedFeatures = fromFinalizedFeaturesMap(features)
-        } catch {
-          case e: Exception => throw new IllegalArgumentException(
-            "Unable to convert to finalized features from map: " + features, e)
-        }
-        FeatureZNode(status.get, finalizedFeatures)
+        val finalizedFeatures = decodeFeature(version, featureInfo, jsonBytes)
+        FeatureZNode(version, status.get, finalizedFeatures)
       case Left(e) =>
         throw new IllegalArgumentException(s"Failed to parse feature information: " +
           s"${new String(jsonBytes, UTF_8)}", e)
     }
   }
+
+  private def decodeFeature(version: Int, featureInfo: JsonObject, jsonBytes: Array[Byte]): Map[String, Short] = {
+    val featuresMap = featureInfo
+      .get(FeaturesKey)
+      .flatMap(_.to[Option[Map[String, Map[String, Int]]]])
+
+    if (featuresMap.isEmpty) {
+      throw new IllegalArgumentException("Features map can not be absent in: " +
+        s"${new String(jsonBytes, UTF_8)}")
+    }
+    featuresMap.get.map {
+      case (featureName, versionInfo) =>
+        if (version == V1 && !versionInfo.contains(V1MinVersionKey)) {
+          throw new IllegalArgumentException(s"$V1MinVersionKey absent in [$versionInfo]")
+        }
+        if (!versionInfo.contains(V1MaxVersionKey)) {
+          throw new IllegalArgumentException(s"$V1MaxVersionKey absent in [$versionInfo]")
+        }
+
+        val minValueOpt = versionInfo.get(V1MinVersionKey)
+        val maxValue = versionInfo(V1MaxVersionKey)
+
+        if (version == V1 && (minValueOpt.get < 1 || maxValue < minValueOpt.get)) {
+          throw new IllegalArgumentException(s"Expected minValue >= 1, maxValue >= 1 and maxValue >= minValue, but received minValue: ${minValueOpt.get}, maxValue: $maxValue")
+        }
+        if (maxValue < 1) {
+          throw new IllegalArgumentException(s"Expected maxValue >= 1, but received maxValue: $maxValue")
+        }
+        featureName -> maxValue.toShort
+    }
+  }
 }
 
 object ZkData {
diff --git a/core/src/test/java/kafka/test/ClusterConfig.java b/core/src/test/java/kafka/test/ClusterConfig.java
index 20b74cf43244b..8e9f7de96abb1 100644
--- a/core/src/test/java/kafka/test/ClusterConfig.java
+++ b/core/src/test/java/kafka/test/ClusterConfig.java
@@ -19,6 +19,7 @@
 
 import kafka.test.annotation.Type;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.io.File;
 import java.util.HashMap;
@@ -41,7 +42,7 @@ public class ClusterConfig {
     private final SecurityProtocol securityProtocol;
     private final String listenerName;
     private final File trustStoreFile;
-    private final String ibp;
+    private final MetadataVersion metadataVersion;
 
     private final Properties serverProperties = new Properties();
     private final Properties producerProperties = new Properties();
@@ -53,7 +54,7 @@ public class ClusterConfig {
 
     ClusterConfig(Type type, int brokers, int controllers, String name, boolean autoStart,
                   SecurityProtocol securityProtocol, String listenerName, File trustStoreFile,
-                  String ibp) {
+                  MetadataVersion metadataVersion) {
         this.type = type;
         this.brokers = brokers;
         this.controllers = controllers;
@@ -62,7 +63,7 @@ public class ClusterConfig {
         this.securityProtocol = securityProtocol;
         this.listenerName = listenerName;
         this.trustStoreFile = trustStoreFile;
-        this.ibp = ibp;
+        this.metadataVersion = metadataVersion;
     }
 
     public Type clusterType() {
@@ -121,8 +122,8 @@ public Optional<File> trustStoreFile() {
         return Optional.ofNullable(trustStoreFile);
     }
 
-    public Optional<String> ibp() {
-        return Optional.ofNullable(ibp);
+    public MetadataVersion metadataVersion() {
+        return metadataVersion;
     }
 
     public Properties brokerServerProperties(int brokerId) {
@@ -130,16 +131,16 @@ public Properties brokerServerProperties(int brokerId) {
     }
 
     public Map<String, String> nameTags() {
-        Map<String, String> tags = new LinkedHashMap<>(3);
+        Map<String, String> tags = new LinkedHashMap<>(4);
         name().ifPresent(name -> tags.put("Name", name));
-        ibp().ifPresent(ibp -> tags.put("IBP", ibp));
+        tags.put("MetadataVersion", metadataVersion.toString());
         tags.put("Security", securityProtocol.name());
         listenerName().ifPresent(listener -> tags.put("Listener", listener));
         return tags;
     }
 
     public ClusterConfig copyOf() {
-        ClusterConfig copy = new ClusterConfig(type, brokers, controllers, name, autoStart, securityProtocol, listenerName, trustStoreFile, ibp);
+        ClusterConfig copy = new ClusterConfig(type, brokers, controllers, name, autoStart, securityProtocol, listenerName, trustStoreFile, metadataVersion);
         copy.serverProperties.putAll(serverProperties);
         copy.producerProperties.putAll(producerProperties);
         copy.consumerProperties.putAll(consumerProperties);
@@ -149,11 +150,12 @@ public ClusterConfig copyOf() {
     }
 
     public static Builder defaultClusterBuilder() {
-        return new Builder(Type.ZK, 1, 1, true, SecurityProtocol.PLAINTEXT);
+        return new Builder(Type.ZK, 1, 1, true, SecurityProtocol.PLAINTEXT, MetadataVersion.latest());
     }
 
-    public static Builder clusterBuilder(Type type, int brokers, int controllers, boolean autoStart, SecurityProtocol securityProtocol) {
-        return new Builder(type, brokers, controllers, autoStart, securityProtocol);
+    public static Builder clusterBuilder(Type type, int brokers, int controllers, boolean autoStart,
+                                         SecurityProtocol securityProtocol, MetadataVersion metadataVersion) {
+        return new Builder(type, brokers, controllers, autoStart, securityProtocol, metadataVersion);
     }
 
     public static class Builder {
@@ -165,14 +167,15 @@ public static class Builder {
         private SecurityProtocol securityProtocol;
         private String listenerName;
         private File trustStoreFile;
-        private String ibp;
+        private MetadataVersion metadataVersion;
 
-        Builder(Type type, int brokers, int controllers, boolean autoStart, SecurityProtocol securityProtocol) {
+        Builder(Type type, int brokers, int controllers, boolean autoStart, SecurityProtocol securityProtocol, MetadataVersion metadataVersion) {
             this.type = type;
             this.brokers = brokers;
             this.controllers = controllers;
             this.autoStart = autoStart;
             this.securityProtocol = securityProtocol;
+            this.metadataVersion = metadataVersion;
         }
 
         public Builder type(Type type) {
@@ -215,13 +218,13 @@ public Builder trustStoreFile(File trustStoreFile) {
             return this;
         }
 
-        public Builder ibp(String ibp) {
-            this.ibp = ibp;
+        public Builder metadataVersion(MetadataVersion metadataVersion) {
+            this.metadataVersion = metadataVersion;
             return this;
         }
 
         public ClusterConfig build() {
-            return new ClusterConfig(type, brokers, controllers, name, autoStart, securityProtocol, listenerName, trustStoreFile, ibp);
+            return new ClusterConfig(type, brokers, controllers, name, autoStart, securityProtocol, listenerName, trustStoreFile, metadataVersion);
         }
     }
 }
diff --git a/core/src/test/java/kafka/test/ClusterInstance.java b/core/src/test/java/kafka/test/ClusterInstance.java
index 099d93280d796..9058508fa94f7 100644
--- a/core/src/test/java/kafka/test/ClusterInstance.java
+++ b/core/src/test/java/kafka/test/ClusterInstance.java
@@ -18,13 +18,16 @@
 package kafka.test;
 
 import kafka.network.SocketServer;
+import kafka.server.BrokerFeatures;
 import kafka.test.annotation.ClusterTest;
 import org.apache.kafka.clients.admin.Admin;
 import org.apache.kafka.common.network.ListenerName;
 
 import java.util.Collection;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
+import java.util.Set;
 
 public interface ClusterInstance {
 
@@ -48,6 +51,18 @@ default boolean isKRaftTest() {
      */
     ClusterConfig config();
 
+    /**
+     * Return the set of all controller IDs configured for this test. For kraft, this
+     * will return only the nodes which have the "controller" role enabled in `process.roles`.
+     * For zookeeper, this will return all broker IDs since they are all eligible controllers.
+     */
+    Set<Integer> controllerIds();
+
+    /**
+     * Return the set of all broker IDs configured for this test.
+     */
+    Set<Integer> brokerIds();
+
     /**
      * The listener for this cluster as configured by {@link ClusterTest} or by {@link ClusterConfig}. If
      * unspecified by those sources, this will return the listener for the default security protocol PLAINTEXT
@@ -95,6 +110,11 @@ default Optional<ListenerName> controlPlaneListenerName() {
      */
     SocketServer anyControllerSocketServer();
 
+    /**
+     * Return a mapping of the underlying broker IDs to their supported features
+     */
+    Map<Integer, BrokerFeatures> brokerFeatures();
+
     /**
      * The underlying object which is responsible for setting up and tearing down the cluster.
      */
diff --git a/core/src/test/java/kafka/test/ClusterTestExtensionsTest.java b/core/src/test/java/kafka/test/ClusterTestExtensionsTest.java
index 767a279d7e10e..63ca13725316d 100644
--- a/core/src/test/java/kafka/test/ClusterTestExtensionsTest.java
+++ b/core/src/test/java/kafka/test/ClusterTestExtensionsTest.java
@@ -25,6 +25,7 @@
 import kafka.test.annotation.ClusterTests;
 import kafka.test.annotation.Type;
 import kafka.test.junit.ClusterTestExtensions;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
@@ -76,7 +77,7 @@ public void testClusterTemplate() {
         Assertions.assertEquals(clusterInstance.clusterType(), ClusterInstance.ClusterType.ZK,
             "generate1 provided a Zk cluster, so we should see that here");
         Assertions.assertEquals(clusterInstance.config().name().orElse(""), "Generated Test",
-            "generate 1 named this cluster config, so we should see that here");
+            "generate1 named this cluster config, so we should see that here");
         Assertions.assertEquals(clusterInstance.config().serverProperties().getProperty("before"), "each");
     }
 
@@ -89,6 +90,10 @@ public void testClusterTemplate() {
         @ClusterTest(name = "cluster-tests-2", clusterType = Type.KRAFT, serverProperties = {
             @ClusterConfigProperty(key = "foo", value = "baz"),
             @ClusterConfigProperty(key = "spam", value = "eggz")
+        }),
+        @ClusterTest(name = "cluster-tests-3", clusterType = Type.CO_KRAFT, serverProperties = {
+            @ClusterConfigProperty(key = "foo", value = "baz"),
+            @ClusterConfigProperty(key = "spam", value = "eggz")
         })
     })
     public void testClusterTests() {
@@ -109,4 +114,9 @@ public void testNoAutoStart() {
         clusterInstance.start();
         Assertions.assertNotNull(clusterInstance.anyBrokerSocketServer());
     }
+
+    @ClusterTest
+    public void testDefaults(ClusterConfig config) {
+        Assertions.assertEquals(MetadataVersion.IBP_3_3_IV3, config.metadataVersion());
+    }
 }
diff --git a/core/src/test/java/kafka/test/MockController.java b/core/src/test/java/kafka/test/MockController.java
index c163b2df49a5a..ff1154d2119cd 100644
--- a/core/src/test/java/kafka/test/MockController.java
+++ b/core/src/test/java/kafka/test/MockController.java
@@ -25,8 +25,8 @@
 import org.apache.kafka.common.errors.NotControllerException;
 import org.apache.kafka.common.message.AllocateProducerIdsRequestData;
 import org.apache.kafka.common.message.AllocateProducerIdsResponseData;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
@@ -41,15 +41,18 @@
 import org.apache.kafka.common.message.ElectLeadersResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
+import org.apache.kafka.common.message.UpdateFeaturesRequestData;
+import org.apache.kafka.common.message.UpdateFeaturesResponseData;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.quota.ClientQuotaAlteration;
 import org.apache.kafka.common.quota.ClientQuotaEntity;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.controller.Controller;
+import org.apache.kafka.controller.ControllerRequestContext;
 import org.apache.kafka.controller.ResultOrError;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistrationReply;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.server.authorizer.AclCreateResult;
 import org.apache.kafka.server.authorizer.AclDeleteResult;
 
@@ -60,6 +63,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -75,12 +79,18 @@ public class MockController implements Controller {
     private final AtomicLong nextTopicId = new AtomicLong(1);
 
     @Override
-    public CompletableFuture<List<AclCreateResult>> createAcls(List<AclBinding> aclBindings) {
+    public CompletableFuture<List<AclCreateResult>> createAcls(
+        ControllerRequestContext context,
+        List<AclBinding> aclBindings
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<List<AclDeleteResult>> deleteAcls(List<AclBindingFilter> aclBindingFilters) {
+    public CompletableFuture<List<AclDeleteResult>> deleteAcls(
+        ControllerRequestContext context,
+        List<AclBindingFilter> aclBindingFilters
+    ) {
         throw new UnsupportedOperationException();
     }
 
@@ -107,13 +117,19 @@ private MockController(Collection<MockTopic> initialTopics) {
     }
 
     @Override
-    public CompletableFuture<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
+    public CompletableFuture<AlterPartitionResponseData> alterPartition(
+        ControllerRequestContext context,
+        AlterPartitionRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    synchronized public CompletableFuture<CreateTopicsResponseData>
-            createTopics(CreateTopicsRequestData request) {
+    synchronized public CompletableFuture<CreateTopicsResponseData> createTopics(
+        ControllerRequestContext context,
+        CreateTopicsRequestData request,
+        Set<String> describable
+    ) {
         CreateTopicsResponseData response = new CreateTopicsResponseData();
         for (CreatableTopic topic : request.topics()) {
             if (topicNameToId.containsKey(topic.name())) {
@@ -125,20 +141,40 @@ public CompletableFuture<AlterIsrResponseData> alterIsr(AlterIsrRequestData requ
                 Uuid topicUuid = new Uuid(0, topicId);
                 topicNameToId.put(topic.name(), topicUuid);
                 topics.put(topicUuid, new MockTopic(topic.name(), topicUuid));
-                response.topics().add(new CreatableTopicResult().
+                CreatableTopicResult creatableTopicResult = new CreatableTopicResult().
                     setName(topic.name()).
                     setErrorCode(Errors.NONE.code()).
-                    setTopicId(topicUuid));
-                // For a better mock, we might want to return configs, replication factor,
-                // etc.  Right now, the tests that use MockController don't need these
-                // things.
+                    setTopicId(topicUuid);
+                if (describable.contains(topic.name())) {
+                    // Note: we don't simulate topic configs here yet.
+                    // Just returning replication factor and numPartitions.
+                    if (topic.assignments() != null && !topic.assignments().isEmpty()) {
+                        creatableTopicResult.
+                            setTopicConfigErrorCode(Errors.NONE.code()).
+                            setReplicationFactor((short)
+                                topic.assignments().iterator().next().brokerIds().size()).
+                            setNumPartitions(topic.assignments().size());
+                    } else {
+                        creatableTopicResult.
+                            setTopicConfigErrorCode(Errors.NONE.code()).
+                            setReplicationFactor(topic.replicationFactor()).
+                            setNumPartitions(topic.numPartitions());
+                    }
+                } else {
+                    creatableTopicResult.
+                        setTopicConfigErrorCode(Errors.TOPIC_AUTHORIZATION_FAILED.code());
+                }
+                response.topics().add(creatableTopicResult);
             }
         }
         return CompletableFuture.completedFuture(response);
     }
 
     @Override
-    public CompletableFuture<Void> unregisterBroker(int brokerId) {
+    public CompletableFuture<Void> unregisterBroker(
+        ControllerRequestContext context,
+        int brokerId
+    ) {
         throw new UnsupportedOperationException();
     }
 
@@ -159,8 +195,10 @@ static class MockTopic {
     private final Map<ConfigResource, Map<String, String>> configs = new HashMap<>();
 
     @Override
-    synchronized public CompletableFuture<Map<String, ResultOrError<Uuid>>>
-            findTopicIds(long deadlineNs, Collection<String> topicNames) {
+    synchronized public CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIds(
+        ControllerRequestContext context,
+        Collection<String> topicNames
+    ) {
         Map<String, ResultOrError<Uuid>> results = new HashMap<>();
         for (String topicName : topicNames) {
             if (!topicNameToId.containsKey(topicName)) {
@@ -173,7 +211,9 @@ static class MockTopic {
     }
 
     @Override
-    synchronized public CompletableFuture<Map<String, Uuid>> findAllTopicIds(long deadlineNs) {
+    synchronized public CompletableFuture<Map<String, Uuid>> findAllTopicIds(
+        ControllerRequestContext context
+    ) {
         Map<String, Uuid> results = new HashMap<>();
         for (Entry<Uuid, MockTopic> entry : topics.entrySet()) {
             results.put(entry.getValue().name, entry.getKey());
@@ -182,8 +222,10 @@ synchronized public CompletableFuture<Map<String, Uuid>> findAllTopicIds(long de
     }
 
     @Override
-    synchronized public CompletableFuture<Map<Uuid, ResultOrError<String>>>
-            findTopicNames(long deadlineNs, Collection<Uuid> topicIds) {
+    synchronized public CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNames(
+        ControllerRequestContext context,
+        Collection<Uuid> topicIds
+    ) {
         Map<Uuid, ResultOrError<String>> results = new HashMap<>();
         for (Uuid topicId : topicIds) {
             MockTopic topic = topics.get(topicId);
@@ -197,8 +239,10 @@ synchronized public CompletableFuture<Map<String, Uuid>> findAllTopicIds(long de
     }
 
     @Override
-    synchronized public CompletableFuture<Map<Uuid, ApiError>>
-            deleteTopics(long deadlineNs, Collection<Uuid> topicIds) {
+    synchronized public CompletableFuture<Map<Uuid, ApiError>> deleteTopics(
+        ControllerRequestContext context,
+        Collection<Uuid> topicIds
+    ) {
         if (!active) {
             CompletableFuture<Map<Uuid, ApiError>> future = new CompletableFuture<>();
             future.completeExceptionally(NOT_CONTROLLER_EXCEPTION);
@@ -218,24 +262,34 @@ synchronized public CompletableFuture<Map<String, Uuid>> findAllTopicIds(long de
     }
 
     @Override
-    public CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>> describeConfigs(Map<ConfigResource, Collection<String>> resources) {
+    public CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>> describeConfigs(
+        ControllerRequestContext context,
+        Map<ConfigResource, Collection<String>> resources
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<ElectLeadersResponseData> electLeaders(ElectLeadersRequestData request) {
+    public CompletableFuture<ElectLeadersResponseData> electLeaders(
+        ControllerRequestContext context,
+        ElectLeadersRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<FeatureMapAndEpoch> finalizedFeatures() {
+    public CompletableFuture<FinalizedControllerFeatures> finalizedFeatures(
+        ControllerRequestContext context
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
     public CompletableFuture<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
-            Map<ConfigResource, Map<String, Entry<AlterConfigOp.OpType, String>>> configChanges,
-            boolean validateOnly) {
+        ControllerRequestContext context,
+        Map<ConfigResource, Map<String, Entry<AlterConfigOp.OpType, String>>> configChanges,
+        boolean validateOnly
+    ) {
         Map<ConfigResource, ApiError> results = new HashMap<>();
         for (Entry<ConfigResource, Map<String, Entry<AlterConfigOp.OpType, String>>> entry :
                 configChanges.entrySet()) {
@@ -275,20 +329,27 @@ private ApiError incrementalAlterResource(ConfigResource resource,
     }
 
     @Override
-    public CompletableFuture<AlterPartitionReassignmentsResponseData>
-            alterPartitionReassignments(AlterPartitionReassignmentsRequestData request) {
+    public CompletableFuture<AlterPartitionReassignmentsResponseData> alterPartitionReassignments(
+        ControllerRequestContext context,
+        AlterPartitionReassignmentsRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<ListPartitionReassignmentsResponseData>
-            listPartitionReassignments(ListPartitionReassignmentsRequestData request) {
+    public CompletableFuture<ListPartitionReassignmentsResponseData> listPartitionReassignments(
+        ControllerRequestContext context,
+        ListPartitionReassignmentsRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
     public CompletableFuture<Map<ConfigResource, ApiError>> legacyAlterConfigs(
-            Map<ConfigResource, Map<String, String>> newConfigs, boolean validateOnly) {
+        ControllerRequestContext context,
+        Map<ConfigResource, Map<String, String>> newConfigs,
+        boolean validateOnly
+    ) {
         Map<ConfigResource, ApiError> results = new HashMap<>();
         if (!validateOnly) {
             for (Entry<ConfigResource, Map<String, String>> entry : newConfigs.entrySet()) {
@@ -304,14 +365,18 @@ public CompletableFuture<Map<ConfigResource, ApiError>> legacyAlterConfigs(
     }
 
     @Override
-    public CompletableFuture<BrokerHeartbeatReply>
-            processBrokerHeartbeat(BrokerHeartbeatRequestData request) {
+    public CompletableFuture<BrokerHeartbeatReply> processBrokerHeartbeat(
+        ControllerRequestContext context,
+        BrokerHeartbeatRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<BrokerRegistrationReply>
-            registerBroker(BrokerRegistrationRequestData request) {
+    public CompletableFuture<BrokerRegistrationReply> registerBroker(
+        ControllerRequestContext context,
+        BrokerRegistrationRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
@@ -321,19 +386,36 @@ public CompletableFuture<Void> waitForReadyBrokers(int minBrokers) {
     }
 
     @Override
-    public CompletableFuture<Map<ClientQuotaEntity, ApiError>>
-            alterClientQuotas(Collection<ClientQuotaAlteration> quotaAlterations, boolean validateOnly) {
+    public CompletableFuture<Map<ClientQuotaEntity, ApiError>> alterClientQuotas(
+        ControllerRequestContext context,
+        Collection<ClientQuotaAlteration> quotaAlterations,
+        boolean validateOnly
+    ) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public CompletableFuture<AllocateProducerIdsResponseData> allocateProducerIds(
+        ControllerRequestContext context,
+        AllocateProducerIdsRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    public CompletableFuture<AllocateProducerIdsResponseData> allocateProducerIds(AllocateProducerIdsRequestData request) {
+    public CompletableFuture<UpdateFeaturesResponseData> updateFeatures(
+        ControllerRequestContext context,
+        UpdateFeaturesRequestData request
+    ) {
         throw new UnsupportedOperationException();
     }
 
     @Override
-    synchronized public CompletableFuture<List<CreatePartitionsTopicResult>>
-            createPartitions(long deadlineNs, List<CreatePartitionsTopic> topicList) {
+    synchronized public CompletableFuture<List<CreatePartitionsTopicResult>> createPartitions(
+        ControllerRequestContext context,
+        List<CreatePartitionsTopic> topicList,
+        boolean validateOnly
+    ) {
         if (!active) {
             CompletableFuture<List<CreatePartitionsTopicResult>> future = new CompletableFuture<>();
             future.completeExceptionally(NOT_CONTROLLER_EXCEPTION);
diff --git a/core/src/test/java/kafka/test/annotation/ClusterTest.java b/core/src/test/java/kafka/test/annotation/ClusterTest.java
index 11336ab87a15f..d1d3222a25b88 100644
--- a/core/src/test/java/kafka/test/annotation/ClusterTest.java
+++ b/core/src/test/java/kafka/test/annotation/ClusterTest.java
@@ -18,6 +18,7 @@
 package kafka.test.annotation;
 
 import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.junit.jupiter.api.TestTemplate;
 
 import java.lang.annotation.Documented;
@@ -40,6 +41,6 @@
     String name() default "";
     SecurityProtocol securityProtocol() default SecurityProtocol.PLAINTEXT;
     String listener() default "";
-    String ibp() default "";
+    MetadataVersion metadataVersion() default MetadataVersion.IBP_3_3_IV3;
     ClusterConfigProperty[] serverProperties() default {};
 }
diff --git a/core/src/test/java/kafka/test/annotation/Type.java b/core/src/test/java/kafka/test/annotation/Type.java
index 0d1a161dabe92..933ca5011341b 100644
--- a/core/src/test/java/kafka/test/annotation/Type.java
+++ b/core/src/test/java/kafka/test/annotation/Type.java
@@ -31,7 +31,13 @@ public enum Type {
     KRAFT {
         @Override
         public void invocationContexts(ClusterConfig config, Consumer<TestTemplateInvocationContext> invocationConsumer) {
-            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf()));
+            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf(), false));
+        }
+    },
+    CO_KRAFT {
+        @Override
+        public void invocationContexts(ClusterConfig config, Consumer<TestTemplateInvocationContext> invocationConsumer) {
+            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf(), true));
         }
     },
     ZK {
@@ -40,10 +46,11 @@ public void invocationContexts(ClusterConfig config, Consumer<TestTemplateInvoca
             invocationConsumer.accept(new ZkClusterInvocationContext(config.copyOf()));
         }
     },
-    BOTH {
+    ALL {
         @Override
         public void invocationContexts(ClusterConfig config, Consumer<TestTemplateInvocationContext> invocationConsumer) {
-            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf()));
+            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf(), false));
+            invocationConsumer.accept(new RaftClusterInvocationContext(config.copyOf(), true));
             invocationConsumer.accept(new ZkClusterInvocationContext(config.copyOf()));
         }
     },
diff --git a/core/src/test/java/kafka/test/junit/ClusterTestExtensions.java b/core/src/test/java/kafka/test/junit/ClusterTestExtensions.java
index 293f00b035ca5..bd69109c4b756 100644
--- a/core/src/test/java/kafka/test/junit/ClusterTestExtensions.java
+++ b/core/src/test/java/kafka/test/junit/ClusterTestExtensions.java
@@ -179,7 +179,8 @@ private void processClusterTest(ExtensionContext context, ClusterTest annot, Clu
                 throw new IllegalStateException();
         }
 
-        ClusterConfig.Builder builder = ClusterConfig.clusterBuilder(type, brokers, controllers, autoStart, annot.securityProtocol());
+        ClusterConfig.Builder builder = ClusterConfig.clusterBuilder(type, brokers, controllers, autoStart,
+            annot.securityProtocol(), annot.metadataVersion());
         if (!annot.name().isEmpty()) {
             builder.name(annot.name());
         } else {
@@ -194,10 +195,6 @@ private void processClusterTest(ExtensionContext context, ClusterTest annot, Clu
             properties.put(property.key(), property.value());
         }
 
-        if (!annot.ibp().isEmpty()) {
-            builder.ibp(annot.ibp());
-        }
-
         ClusterConfig config = builder.build();
         config.serverProperties().putAll(properties);
         type.invocationContexts(config, testInvocations);
diff --git a/core/src/test/java/kafka/test/junit/RaftClusterInvocationContext.java b/core/src/test/java/kafka/test/junit/RaftClusterInvocationContext.java
index b34f286720366..5cd3ec3e24619 100644
--- a/core/src/test/java/kafka/test/junit/RaftClusterInvocationContext.java
+++ b/core/src/test/java/kafka/test/junit/RaftClusterInvocationContext.java
@@ -18,6 +18,7 @@
 package kafka.test.junit;
 
 import kafka.network.SocketServer;
+import kafka.server.BrokerFeatures;
 import kafka.server.BrokerServer;
 import kafka.server.ControllerServer;
 import kafka.test.ClusterConfig;
@@ -38,8 +39,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
+import java.util.Set;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -63,18 +66,20 @@ public class RaftClusterInvocationContext implements TestTemplateInvocationConte
 
     private final ClusterConfig clusterConfig;
     private final AtomicReference<KafkaClusterTestKit> clusterReference;
+    private final boolean isCoResident;
 
-    public RaftClusterInvocationContext(ClusterConfig clusterConfig) {
+    public RaftClusterInvocationContext(ClusterConfig clusterConfig, boolean isCoResident) {
         this.clusterConfig = clusterConfig;
         this.clusterReference = new AtomicReference<>();
+        this.isCoResident = isCoResident;
     }
 
     @Override
     public String getDisplayName(int invocationIndex) {
         String clusterDesc = clusterConfig.nameTags().entrySet().stream()
-                .map(Object::toString)
-                .collect(Collectors.joining(", "));
-        return String.format("[%d] Type=Raft, %s", invocationIndex, clusterDesc);
+            .map(Object::toString)
+            .collect(Collectors.joining(", "));
+        return String.format("[%d] Type=Raft-%s, %s", invocationIndex, isCoResident ? "CoReside" : "Distributed", clusterDesc);
     }
 
     @Override
@@ -83,6 +88,8 @@ public List<Extension> getAdditionalExtensions() {
         return Arrays.asList(
             (BeforeTestExecutionCallback) context -> {
                 TestKitNodes nodes = new TestKitNodes.Builder().
+                        setBootstrapMetadataVersion(clusterConfig.metadataVersion()).
+                        setCoResident(isCoResident).
                         setNumBrokerNodes(clusterConfig.numBrokers()).
                         setNumControllerNodes(clusterConfig.numControllers()).build();
                 nodes.brokerNodes().forEach((brokerId, brokerNode) -> {
@@ -168,6 +175,14 @@ public SocketServer anyControllerSocketServer() {
                 .orElseThrow(() -> new RuntimeException("No controller SocketServers found"));
         }
 
+        @Override
+        public Map<Integer, BrokerFeatures> brokerFeatures() {
+            return brokers().collect(Collectors.toMap(
+                brokerServer -> brokerServer.config().nodeId(),
+                BrokerServer::brokerFeatures
+            ));
+        }
+
         @Override
         public ClusterType clusterType() {
             return ClusterType.RAFT;
@@ -178,6 +193,20 @@ public ClusterConfig config() {
             return clusterConfig;
         }
 
+        @Override
+        public Set<Integer> controllerIds() {
+            return controllers()
+                .map(controllerServer -> controllerServer.config().nodeId())
+                .collect(Collectors.toSet());
+        }
+
+        @Override
+        public Set<Integer> brokerIds() {
+            return brokers()
+                .map(brokerServer -> brokerServer.config().nodeId())
+                .collect(Collectors.toSet());
+        }
+
         @Override
         public KafkaClusterTestKit getUnderlying() {
             return clusterReference.get();
@@ -185,7 +214,7 @@ public KafkaClusterTestKit getUnderlying() {
 
         @Override
         public Admin createAdminClient(Properties configOverrides) {
-            Admin admin = Admin.create(clusterReference.get().clientProperties());
+            Admin admin = Admin.create(clusterReference.get().clientProperties(configOverrides));
             admins.add(admin);
             return admin;
         }
diff --git a/core/src/test/java/kafka/test/junit/ZkClusterInvocationContext.java b/core/src/test/java/kafka/test/junit/ZkClusterInvocationContext.java
index 68ec04105364b..18a85e2d7bf66 100644
--- a/core/src/test/java/kafka/test/junit/ZkClusterInvocationContext.java
+++ b/core/src/test/java/kafka/test/junit/ZkClusterInvocationContext.java
@@ -19,6 +19,7 @@
 
 import kafka.api.IntegrationTestHarness;
 import kafka.network.SocketServer;
+import kafka.server.BrokerFeatures;
 import kafka.server.KafkaConfig;
 import kafka.server.KafkaServer;
 import kafka.test.ClusterConfig;
@@ -41,8 +42,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.Properties;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
@@ -106,7 +109,7 @@ public void modifyConfigs(Seq<Properties> props) {
                     @Override
                     public Properties serverConfig() {
                         Properties props = clusterConfig.serverProperties();
-                        clusterConfig.ibp().ifPresent(ibp -> props.put(KafkaConfig.InterBrokerProtocolVersionProp(), ibp));
+                        props.put(KafkaConfig.InterBrokerProtocolVersionProp(), metadataVersion().version());
                         return props;
                     }
 
@@ -237,6 +240,14 @@ public SocketServer anyControllerSocketServer() {
                 .orElseThrow(() -> new RuntimeException("No broker SocketServers found"));
         }
 
+        @Override
+        public Map<Integer, BrokerFeatures> brokerFeatures() {
+            return servers().collect(Collectors.toMap(
+                brokerServer -> brokerServer.config().nodeId(),
+                KafkaServer::brokerFeatures
+            ));
+        }
+
         @Override
         public ClusterType clusterType() {
             return ClusterType.ZK;
@@ -247,6 +258,18 @@ public ClusterConfig config() {
             return config;
         }
 
+        @Override
+        public Set<Integer> controllerIds() {
+            return brokerIds();
+        }
+
+        @Override
+        public Set<Integer> brokerIds() {
+            return servers()
+                .map(brokerServer -> brokerServer.config().nodeId())
+                .collect(Collectors.toSet());
+        }
+
         @Override
         public IntegrationTestHarness getUnderlying() {
             return clusterReference.get();
diff --git a/core/src/test/java/kafka/testkit/ControllerNode.java b/core/src/test/java/kafka/testkit/ControllerNode.java
index be6c8067f1f7f..3ee2b4d081a9d 100644
--- a/core/src/test/java/kafka/testkit/ControllerNode.java
+++ b/core/src/test/java/kafka/testkit/ControllerNode.java
@@ -27,7 +27,7 @@ public Builder setId(int id) {
             return this;
         }
 
-        public Builder setMetadataDirectory() {
+        public Builder setMetadataDirectory(String metadataDirectory) {
             this.metadataDirectory = metadataDirectory;
             return this;
         }
diff --git a/core/src/test/java/kafka/testkit/KafkaClusterTestKit.java b/core/src/test/java/kafka/testkit/KafkaClusterTestKit.java
index cbfe4ff34e49a..ecee13c498241 100644
--- a/core/src/test/java/kafka/testkit/KafkaClusterTestKit.java
+++ b/core/src/test/java/kafka/testkit/KafkaClusterTestKit.java
@@ -24,7 +24,7 @@
 import kafka.server.KafkaConfig$;
 import kafka.server.KafkaRaftServer;
 import kafka.server.MetaProperties;
-import kafka.server.Server;
+import kafka.server.metadata.BrokerServerMetrics$;
 import kafka.tools.StorageTool;
 import kafka.utils.Logging;
 import org.apache.kafka.clients.CommonClientConfigs;
@@ -35,10 +35,14 @@
 import org.apache.kafka.common.utils.ThreadUtils;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.controller.BootstrapMetadata;
 import org.apache.kafka.controller.Controller;
+import org.apache.kafka.controller.MockControllerMetrics;
 import org.apache.kafka.metadata.MetadataRecordSerde;
 import org.apache.kafka.raft.RaftConfig;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.apache.kafka.server.fault.MockFaultHandler;
 import org.apache.kafka.test.TestUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -114,6 +118,8 @@ public void close() {
     public static class Builder {
         private TestKitNodes nodes;
         private Map<String, String> configProps = new HashMap<>();
+        private MockFaultHandler metadataFaultHandler = new MockFaultHandler("metadataFaultHandler");
+        private MockFaultHandler fatalFaultHandler = new MockFaultHandler("fatalFaultHandler");
 
         public Builder(TestKitNodes nodes) {
             this.nodes = nodes;
@@ -124,6 +130,11 @@ public Builder setConfigProp(String key, String value) {
             return this;
         }
 
+        public Builder setMetadataFaultHandler(MockFaultHandler metadataFaultHandler) {
+            this.metadataFaultHandler = metadataFaultHandler;
+            return this;
+        }
+
         public KafkaClusterTestKit build() throws Exception {
             Map<Integer, ControllerServer> controllers = new HashMap<>();
             Map<Integer, BrokerServer> brokers = new HashMap<>();
@@ -149,15 +160,16 @@ public KafkaClusterTestKit build() throws Exception {
                     ThreadUtils.createThreadFactory("KafkaClusterTestKit%d", false));
                 for (ControllerNode node : nodes.controllerNodes().values()) {
                     Map<String, String> props = new HashMap<>(configProps);
-                    props.put(KafkaConfig$.MODULE$.ProcessRolesProp(), "controller");
+                    props.put(KafkaConfig$.MODULE$.ProcessRolesProp(), roles(node.id()));
                     props.put(KafkaConfig$.MODULE$.NodeIdProp(),
                         Integer.toString(node.id()));
                     props.put(KafkaConfig$.MODULE$.MetadataLogDirProp(),
                         node.metadataDirectory());
                     props.put(KafkaConfig$.MODULE$.ListenerSecurityProtocolMapProp(),
-                        "CONTROLLER:PLAINTEXT");
-                    props.put(KafkaConfig$.MODULE$.ListenersProp(),
-                        "CONTROLLER://localhost:0");
+                        "EXTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT");
+                    props.put(KafkaConfig$.MODULE$.ListenersProp(), listeners(node.id()));
+                    props.put(KafkaConfig$.MODULE$.InterBrokerListenerNameProp(),
+                        nodes.interBrokerListenerName().value());
                     props.put(KafkaConfig$.MODULE$.ControllerListenerNamesProp(),
                         "CONTROLLER");
                     // Note: we can't accurately set controller.quorum.voters yet, since we don't
@@ -174,6 +186,7 @@ public KafkaClusterTestKit build() throws Exception {
                     String threadNamePrefix = String.format("controller%d_", node.id());
                     MetaProperties metaProperties = MetaProperties.apply(nodes.clusterId().toString(), node.id());
                     TopicPartition metadataPartition = new TopicPartition(KafkaRaftServer.MetadataTopic(), 0);
+                    BootstrapMetadata bootstrapMetadata = BootstrapMetadata.create(nodes.bootstrapMetadataVersion());
                     KafkaRaftManager<ApiMessageAndVersion> raftManager = new KafkaRaftManager<>(
                         metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftServer.MetadataTopicId(),
                         Time.SYSTEM, new Metrics(), Option.apply(threadNamePrefix), connectFutureManager.future);
@@ -183,9 +196,14 @@ metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftS
                         raftManager,
                         Time.SYSTEM,
                         new Metrics(),
+                        new MockControllerMetrics(),
                         Option.apply(threadNamePrefix),
                         connectFutureManager.future,
-                        KafkaRaftServer.configSchema()
+                        KafkaRaftServer.configSchema(),
+                        raftManager.apiVersions(),
+                        bootstrapMetadata,
+                        metadataFaultHandler,
+                        fatalFaultHandler
                     );
                     controllers.put(node.id(), controller);
                     controller.socketServerFirstBoundPortFuture().whenComplete((port, e) -> {
@@ -199,7 +217,7 @@ metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftS
                 }
                 for (BrokerNode node : nodes.brokerNodes().values()) {
                     Map<String, String> props = new HashMap<>(configProps);
-                    props.put(KafkaConfig$.MODULE$.ProcessRolesProp(), "broker");
+                    props.put(KafkaConfig$.MODULE$.ProcessRolesProp(), roles(node.id()));
                     props.put(KafkaConfig$.MODULE$.BrokerIdProp(),
                         Integer.toString(node.id()));
                     props.put(KafkaConfig$.MODULE$.MetadataLogDirProp(),
@@ -208,8 +226,7 @@ metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftS
                         String.join(",", node.logDataDirectories()));
                     props.put(KafkaConfig$.MODULE$.ListenerSecurityProtocolMapProp(),
                         "EXTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT");
-                    props.put(KafkaConfig$.MODULE$.ListenersProp(),
-                        "EXTERNAL://localhost:0");
+                    props.put(KafkaConfig$.MODULE$.ListenersProp(), listeners(node.id()));
                     props.put(KafkaConfig$.MODULE$.InterBrokerListenerNameProp(),
                         nodes.interBrokerListenerName().value());
                     props.put(KafkaConfig$.MODULE$.ControllerListenerNamesProp(),
@@ -227,22 +244,31 @@ metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftS
                     String threadNamePrefix = String.format("broker%d_", node.id());
                     MetaProperties metaProperties = MetaProperties.apply(nodes.clusterId().toString(), node.id());
                     TopicPartition metadataPartition = new TopicPartition(KafkaRaftServer.MetadataTopic(), 0);
-                    KafkaRaftManager<ApiMessageAndVersion> raftManager = new KafkaRaftManager<>(
+                    KafkaRaftManager<ApiMessageAndVersion> raftManager;
+                    if (raftManagers.containsKey(node.id())) {
+                        raftManager = raftManagers.get(node.id());
+                    } else {
+                        raftManager = new KafkaRaftManager<>(
                             metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftServer.MetadataTopicId(),
                             Time.SYSTEM, new Metrics(), Option.apply(threadNamePrefix), connectFutureManager.future);
+                        raftManagers.put(node.id(), raftManager);
+                    }
+                    Metrics metrics = new Metrics();
                     BrokerServer broker = new BrokerServer(
                         config,
                         nodes.brokerProperties(node.id()),
                         raftManager,
                         Time.SYSTEM,
-                        new Metrics(),
+                        metrics,
+                        BrokerServerMetrics$.MODULE$.apply(metrics),
                         Option.apply(threadNamePrefix),
                         JavaConverters.asScalaBuffer(Collections.<String>emptyList()).toSeq(),
                         connectFutureManager.future,
-                        Server.SUPPORTED_FEATURES()
+                        fatalFaultHandler,
+                        metadataFaultHandler,
+                        metadataFaultHandler
                     );
                     brokers.put(node.id(), broker);
-                    raftManagers.put(node.id(), raftManager);
                 }
             } catch (Exception e) {
                 if (executorService != null) {
@@ -265,7 +291,28 @@ metaProperties, config, new MetadataRecordSerde(), metadataPartition, KafkaRaftS
                 throw e;
             }
             return new KafkaClusterTestKit(executorService, nodes, controllers,
-                brokers, raftManagers, connectFutureManager, baseDirectory);
+                brokers, raftManagers, connectFutureManager, baseDirectory,
+                metadataFaultHandler, fatalFaultHandler);
+        }
+
+        private String listeners(int node) {
+            if (nodes.isCoResidentNode(node)) {
+                return "EXTERNAL://localhost:0,CONTROLLER://localhost:0";
+            }
+            if (nodes.controllerNodes().containsKey(node)) {
+                return "CONTROLLER://localhost:0";
+            }
+            return "EXTERNAL://localhost:0";
+        }
+
+        private String roles(int node) {
+            if (nodes.isCoResidentNode(node)) {
+                return "broker,controller";
+            }
+            if (nodes.controllerNodes().containsKey(node)) {
+                return "controller";
+            }
+            return "broker";
         }
 
         static private void setupNodeDirectories(File baseDirectory,
@@ -286,14 +333,20 @@ static private void setupNodeDirectories(File baseDirectory,
     private final Map<Integer, KafkaRaftManager<ApiMessageAndVersion>> raftManagers;
     private final ControllerQuorumVotersFutureManager controllerQuorumVotersFutureManager;
     private final File baseDirectory;
-
-    private KafkaClusterTestKit(ExecutorService executorService,
-                                TestKitNodes nodes,
-                                Map<Integer, ControllerServer> controllers,
-                                Map<Integer, BrokerServer> brokers,
-                                Map<Integer, KafkaRaftManager<ApiMessageAndVersion>> raftManagers,
-                                ControllerQuorumVotersFutureManager controllerQuorumVotersFutureManager,
-                                File baseDirectory) {
+    private final MockFaultHandler metadataFaultHandler;
+    private final MockFaultHandler fatalFaultHandler;
+
+    private KafkaClusterTestKit(
+        ExecutorService executorService,
+        TestKitNodes nodes,
+        Map<Integer, ControllerServer> controllers,
+        Map<Integer, BrokerServer> brokers,
+        Map<Integer, KafkaRaftManager<ApiMessageAndVersion>> raftManagers,
+        ControllerQuorumVotersFutureManager controllerQuorumVotersFutureManager,
+        File baseDirectory,
+        MockFaultHandler metadataFaultHandler,
+        MockFaultHandler fatalFaultHandler
+    ) {
         this.executorService = executorService;
         this.nodes = nodes;
         this.controllers = controllers;
@@ -301,6 +354,8 @@ private KafkaClusterTestKit(ExecutorService executorService,
         this.raftManagers = raftManagers;
         this.controllerQuorumVotersFutureManager = controllerQuorumVotersFutureManager;
         this.baseDirectory = baseDirectory;
+        this.metadataFaultHandler = metadataFaultHandler;
+        this.fatalFaultHandler = fatalFaultHandler;
     }
 
     public void format() throws Exception {
@@ -337,6 +392,7 @@ private void formatNodeAndLog(MetaProperties properties, String metadataLogDir,
                     StorageTool.formatCommand(out,
                             JavaConverters.asScalaBuffer(Collections.singletonList(metadataLogDir)).toSeq(),
                             properties,
+                            MetadataVersion.MINIMUM_KRAFT_VERSION,
                             false);
                 } finally {
                     for (String line : stream.toString().split(String.format("%n"))) {
@@ -374,6 +430,7 @@ public void startup() throws ExecutionException, InterruptedException {
 
     /**
      * Wait for a controller to mark all the brokers as ready (registered and unfenced).
+     * And also wait for the metadata cache up-to-date in each broker server.
      */
     public void waitForReadyBrokers() throws ExecutionException, InterruptedException {
         // We can choose any controller, not just the active controller.
@@ -381,6 +438,11 @@ public void waitForReadyBrokers() throws ExecutionException, InterruptedExceptio
         ControllerServer controllerServer = controllers.values().iterator().next();
         Controller controller = controllerServer.controller();
         controller.waitForReadyBrokers(brokers.size()).get();
+
+        // make sure metadata cache in each broker server is up-to-date
+        TestUtils.waitForCondition(() ->
+                brokers().values().stream().allMatch(brokerServer -> brokerServer.metadataCache().getAliveBrokers().size() == brokers.size()),
+            "Failed to wait for publisher to publish the metadata update to each broker.");
     }
 
     public Properties controllerClientProperties() throws ExecutionException, InterruptedException {
@@ -405,7 +467,10 @@ public Properties controllerClientProperties() throws ExecutionException, Interr
     }
 
     public Properties clientProperties() {
-        Properties properties = new Properties();
+        return clientProperties(new Properties());
+    }
+
+    public Properties clientProperties(Properties configOverrides) {
         if (!brokers.isEmpty()) {
             StringBuilder bld = new StringBuilder();
             String prefix = "";
@@ -422,9 +487,9 @@ public Properties clientProperties() {
                 bld.append(prefix).append("localhost:").append(port);
                 prefix = ",";
             }
-            properties.setProperty(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, bld.toString());
+            configOverrides.putIfAbsent(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, bld.toString());
         }
-        return properties;
+        return configOverrides;
     }
 
     public Map<Integer, ControllerServer> controllers() {
@@ -482,6 +547,8 @@ public void close() throws Exception {
             executorService.shutdownNow();
             executorService.awaitTermination(5, TimeUnit.MINUTES);
         }
+        metadataFaultHandler.maybeRethrowFirstException();
+        fatalFaultHandler.maybeRethrowFirstException();
     }
 
     private void waitForAllFutures(List<Entry<String, Future<?>>> futureEntries)
diff --git a/core/src/test/java/kafka/testkit/TestKitNodes.java b/core/src/test/java/kafka/testkit/TestKitNodes.java
index d52b8002337d7..14692ccc9624d 100644
--- a/core/src/test/java/kafka/testkit/TestKitNodes.java
+++ b/core/src/test/java/kafka/testkit/TestKitNodes.java
@@ -20,6 +20,7 @@
 import kafka.server.MetaProperties;
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.network.ListenerName;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.nio.file.Paths;
 import java.util.ArrayList;
@@ -32,7 +33,9 @@
 
 public class TestKitNodes {
     public static class Builder {
+        private boolean coResident = false;
         private Uuid clusterId = null;
+        private MetadataVersion bootstrapMetadataVersion = null;
         private final NavigableMap<Integer, ControllerNode> controllerNodes = new TreeMap<>();
         private final NavigableMap<Integer, BrokerNode> brokerNodes = new TreeMap<>();
 
@@ -41,6 +44,16 @@ public Builder setClusterId(Uuid clusterId) {
             return this;
         }
 
+        public Builder setBootstrapMetadataVersion(MetadataVersion metadataVersion) {
+            this.bootstrapMetadataVersion = metadataVersion;
+            return this;
+        }
+
+        public Builder setCoResident(boolean coResident) {
+            this.coResident = coResident;
+            return this;
+        }
+
         public Builder addNodes(TestKitNode[] nodes) {
             for (TestKitNode node : nodes) {
                 addNode(node);
@@ -71,7 +84,7 @@ public Builder setNumControllerNodes(int numControllerNodes) {
                 controllerNodes.pollFirstEntry();
             }
             while (controllerNodes.size() < numControllerNodes) {
-                int nextId = 3000;
+                int nextId = startControllerId();
                 if (!controllerNodes.isEmpty()) {
                     nextId = controllerNodes.lastKey() + 1;
                 }
@@ -89,7 +102,7 @@ public Builder setNumBrokerNodes(int numBrokerNodes) {
                 brokerNodes.pollFirstEntry();
             }
             while (brokerNodes.size() < numBrokerNodes) {
-                int nextId = 0;
+                int nextId = startBrokerId();
                 if (!brokerNodes.isEmpty()) {
                     nextId = brokerNodes.lastKey() + 1;
                 }
@@ -103,18 +116,39 @@ public TestKitNodes build() {
             if (clusterId == null) {
                 clusterId = Uuid.randomUuid();
             }
-            return new TestKitNodes(clusterId, controllerNodes, brokerNodes);
+            if (bootstrapMetadataVersion == null) {
+                bootstrapMetadataVersion = MetadataVersion.latest();
+            }
+            return new TestKitNodes(clusterId, bootstrapMetadataVersion, controllerNodes, brokerNodes);
+        }
+
+        private int startBrokerId() {
+            return 0;
+        }
+
+        private int startControllerId() {
+            if (coResident) {
+                return startBrokerId();
+            }
+            return startBrokerId() + 3000;
         }
     }
 
     private final Uuid clusterId;
+    private final MetadataVersion bootstrapMetadataVersion;
     private final NavigableMap<Integer, ControllerNode> controllerNodes;
     private final NavigableMap<Integer, BrokerNode> brokerNodes;
 
+    public boolean isCoResidentNode(int node) {
+        return controllerNodes.containsKey(node) && brokerNodes.containsKey(node);
+    }
+
     private TestKitNodes(Uuid clusterId,
+                         MetadataVersion bootstrapMetadataVersion,
                          NavigableMap<Integer, ControllerNode> controllerNodes,
                          NavigableMap<Integer, BrokerNode> brokerNodes) {
         this.clusterId = clusterId;
+        this.bootstrapMetadataVersion = bootstrapMetadataVersion;
         this.controllerNodes = controllerNodes;
         this.brokerNodes = brokerNodes;
     }
@@ -123,6 +157,10 @@ public Uuid clusterId() {
         return clusterId;
     }
 
+    public MetadataVersion bootstrapMetadataVersion() {
+        return bootstrapMetadataVersion;
+    }
+
     public Map<Integer, ControllerNode> controllerNodes() {
         return controllerNodes;
     }
@@ -161,7 +199,7 @@ public TestKitNodes copyWithAbsolutePaths(String baseDirectory) {
                 node.incarnationId(), absolutize(baseDirectory, node.metadataDirectory()),
                 absolutize(baseDirectory, node.logDataDirectories()), node.propertyOverrides()));
         }
-        return new TestKitNodes(clusterId, newControllerNodes, newBrokerNodes);
+        return new TestKitNodes(clusterId, bootstrapMetadataVersion, newControllerNodes, newBrokerNodes);
     }
 
     private static List<String> absolutize(String base, Collection<String> directories) {
diff --git a/core/src/test/scala/integration/kafka/admin/ConfigCommandIntegrationTest.scala b/core/src/test/scala/integration/kafka/admin/ConfigCommandIntegrationTest.scala
new file mode 100644
index 0000000000000..f2a6e71dd1704
--- /dev/null
+++ b/core/src/test/scala/integration/kafka/admin/ConfigCommandIntegrationTest.scala
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.admin
+
+import kafka.admin.ConfigCommand.ConfigCommandOptions
+import kafka.cluster.{Broker, EndPoint}
+import kafka.server.{ConfigEntityName, KafkaConfig, QuorumTestHarness}
+import kafka.utils.{Exit, Logging, TestInfoUtils}
+import kafka.zk.{AdminZkClient, BrokerInfo}
+import org.apache.kafka.common.config.ConfigException
+import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.server.common.MetadataVersion
+import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
+
+import scala.collection.Seq
+import scala.jdk.CollectionConverters._
+
+class ConfigCommandIntegrationTest extends QuorumTestHarness with Logging {
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def shouldExitWithNonZeroStatusOnUpdatingUnallowedConfigViaZk(quorum: String): Unit = {
+    assertNonZeroStatusExit(Array(
+      "--zookeeper", zkConnect,
+      "--entity-name", "1",
+      "--entity-type", "brokers",
+      "--alter",
+      "--add-config", "security.inter.broker.protocol=PLAINTEXT"))
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def shouldExitWithNonZeroStatusOnZkCommandAlterUserQuota(quorum: String): Unit = {
+    assertNonZeroStatusExit(Array(
+      "--zookeeper", zkConnect,
+      "--entity-type", "users",
+      "--entity-name", "admin",
+      "--alter", "--add-config", "consumer_byte_rate=20000"))
+  }
+
+  private def assertNonZeroStatusExit(args: Array[String]): Unit = {
+    var exitStatus: Option[Int] = None
+    Exit.setExitProcedure { (status, _) =>
+      exitStatus = Some(status)
+      throw new RuntimeException
+    }
+
+    try {
+      ConfigCommand.main(args)
+    } catch {
+      case _: RuntimeException =>
+    } finally {
+      Exit.resetExitProcedure()
+    }
+
+    assertEquals(Some(1), exitStatus)
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def testDynamicBrokerConfigUpdateUsingZooKeeper(quorum: String): Unit = {
+    val brokerId = "1"
+    val adminZkClient = new AdminZkClient(zkClient)
+    val alterOpts = Array("--zookeeper", zkConnect, "--entity-type", "brokers", "--alter")
+
+    def entityOpt(brokerId: Option[String]): Array[String] = {
+      brokerId.map(id => Array("--entity-name", id)).getOrElse(Array("--entity-default"))
+    }
+
+    def alterConfigWithZk(configs: Map[String, String], brokerId: Option[String],
+                          encoderConfigs: Map[String, String] = Map.empty): Unit = {
+      val configStr = (configs ++ encoderConfigs).map { case (k, v) => s"$k=$v" }.mkString(",")
+      val addOpts = new ConfigCommandOptions(alterOpts ++ entityOpt(brokerId) ++ Array("--add-config", configStr))
+      ConfigCommand.alterConfigWithZk(zkClient, addOpts, adminZkClient)
+    }
+
+    def verifyConfig(configs: Map[String, String], brokerId: Option[String]): Unit = {
+      val entityConfigs = zkClient.getEntityConfigs("brokers", brokerId.getOrElse(ConfigEntityName.Default))
+      assertEquals(configs, entityConfigs.asScala)
+    }
+
+    def alterAndVerifyConfig(configs: Map[String, String], brokerId: Option[String]): Unit = {
+      alterConfigWithZk(configs, brokerId)
+      verifyConfig(configs, brokerId)
+    }
+
+    def deleteAndVerifyConfig(configNames: Set[String], brokerId: Option[String]): Unit = {
+      val deleteOpts = new ConfigCommandOptions(alterOpts ++ entityOpt(brokerId) ++
+        Array("--delete-config", configNames.mkString(",")))
+      ConfigCommand.alterConfigWithZk(zkClient, deleteOpts, adminZkClient)
+      verifyConfig(Map.empty, brokerId)
+    }
+
+    // Add config
+    alterAndVerifyConfig(Map("message.max.size" -> "110000"), Some(brokerId))
+    alterAndVerifyConfig(Map("message.max.size" -> "120000"), None)
+
+    // Change config
+    alterAndVerifyConfig(Map("message.max.size" -> "130000"), Some(brokerId))
+    alterAndVerifyConfig(Map("message.max.size" -> "140000"), None)
+
+    // Delete config
+    deleteAndVerifyConfig(Set("message.max.size"), Some(brokerId))
+    deleteAndVerifyConfig(Set("message.max.size"), None)
+
+    // Listener configs: should work only with listener name
+    alterAndVerifyConfig(Map("listener.name.external.ssl.keystore.location" -> "/tmp/test.jks"), Some(brokerId))
+    assertThrows(classOf[ConfigException], () => alterConfigWithZk(Map("ssl.keystore.location" -> "/tmp/test.jks"), Some(brokerId)))
+
+    // Per-broker config configured at default cluster-level should fail
+    assertThrows(classOf[ConfigException], () => alterConfigWithZk(Map("listener.name.external.ssl.keystore.location" -> "/tmp/test.jks"), None))
+    deleteAndVerifyConfig(Set("listener.name.external.ssl.keystore.location"), Some(brokerId))
+
+    // Password config update without encoder secret should fail
+    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("listener.name.external.ssl.keystore.password" -> "secret"), Some(brokerId)))
+
+    // Password config update with encoder secret should succeed and encoded password must be stored in ZK
+    val configs = Map("listener.name.external.ssl.keystore.password" -> "secret", "log.cleaner.threads" -> "2")
+    val encoderConfigs = Map(KafkaConfig.PasswordEncoderSecretProp -> "encoder-secret")
+    alterConfigWithZk(configs, Some(brokerId), encoderConfigs)
+    val brokerConfigs = zkClient.getEntityConfigs("brokers", brokerId)
+    assertFalse(brokerConfigs.contains(KafkaConfig.PasswordEncoderSecretProp), "Encoder secret stored in ZooKeeper")
+    assertEquals("2", brokerConfigs.getProperty("log.cleaner.threads")) // not encoded
+    val encodedPassword = brokerConfigs.getProperty("listener.name.external.ssl.keystore.password")
+    val passwordEncoder = ConfigCommand.createPasswordEncoder(encoderConfigs)
+    assertEquals("secret", passwordEncoder.decode(encodedPassword).value)
+    assertEquals(configs.size, brokerConfigs.size)
+
+    // Password config update with overrides for encoder parameters
+    val configs2 = Map("listener.name.internal.ssl.keystore.password" -> "secret2")
+    val encoderConfigs2 = Map(KafkaConfig.PasswordEncoderSecretProp -> "encoder-secret",
+      KafkaConfig.PasswordEncoderCipherAlgorithmProp -> "DES/CBC/PKCS5Padding",
+      KafkaConfig.PasswordEncoderIterationsProp -> "1024",
+      KafkaConfig.PasswordEncoderKeyFactoryAlgorithmProp -> "PBKDF2WithHmacSHA1",
+      KafkaConfig.PasswordEncoderKeyLengthProp -> "64")
+    alterConfigWithZk(configs2, Some(brokerId), encoderConfigs2)
+    val brokerConfigs2 = zkClient.getEntityConfigs("brokers", brokerId)
+    val encodedPassword2 = brokerConfigs2.getProperty("listener.name.internal.ssl.keystore.password")
+    assertEquals("secret2", ConfigCommand.createPasswordEncoder(encoderConfigs).decode(encodedPassword2).value)
+    assertEquals("secret2", ConfigCommand.createPasswordEncoder(encoderConfigs2).decode(encodedPassword2).value)
+
+
+    // Password config update at default cluster-level should fail
+    assertThrows(classOf[ConfigException], () => alterConfigWithZk(configs, None, encoderConfigs))
+
+    // Dynamic config updates using ZK should fail if broker is running.
+    registerBrokerInZk(brokerId.toInt)
+    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("message.max.size" -> "210000"), Some(brokerId)))
+    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("message.max.size" -> "220000"), None))
+
+    // Dynamic config updates using ZK should for a different broker that is not running should succeed
+    alterAndVerifyConfig(Map("message.max.size" -> "230000"), Some("2"))
+  }
+
+  private def registerBrokerInZk(id: Int): Unit = {
+    zkClient.createTopLevelPaths()
+    val securityProtocol = SecurityProtocol.PLAINTEXT
+    val endpoint = new EndPoint("localhost", 9092, ListenerName.forSecurityProtocol(securityProtocol), securityProtocol)
+    val brokerInfo = BrokerInfo(Broker(id, Seq(endpoint), rack = None), MetadataVersion.latest, jmxPort = 9192)
+    zkClient.registerBroker(brokerInfo)
+  }
+}
diff --git a/core/src/test/scala/integration/kafka/admin/ListOffsetsIntegrationTest.scala b/core/src/test/scala/integration/kafka/admin/ListOffsetsIntegrationTest.scala
index ccc2bdecc8014..2a148947fd196 100644
--- a/core/src/test/scala/integration/kafka/admin/ListOffsetsIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/admin/ListOffsetsIntegrationTest.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package integration.kafka.admin
+package kafka.admin
 
 import kafka.integration.KafkaServerTestHarness
 import kafka.server.KafkaConfig
diff --git a/core/src/test/scala/integration/kafka/admin/ReassignPartitionsIntegrationTest.scala b/core/src/test/scala/integration/kafka/admin/ReassignPartitionsIntegrationTest.scala
index 518c8c4b91da9..29b4c82740e2a 100644
--- a/core/src/test/scala/integration/kafka/admin/ReassignPartitionsIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/admin/ReassignPartitionsIntegrationTest.scala
@@ -17,22 +17,23 @@
 
 package kafka.admin
 
-import java.io.Closeable
-import java.util.{Collections, HashMap, List}
 import kafka.admin.ReassignPartitionsCommand._
-import kafka.api.KAFKA_2_7_IV1
-import kafka.server.{IsrChangePropagationConfig, KafkaConfig, KafkaServer, ZkIsrManager}
+import kafka.server._
 import kafka.utils.Implicits._
-import kafka.utils.TestUtils
-import kafka.server.QuorumTestHarness
-import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, AlterConfigOp, ConfigEntry, DescribeLogDirsResult, NewTopic}
+import kafka.utils.{TestInfoUtils, TestUtils}
+import org.apache.kafka.clients.admin._
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.common.{TopicPartition, TopicPartitionReplica}
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_7_IV1
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
-import org.junit.jupiter.api.{AfterEach, Test, Timeout}
+import org.junit.jupiter.api.{AfterEach, Timeout}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
+import java.io.Closeable
+import java.util.{Collections, HashMap, List}
 import scala.collection.{Map, Seq, mutable}
 import scala.jdk.CollectionConverters._
 
@@ -53,52 +54,55 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
     }.toMap
 
 
-  @Test
-  def testReassignment(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testReassignment(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     executeAndVerifyReassignment()
   }
 
-  @Test
-  def testReassignmentWithAlterIsrDisabled(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // Note: KRaft requires AlterPartition
+  def testReassignmentWithAlterPartitionDisabled(quorum: String): Unit = {
     // Test reassignment when the IBP is on an older version which does not use
-    // the `AlterIsr` API. In this case, the controller will register individual
+    // the `AlterPartition` API. In this case, the controller will register individual
     // watches for each reassigning partition so that the reassignment can be
     // completed as soon as the ISR is expanded.
-    val configOverrides = Map(KafkaConfig.InterBrokerProtocolVersionProp -> KAFKA_2_7_IV1.version)
-    cluster = new ReassignPartitionsTestCluster(zkConnect, configOverrides = configOverrides)
+    val configOverrides = Map(KafkaConfig.InterBrokerProtocolVersionProp -> IBP_2_7_IV1.version)
+    cluster = new ReassignPartitionsTestCluster(configOverrides = configOverrides)
     cluster.setup()
     executeAndVerifyReassignment()
   }
 
-  @Test
-  def testReassignmentCompletionDuringPartialUpgrade(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // Note: KRaft requires AlterPartition
+  def testReassignmentCompletionDuringPartialUpgrade(quorum: String): Unit = {
     // Test reassignment during a partial upgrade when some brokers are relying on
-    // `AlterIsr` and some rely on the old notification logic through Zookeeper.
+    // `AlterPartition` and some rely on the old notification logic through Zookeeper.
     // In this test case, broker 0 starts up first on the latest IBP and is typically
     // elected as controller. The three remaining brokers start up on the older IBP.
     // We want to ensure that reassignment can still complete through the ISR change
-    // notification path even though the controller expects `AlterIsr`.
+    // notification path even though the controller expects `AlterPartition`.
 
     // Override change notification settings so that test is not delayed by ISR
     // change notification delay
-    ZkIsrManager.DefaultIsrPropagationConfig = IsrChangePropagationConfig(
+    ZkAlterPartitionManager.DefaultIsrPropagationConfig = IsrChangePropagationConfig(
       checkIntervalMs = 500,
       lingerMs = 100,
       maxDelayMs = 500
     )
 
-    val oldIbpConfig = Map(KafkaConfig.InterBrokerProtocolVersionProp -> KAFKA_2_7_IV1.version)
+    val oldIbpConfig = Map(KafkaConfig.InterBrokerProtocolVersionProp -> IBP_2_7_IV1.version)
     val brokerConfigOverrides = Map(1 -> oldIbpConfig, 2 -> oldIbpConfig, 3 -> oldIbpConfig)
 
-    cluster = new ReassignPartitionsTestCluster(zkConnect, brokerConfigOverrides = brokerConfigOverrides)
+    cluster = new ReassignPartitionsTestCluster(brokerConfigOverrides = brokerConfigOverrides)
     cluster.setup()
 
     executeAndVerifyReassignment()
   }
 
-  def executeAndVerifyReassignment(): Unit = {
+  private def executeAndVerifyReassignment(): Unit = {
     val assignment = """{"version":1,"partitions":""" +
       """[{"topic":"foo","partition":0,"replicas":[0,1,3],"log_dirs":["any","any","any"]},""" +
       """{"topic":"bar","partition":0,"replicas":[3,2,0],"log_dirs":["any","any","any"]}""" +
@@ -135,9 +139,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
       describeBrokerLevelThrottles(unthrottledBrokerConfigs.keySet.toSeq))
   }
 
-  @Test
-  def testHighWaterMarkAfterPartitionReassignment(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testHighWaterMarkAfterPartitionReassignment(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     val assignment = """{"version":1,"partitions":""" +
       """[{"topic":"foo","partition":0,"replicas":[3,1,2],"log_dirs":["any","any","any"]}""" +
@@ -164,9 +169,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
       s"Expected broker 3 to have the correct high water mark for the partition.")
   }
 
-  @Test
-  def testAlterReassignmentThrottle(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterReassignmentThrottle(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages("foo", 0, 50)
     cluster.produceMessages("baz", 2, 60)
@@ -200,9 +206,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
   /**
    * Test running a reassignment with the interBrokerThrottle set.
    */
-  @Test
-  def testThrottledReassignment(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testThrottledReassignment(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages("foo", 0, 50)
     cluster.produceMessages("baz", 2, 60)
@@ -257,9 +264,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
     waitForBrokerLevelThrottles(unthrottledBrokerConfigs)
   }
 
-  @Test
-  def testProduceAndConsumeWithReassignmentInProgress(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testProduceAndConsumeWithReassignmentInProgress(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages("baz", 2, 60)
     val assignment = """{"version":1,"partitions":""" +
@@ -285,9 +293,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
   /**
    * Test running a reassignment and then cancelling it.
    */
-  @Test
-  def testCancellation(): Unit = {
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCancellation(quorum: String): Unit = {
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages("foo", 0, 200)
     cluster.produceMessages("baz", 1, 200)
@@ -368,9 +377,16 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
    */
   private def describeBrokerLevelThrottles(brokerIds: Seq[Int]): Map[Int, Map[String, Long]] = {
     brokerIds.map { brokerId =>
-      val props = zkClient.getEntityConfigs("brokers", brokerId.toString)
+      val brokerResource = new ConfigResource(ConfigResource.Type.BROKER, brokerId.toString)
+      val brokerConfigs = cluster.adminClient.describeConfigs(Collections.singleton(brokerResource)).values()
+        .get(brokerResource)
+        .get()
+
       val throttles = brokerLevelThrottles.map { throttleName =>
-        (throttleName, props.getOrDefault(throttleName, "-1").asInstanceOf[String].toLong)
+        val configValue = Option(brokerConfigs.get(throttleName))
+          .map(_.value)
+          .getOrElse("-1")
+        (throttleName, configValue.toLong)
       }.toMap
       brokerId -> throttles
     }.toMap
@@ -379,11 +395,12 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
   /**
    * Test moving partitions between directories.
    */
-  @Test
-  def testLogDirReassignment(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // JBOD not yet implemented for KRaft
+  def testLogDirReassignment(quorum: String): Unit = {
     val topicPartition = new TopicPartition("foo", 0)
 
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages(topicPartition.topic, topicPartition.partition, 700)
 
@@ -429,11 +446,12 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
     assertEquals(reassignment.targetDir, info1.curLogDirs.getOrElse(topicPartition, ""))
   }
 
-  @Test
-  def testAlterLogDirReassignmentThrottle(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // JBOD not yet implemented for KRaft
+  def testAlterLogDirReassignmentThrottle(quorum: String): Unit = {
     val topicPartition = new TopicPartition("foo", 0)
 
-    cluster = new ReassignPartitionsTestCluster(zkConnect)
+    cluster = new ReassignPartitionsTestCluster()
     cluster.setup()
     cluster.produceMessages(topicPartition.topic, topicPartition.partition, 700)
 
@@ -559,7 +577,6 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
   }
 
   class ReassignPartitionsTestCluster(
-    val zkConnect: String,
     configOverrides: Map[String, String] = Map.empty,
     brokerConfigOverrides: Map[Int, Map[String, String]] = Map.empty
   ) extends Closeable {
@@ -581,7 +598,7 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
       case (brokerId, rack) =>
         val config = TestUtils.createBrokerConfig(
           nodeId = brokerId,
-          zkConnect = zkConnect,
+          zkConnect = zkConnectOrNull,
           rack = Some(rack),
           enableControlledShutdown = false, // shorten test time
           logDirCount = 3)
@@ -596,10 +613,10 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
           overrides.forKeyValue(config.setProperty)
         }
 
-        config
+        new KafkaConfig(config)
     }.toBuffer
 
-    var servers = new mutable.ArrayBuffer[KafkaServer]
+    var servers = new mutable.ArrayBuffer[KafkaBroker]
 
     var brokerList: String = null
 
@@ -612,7 +629,7 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
 
     def createServers(): Unit = {
       brokers.keySet.foreach { brokerId =>
-        servers += TestUtils.createServer(KafkaConfig(brokerConfigs(brokerId)))
+        servers += createBroker(brokerConfigs(brokerId))
       }
     }
 
@@ -634,6 +651,13 @@ class ReassignPartitionsIntegrationTest extends QuorumTestHarness {
         case (topicName, parts) =>
           TestUtils.waitForAllPartitionsMetadata(servers, topicName, parts.size)
       }
+
+      if (isKRaftTest()) {
+        TestUtils.ensureConsistentKRaftMetadata(
+          cluster.servers,
+          controllerServer
+        )
+      }
     }
 
     def produceMessages(topic: String, partition: Int, numMessages: Int): Unit = {
diff --git a/core/src/test/scala/unit/kafka/admin/TopicCommandIntegrationTest.scala b/core/src/test/scala/integration/kafka/admin/TopicCommandIntegrationTest.scala
similarity index 67%
rename from core/src/test/scala/unit/kafka/admin/TopicCommandIntegrationTest.scala
rename to core/src/test/scala/integration/kafka/admin/TopicCommandIntegrationTest.scala
index 9a1fe378f6cf6..3082babd06fa0 100644
--- a/core/src/test/scala/unit/kafka/admin/TopicCommandIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/admin/TopicCommandIntegrationTest.scala
@@ -16,26 +16,26 @@
   */
 package kafka.admin
 
-import java.util.{Collection, Collections, Optional, Properties}
-
+import java.util.{Collections, Optional, Properties}
 import kafka.admin.TopicCommand.{TopicCommandOptions, TopicService}
 import kafka.integration.KafkaServerTestHarness
-import kafka.server.{ConfigType, KafkaConfig}
-import kafka.utils.{Logging, TestUtils}
+import kafka.server.KafkaConfig
+import kafka.utils.{Logging, TestInfoUtils, TestUtils}
 import kafka.zk.{ConfigEntityChangeNotificationZNode, DeleteTopicsTopicZNode}
 import org.apache.kafka.clients.CommonClientConfigs
 import org.apache.kafka.clients.admin._
+import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.config.{ConfigException, ConfigResource, TopicConfig}
-import org.apache.kafka.common.errors.{ClusterAuthorizationException, ThrottlingQuotaExceededException, TopicExistsException}
+import org.apache.kafka.common.errors.{ClusterAuthorizationException, InvalidTopicException, TopicExistsException}
 import org.apache.kafka.common.internals.Topic
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.requests.MetadataResponse
 import org.apache.kafka.common.security.auth.SecurityProtocol
-import org.apache.kafka.common.{Node, TopicPartition, TopicPartitionInfo}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
-import org.mockito.ArgumentMatcher
-import org.mockito.ArgumentMatchers.{eq => eqThat, _}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import org.mockito.Mockito._
 
 import scala.collection.Seq
@@ -54,7 +54,7 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     */
   override def generateConfigs: Seq[KafkaConfig] = TestUtils.createBrokerConfigs(
     numConfigs = 6,
-    zkConnect = zkConnect,
+    zkConnect = zkConnectOrNull,
     rackInfo = Map(0 -> "rack1", 1 -> "rack2", 2 -> "rack2", 3 -> "rack1", 4 -> "rack3", 5 -> "rack3"),
     numPartitions = numPartitions,
     defaultReplicationFactor = defaultReplicationFactor,
@@ -76,7 +76,7 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
   }
 
   private[this] def waitForTopicCreated(topicName: String, timeout: Int = 10000): Unit = {
-    TestUtils.waitForPartitionMetadata(servers, topicName, partition = 0, timeout)
+    TestUtils.waitForPartitionMetadata(brokers, topicName, partition = 0, timeout)
   }
 
   @BeforeEach
@@ -98,16 +98,18 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
       topicService.close()
   }
 
-  @Test
-  def testCreate(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreate(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(
       Array("--partitions", "2", "--replication-factor", "1", "--topic", testTopicName)))
 
     adminClient.listTopics().names().get().contains(testTopicName)
   }
 
-  @Test
-  def testCreateWithDefaults(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithDefaults(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(Array("--topic", testTopicName)))
 
     val partitions = adminClient
@@ -120,8 +122,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertEquals(partitions.get(0).replicas().size(), defaultReplicationFactor)
   }
 
-  @Test
-  def testCreateWithDefaultReplication(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithDefaultReplication(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(
       Array("--topic", testTopicName, "--partitions", "2")))
 
@@ -135,8 +138,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertEquals(partitions.get(0).replicas().size(), defaultReplicationFactor)
   }
 
-  @Test
-  def testCreateWithDefaultPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithDefaultPartitions(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(
       Array("--topic", testTopicName, "--replication-factor", "2")))
 
@@ -151,8 +155,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertEquals(partitions.get(0).replicas().size(), 2)
   }
 
-  @Test
-  def testCreateWithConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithConfigs(quorum: String): Unit = {
     val configResource = new ConfigResource(ConfigResource.Type.TOPIC, testTopicName)
     createAndWaitTopic(new TopicCommandOptions(
       Array("--partitions", "2", "--replication-factor", "2", "--topic", testTopicName, "--config", "delete.retention.ms=1000")))
@@ -163,8 +168,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertEquals(1000, Integer.valueOf(configs.get("delete.retention.ms").value()))
   }
 
-  @Test
-  def testCreateWhenAlreadyExists(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWhenAlreadyExists(quorum: String): Unit = {
     val numPartitions = 1
 
     // create the topic
@@ -176,15 +182,17 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertThrows(classOf[TopicExistsException], () => topicService.createTopic(createOpts))
   }
 
-  @Test
-  def testCreateWhenAlreadyExistsWithIfNotExists(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWhenAlreadyExistsWithIfNotExists(quorum: String): Unit = {
     val createOpts = new TopicCommandOptions(Array("--topic", testTopicName, "--if-not-exists"))
     createAndWaitTopic(createOpts)
     topicService.createTopic(createOpts)
   }
 
-  @Test
-  def testCreateWithReplicaAssignment(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithReplicaAssignment(quorum: String): Unit = {
     // create the topic
     val createOpts = new TopicCommandOptions(
       Array("--replica-assignment", "5:4,3:2,1:0", "--topic", testTopicName))
@@ -202,37 +210,42 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertEquals(List(1, 0), partitions.get(2).replicas().asScala.map(_.id()))
   }
 
-  @Test
-  def testCreateWithInvalidReplicationFactor(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithInvalidReplicationFactor(quorum: String): Unit = {
     assertThrows(classOf[IllegalArgumentException],
       () => topicService.createTopic(new TopicCommandOptions(
         Array("--partitions", "2", "--replication-factor", (Short.MaxValue+1).toString, "--topic", testTopicName))))
   }
 
-  @Test
-  def testCreateWithNegativeReplicationFactor(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithNegativeReplicationFactor(quorum: String): Unit = {
     assertThrows(classOf[IllegalArgumentException],
       () => topicService.createTopic(new TopicCommandOptions(
         Array("--partitions", "2", "--replication-factor", "-1", "--topic", testTopicName))))
   }
 
-  @Test
-  def testCreateWithNegativePartitionCount(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithNegativePartitionCount(quorum: String): Unit = {
     assertThrows(classOf[IllegalArgumentException],
       () => topicService.createTopic(new TopicCommandOptions(
         Array("--partitions", "-1", "--replication-factor", "1", "--topic", testTopicName))))
   }
 
-  @Test
-  def testInvalidTopicLevelConfig(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidTopicLevelConfig(quorum: String): Unit = {
     val createOpts = new TopicCommandOptions(
       Array("--partitions", "1", "--replication-factor", "1", "--topic", testTopicName,
         "--config", "message.timestamp.type=boom"))
     assertThrows(classOf[ConfigException], () => topicService.createTopic(createOpts))
   }
 
-  @Test
-  def testListTopics(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListTopics(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(
       Array("--partitions", "1", "--replication-factor", "1", "--topic", testTopicName)))
 
@@ -242,8 +255,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertTrue(output.contains(testTopicName))
   }
 
-  @Test
-  def testListTopicsWithIncludeList(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListTopicsWithIncludeList(quorum: String): Unit = {
     val topic1 = "kafka.testTopic1"
     val topic2 = "kafka.testTopic2"
     val topic3 = "oooof.testTopic1"
@@ -264,8 +278,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertFalse(output.contains(topic3))
   }
 
-  @Test
-  def testListTopicsWithExcludeInternal(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListTopicsWithExcludeInternal(quorum: String): Unit = {
     val topic1 = "kafka.testTopic1"
     adminClient.createTopics(
       List(new NewTopic(topic1, 2, 2.toShort),
@@ -280,8 +295,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertFalse(output.contains(Topic.GROUP_METADATA_TOPIC_NAME))
   }
 
-  @Test
-  def testAlterPartitionCount(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterPartitionCount(quorum: String): Unit = {
     adminClient.createTopics(
       List(new NewTopic(testTopicName, 2, 2.toShort)).asJavaCollection).all().get()
     waitForTopicCreated(testTopicName)
@@ -289,26 +305,34 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     topicService.alterTopic(new TopicCommandOptions(
       Array("--topic", testTopicName, "--partitions", "3")))
 
+    TestUtils.waitUntilTrue(
+      () => brokers.forall(_.metadataCache.getTopicPartitions(testTopicName).size == 3),
+      "Timeout waiting for new assignment propagating to broker")
     val topicDescription = adminClient.describeTopics(Collections.singletonList(testTopicName)).topicNameValues().get(testTopicName).get()
     assertTrue(topicDescription.partitions().size() == 3)
   }
 
-  @Test
-  def testAlterAssignment(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterAssignment(quorum: String): Unit = {
     adminClient.createTopics(
       Collections.singletonList(new NewTopic(testTopicName, 2, 2.toShort))).all().get()
     waitForTopicCreated(testTopicName)
 
     topicService.alterTopic(new TopicCommandOptions(
       Array("--topic", testTopicName, "--replica-assignment", "5:3,3:1,4:2", "--partitions", "3")))
+    TestUtils.waitUntilTrue(
+      () => brokers.forall(_.metadataCache.getTopicPartitions(testTopicName).size == 3),
+      "Timeout waiting for new assignment propagating to broker")
 
     val topicDescription = adminClient.describeTopics(Collections.singletonList(testTopicName)).topicNameValues().get(testTopicName).get()
     assertTrue(topicDescription.partitions().size() == 3)
     assertEquals(List(4,2), topicDescription.partitions().get(2).replicas().asScala.map(_.id()))
   }
 
-  @Test
-  def testAlterAssignmentWithMoreAssignmentThanPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterAssignmentWithMoreAssignmentThanPartitions(quorum: String): Unit = {
     adminClient.createTopics(
       List(new NewTopic(testTopicName, 2, 2.toShort)).asJavaCollection).all().get()
     waitForTopicCreated(testTopicName)
@@ -318,8 +342,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
         Array("--topic", testTopicName, "--replica-assignment", "5:3,3:1,4:2,3:2", "--partitions", "3"))))
   }
 
-  @Test
-  def testAlterAssignmentWithMorePartitionsThanAssignment(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterAssignmentWithMorePartitionsThanAssignment(quorum: String): Unit = {
     adminClient.createTopics(
       List(new NewTopic(testTopicName, 2, 2.toShort)).asJavaCollection).all().get()
     waitForTopicCreated(testTopicName)
@@ -329,8 +354,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
         Array("--topic", testTopicName, "--replica-assignment", "5:3,3:1,4:2", "--partitions", "6"))))
   }
 
-  @Test
-  def testAlterWithInvalidPartitionCount(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterWithInvalidPartitionCount(quorum: String): Unit = {
     createAndWaitTopic(new TopicCommandOptions(
       Array("--partitions", "1", "--replication-factor", "1", "--topic", testTopicName)))
 
@@ -339,22 +365,25 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
         Array("--partitions", "-1", "--topic", testTopicName))))
   }
 
-  @Test
-  def testAlterWhenTopicDoesntExist(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterWhenTopicDoesntExist(quorum: String): Unit = {
     // alter a topic that does not exist without --if-exists
     val alterOpts = new TopicCommandOptions(Array("--topic", testTopicName, "--partitions", "1"))
     val topicService = TopicService(adminClient)
     assertThrows(classOf[IllegalArgumentException], () => topicService.alterTopic(alterOpts))
   }
 
-  @Test
-  def testAlterWhenTopicDoesntExistWithIfExists(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterWhenTopicDoesntExistWithIfExists(quorum: String): Unit = {
     topicService.alterTopic(new TopicCommandOptions(
       Array("--topic", testTopicName, "--partitions", "1", "--if-exists")))
   }
 
-  @Test
-  def testCreateAlterTopicWithRackAware(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateAlterTopicWithRackAware(quorum: String): Unit = {
     val rackInfo = Map(0 -> "rack1", 1 -> "rack2", 2 -> "rack2", 3 -> "rack1", 4 -> "rack3", 5 -> "rack3")
 
     val numPartitions = 18
@@ -365,9 +394,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
       "--topic", testTopicName))
     createAndWaitTopic(createOpts)
 
-    var assignment = zkClient.getReplicaAssignmentForTopics(Set(testTopicName)).map { case (tp, replicas) =>
-      tp.partition -> replicas
-    }
+    var assignment = adminClient.describeTopics(Collections.singletonList(testTopicName))
+      .allTopicNames().get().get(testTopicName).partitions()
+      .asScala.map(info => info.partition() -> info.replicas().asScala.map(_.id())).toMap
     checkReplicaDistribution(assignment, rackInfo, rackInfo.size, numPartitions, replicationFactor)
 
     val alteredNumPartitions = 36
@@ -376,14 +405,19 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
       "--partitions", alteredNumPartitions.toString,
       "--topic", testTopicName))
     topicService.alterTopic(alterOpts)
-    assignment = zkClient.getReplicaAssignmentForTopics(Set(testTopicName)).map { case (tp, replicas) =>
-      tp.partition -> replicas
-    }
+
+    TestUtils.waitUntilTrue(
+      () => brokers.forall(_.metadataCache.getTopicPartitions(testTopicName).size == alteredNumPartitions),
+      "Timeout waiting for new assignment propagating to broker")
+    assignment = adminClient.describeTopics(Collections.singletonList(testTopicName))
+      .allTopicNames().get().get(testTopicName).partitions()
+      .asScala.map(info => info.partition() -> info.replicas().asScala.map(_.id())).toMap
     checkReplicaDistribution(assignment, rackInfo, rackInfo.size, alteredNumPartitions, replicationFactor)
   }
 
-  @Test
-  def testConfigPreservationAcrossPartitionAlteration(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConfigPreservationAcrossPartitionAlteration(quorum: String): Unit = {
     val numPartitionsOriginal = 1
     val cleanupKey = "cleanup.policy"
     val cleanupVal = "compact"
@@ -395,25 +429,30 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
       "--config", cleanupKey + "=" + cleanupVal,
       "--topic", testTopicName))
     createAndWaitTopic(createOpts)
-    val props = adminZkClient.fetchEntityConfig(ConfigType.Topic, testTopicName)
-    assertTrue(props.containsKey(cleanupKey), "Properties after creation don't contain " + cleanupKey)
-    assertTrue(props.getProperty(cleanupKey).equals(cleanupVal), "Properties after creation have incorrect value")
+    val configResource = new ConfigResource(ConfigResource.Type.TOPIC, testTopicName)
+    val props = adminClient.describeConfigs(Collections.singleton(configResource)).all().get().get(configResource)
+    // val props = adminZkClient.fetchEntityConfig(ConfigType.Topic, testTopicName)
+    assertNotNull(props.get(cleanupKey), "Properties after creation don't contain " + cleanupKey)
+    assertEquals(cleanupVal, props.get(cleanupKey).value(), "Properties after creation have incorrect value")
 
     // pre-create the topic config changes path to avoid a NoNodeException
-    zkClient.makeSurePersistentPathExists(ConfigEntityChangeNotificationZNode.path)
+    if (!isKRaftTest()) {
+      zkClient.makeSurePersistentPathExists(ConfigEntityChangeNotificationZNode.path)
+    }
 
     // modify the topic to add new partitions
     val numPartitionsModified = 3
     val alterOpts = new TopicCommandOptions(
       Array("--partitions", numPartitionsModified.toString, "--topic", testTopicName))
     topicService.alterTopic(alterOpts)
-    val newProps = adminZkClient.fetchEntityConfig(ConfigType.Topic, testTopicName)
-    assertTrue(newProps.containsKey(cleanupKey), "Updated properties do not contain " + cleanupKey)
-    assertTrue(newProps.getProperty(cleanupKey).equals(cleanupVal), "Updated properties have incorrect value")
+    val newProps = adminClient.describeConfigs(Collections.singleton(configResource)).all().get().get(configResource)
+    assertNotNull(newProps.get(cleanupKey), "Updated properties do not contain " + cleanupKey)
+    assertEquals(cleanupVal, newProps.get(cleanupKey).value(), "Updated properties have incorrect value")
   }
 
-  @Test
-  def testTopicDeletion(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testTopicDeletion(quorum: String): Unit = {
     // create the NormalTopic
     val createOpts = new TopicCommandOptions(Array("--partitions", "1",
       "--replication-factor", "1",
@@ -423,14 +462,17 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     // delete the NormalTopic
     val deleteOpts = new TopicCommandOptions(Array("--topic", testTopicName))
 
-    val deletePath = DeleteTopicsTopicZNode.path(testTopicName)
-    assertFalse(zkClient.pathExists(deletePath), "Delete path for topic shouldn't exist before deletion.")
+    if (!isKRaftTest()) {
+      val deletePath = DeleteTopicsTopicZNode.path(testTopicName)
+      assertFalse(zkClient.pathExists(deletePath), "Delete path for topic shouldn't exist before deletion.")
+    }
     topicService.deleteTopic(deleteOpts)
-    TestUtils.verifyTopicDeletion(zkClient, testTopicName, 1, servers)
+    TestUtils.verifyTopicDeletion(zkClientOrNull, testTopicName, 1, brokers)
   }
 
-  @Test
-  def testDeleteInternalTopic(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteInternalTopic(quorum: String): Unit = {
     // create the offset topic
     val createOffsetTopicOpts = new TopicCommandOptions(Array("--partitions", "1",
       "--replication-factor", "1",
@@ -443,25 +485,30 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     val deleteOffsetTopicOpts = new TopicCommandOptions(
       Array("--topic", Topic.GROUP_METADATA_TOPIC_NAME))
     val deleteOffsetTopicPath = DeleteTopicsTopicZNode.path(Topic.GROUP_METADATA_TOPIC_NAME)
-    assertFalse(zkClient.pathExists(deleteOffsetTopicPath), "Delete path for topic shouldn't exist before deletion.")
+    if (!isKRaftTest()) {
+      assertFalse(zkClient.pathExists(deleteOffsetTopicPath), "Delete path for topic shouldn't exist before deletion.")
+    }
     topicService.deleteTopic(deleteOffsetTopicOpts)
-    TestUtils.verifyTopicDeletion(zkClient, Topic.GROUP_METADATA_TOPIC_NAME, 1, servers)
+    TestUtils.verifyTopicDeletion(zkClientOrNull, Topic.GROUP_METADATA_TOPIC_NAME, 1, brokers)
   }
 
-  @Test
-  def testDeleteWhenTopicDoesntExist(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteWhenTopicDoesntExist(quorum: String): Unit = {
     // delete a topic that does not exist
     val deleteOpts = new TopicCommandOptions(Array("--topic", testTopicName))
     assertThrows(classOf[IllegalArgumentException], () => topicService.deleteTopic(deleteOpts))
   }
 
-  @Test
-  def testDeleteWhenTopicDoesntExistWithIfExists(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteWhenTopicDoesntExistWithIfExists(quorum: String): Unit = {
     topicService.deleteTopic(new TopicCommandOptions(Array("--topic", testTopicName, "--if-exists")))
   }
 
-  @Test
-  def testDescribe(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribe(quorum: String): Unit = {
     adminClient.createTopics(
       Collections.singletonList(new NewTopic(testTopicName, 2, 2.toShort))).all().get()
     waitForTopicCreated(testTopicName)
@@ -473,19 +520,22 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertTrue(rows(0).startsWith(s"Topic: $testTopicName"))
   }
 
-  @Test
-  def testDescribeWhenTopicDoesntExist(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeWhenTopicDoesntExist(quorum: String): Unit = {
     assertThrows(classOf[IllegalArgumentException],
       () => topicService.describeTopic(new TopicCommandOptions(Array("--topic", testTopicName))))
   }
 
-  @Test
-  def testDescribeWhenTopicDoesntExistWithIfExists(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeWhenTopicDoesntExistWithIfExists(quorum: String): Unit = {
     topicService.describeTopic(new TopicCommandOptions(Array("--topic", testTopicName, "--if-exists")))
   }
 
-  @Test
-  def testDescribeUnavailablePartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeUnavailablePartitions(quorum: String): Unit = {
     adminClient.createTopics(
       Collections.singletonList(new NewTopic(testTopicName, 6, 1.toShort))).all().get()
     waitForTopicCreated(testTopicName)
@@ -500,7 +550,7 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
 
       // wait until the topic metadata for the test topic is propagated to each alive broker
       TestUtils.waitUntilTrue(() => {
-        servers
+        brokers
           .filterNot(_.config.brokerId == 0)
           .foldLeft(true) {
             (result, server) => {
@@ -527,27 +577,32 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     }
   }
 
-  @Test
-  def testDescribeUnderReplicatedPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeUnderReplicatedPartitions(quorum: String): Unit = {
     adminClient.createTopics(
       Collections.singletonList(new NewTopic(testTopicName, 1, 6.toShort))).all().get()
     waitForTopicCreated(testTopicName)
 
     try {
       killBroker(0)
-      val aliveServers = servers.filterNot(_.config.brokerId == 0)
-      TestUtils.waitForPartitionMetadata(aliveServers, testTopicName, 0)
+      if (isKRaftTest()) {
+        ensureConsistentKRaftMetadata()
+      } else {
+        TestUtils.waitForPartitionMetadata(aliveBrokers, testTopicName, 0)
+      }
       val output = TestUtils.grabConsoleOutput(
         topicService.describeTopic(new TopicCommandOptions(Array("--under-replicated-partitions"))))
       val rows = output.split("\n")
-      assertTrue(rows(0).startsWith(s"\tTopic: $testTopicName"))
+      assertTrue(rows(0).startsWith(s"\tTopic: $testTopicName"), s"Unexpected output: ${rows(0)}")
     } finally {
       restartDeadBrokers()
     }
   }
 
-  @Test
-  def testDescribeUnderMinIsrPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeUnderMinIsrPartitions(quorum: String): Unit = {
     val configMap = new java.util.HashMap[String, String]()
     configMap.put(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG, "6")
 
@@ -557,8 +612,14 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
 
     try {
       killBroker(0)
-      val aliveServers = servers.filterNot(_.config.brokerId == 0)
-      TestUtils.waitForPartitionMetadata(aliveServers, testTopicName, 0)
+      if (isKRaftTest()) {
+        ensureConsistentKRaftMetadata()
+      } else {
+        TestUtils.waitUntilTrue(
+          () => aliveBrokers.forall(_.metadataCache.getPartitionInfo(testTopicName, 0).get.isr().size() == 5),
+          s"Timeout waiting for partition metadata propagating to brokers for $testTopicName topic"
+        )
+      }
       val output = TestUtils.grabConsoleOutput(
         topicService.describeTopic(new TopicCommandOptions(Array("--under-min-isr-partitions"))))
       val rows = output.split("\n")
@@ -568,8 +629,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     }
   }
 
-  @Test
-  def testDescribeUnderReplicatedPartitionsWhenReassignmentIsInProgress(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeUnderReplicatedPartitionsWhenReassignmentIsInProgress(quorum: String): Unit = {
     val configMap = new java.util.HashMap[String, String]()
     val replicationFactor: Short = 1
     val partitions = 1
@@ -580,12 +642,12 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     waitForTopicCreated(testTopicName)
 
     // Produce multiple batches.
-    TestUtils.generateAndProduceMessages(servers, testTopicName, numMessages = 10, acks = -1)
-    TestUtils.generateAndProduceMessages(servers, testTopicName, numMessages = 10, acks = -1)
+    TestUtils.generateAndProduceMessages(brokers, testTopicName, numMessages = 10, acks = -1)
+    TestUtils.generateAndProduceMessages(brokers, testTopicName, numMessages = 10, acks = -1)
 
     // Enable throttling. Note the broker config sets the replica max fetch bytes to `1` upon to minimize replication
     // throughput so the reassignment doesn't complete quickly.
-    val brokerIds = servers.map(_.config.brokerId)
+    val brokerIds = brokers.map(_.config.brokerId)
     TestUtils.setReplicationThrottleForPartitions(adminClient, brokerIds, Set(tp), throttleBytes = 1)
 
     val testTopicDesc = adminClient.describeTopics(Collections.singleton(testTopicName)).allTopicNames().get().get(testTopicName)
@@ -622,8 +684,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     TestUtils.waitForAllReassignmentsToComplete(adminClient)
   }
 
-  @Test
-  def testDescribeAtMinIsrPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeAtMinIsrPartitions(quorum: String): Unit = {
     val configMap = new java.util.HashMap[String, String]()
     configMap.put(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG, "4")
 
@@ -634,6 +697,16 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     try {
       killBroker(0)
       killBroker(1)
+
+      if (isKRaftTest()) {
+        ensureConsistentKRaftMetadata()
+      } else {
+        TestUtils.waitUntilTrue(
+          () => aliveBrokers.forall(_.metadataCache.getPartitionInfo(testTopicName, 0).get.isr().size() == 4),
+          s"Timeout waiting for partition metadata propagating to brokers for $testTopicName topic"
+        )
+      }
+
       val output = TestUtils.grabConsoleOutput(
         topicService.describeTopic(new TopicCommandOptions(Array("--at-min-isr-partitions"))))
       val rows = output.split("\n")
@@ -653,8 +726,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     *
     * Output should only display the (1) topic with partition under min ISR count and (3) topic with offline partition
     */
-  @Test
-  def testDescribeUnderMinIsrPartitionsMixed(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeUnderMinIsrPartitionsMixed(quorum: String): Unit = {
     val underMinIsrTopic = "under-min-isr-topic"
     val notUnderMinIsrTopic = "not-under-min-isr-topic"
     val offlineTopic = "offline-topic"
@@ -677,8 +751,17 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
 
     try {
       killBroker(0)
-      val aliveServers = servers.filterNot(_.config.brokerId == 0)
-      TestUtils.waitForPartitionMetadata(aliveServers, underMinIsrTopic, 0)
+      if (isKRaftTest()) {
+        ensureConsistentKRaftMetadata()
+      } else {
+        TestUtils.waitUntilTrue(
+          () => aliveBrokers.forall(
+            broker =>
+              broker.metadataCache.getPartitionInfo(underMinIsrTopic, 0).get.isr().size() < 6 &&
+                broker.metadataCache.getPartitionInfo(offlineTopic, 0).get.leader() == MetadataResponse.NO_LEADER_ID),
+          "Timeout waiting for partition metadata propagating to brokers for underMinIsrTopic topic"
+        )
+      }
       val output = TestUtils.grabConsoleOutput(
         topicService.describeTopic(new TopicCommandOptions(Array("--under-min-isr-partitions"))))
       val rows = output.split("\n")
@@ -690,8 +773,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     }
   }
 
-  @Test
-  def testDescribeReportOverriddenConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeReportOverriddenConfigs(quorum: String): Unit = {
     val config = "file.delete.delay.ms=1000"
     createAndWaitTopic(new TopicCommandOptions(
       Array("--partitions", "2", "--replication-factor", "2", "--topic", testTopicName, "--config", config)))
@@ -700,8 +784,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertTrue(output.contains(config), s"Describe output should have contained $config")
   }
 
-  @Test
-  def testDescribeAndListTopicsWithoutInternalTopics(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeAndListTopicsWithoutInternalTopics(quorum: String): Unit = {
     createAndWaitTopic(
       new TopicCommandOptions(Array("--partitions", "1", "--replication-factor", "1", "--topic", testTopicName)))
     // create a internal topic
@@ -720,8 +805,9 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertFalse(output.contains(Topic.GROUP_METADATA_TOPIC_NAME))
   }
 
-  @Test
-  def testDescribeDoesNotFailWhenListingReassignmentIsUnauthorized(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeDoesNotFailWhenListingReassignmentIsUnauthorized(quorum: String): Unit = {
     adminClient = spy(adminClient)
     topicService = TopicService(adminClient)
 
@@ -746,71 +832,14 @@ class TopicCommandIntegrationTest extends KafkaServerTestHarness with Logging wi
     assertTrue(rows(0).startsWith(s"Topic: $testTopicName"))
   }
 
-  @Test
-  def testCreateTopicDoesNotRetryThrottlingQuotaExceededException(): Unit = {
-    val adminClient = mock(classOf[Admin])
-    val topicService = TopicService(adminClient)
-
-    val result = AdminClientTestUtils.createTopicsResult(testTopicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
-    when(adminClient.createTopics(any(), any())).thenReturn(result)
-
-    assertThrows(classOf[ThrottlingQuotaExceededException],
-      () => topicService.createTopic(new TopicCommandOptions(Array("--topic", testTopicName))))
-
-    val expectedNewTopic = new NewTopic(testTopicName, Optional.empty[Integer](), Optional.empty[java.lang.Short]())
-      .configs(Map.empty[String, String].asJava)
-
-    verify(adminClient, times(1)).createTopics(
-      eqThat(Set(expectedNewTopic).asJava),
-      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[CreateTopicsOptions])
-    )
-  }
-
-  @Test
-  def testDeleteTopicDoesNotRetryThrottlingQuotaExceededException(): Unit = {
-    val adminClient = mock(classOf[Admin])
-    val topicService = TopicService(adminClient)
-
-    val listResult = AdminClientTestUtils.listTopicsResult(testTopicName)
-    when(adminClient.listTopics(any())).thenReturn(listResult)
-
-    val result = AdminClientTestUtils.deleteTopicsResult(testTopicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
-    when(adminClient.deleteTopics(any[Collection[String]](), any())).thenReturn(result)
-
-    val exception = assertThrows(classOf[ExecutionException],
-      () => topicService.deleteTopic(new TopicCommandOptions(Array("--topic", testTopicName))))
-    assertTrue(exception.getCause.isInstanceOf[ThrottlingQuotaExceededException])
-
-    verify(adminClient, times(1)).deleteTopics(
-      eqThat(Seq(testTopicName).asJavaCollection),
-      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[DeleteTopicsOptions])
-    )
-  }
-
-  @Test
-  def testCreatePartitionsDoesNotRetryThrottlingQuotaExceededException(): Unit = {
-    val adminClient = mock(classOf[Admin])
-    val topicService = TopicService(adminClient)
-
-    val listResult = AdminClientTestUtils.listTopicsResult(testTopicName)
-    when(adminClient.listTopics(any())).thenReturn(listResult)
-
-    val topicPartitionInfo = new TopicPartitionInfo(0, new Node(0, "", 0),
-      Collections.emptyList(), Collections.emptyList())
-    val describeResult = AdminClientTestUtils.describeTopicsResult(testTopicName, new TopicDescription(
-      testTopicName, false, Collections.singletonList(topicPartitionInfo)))
-    when(adminClient.describeTopics(any(classOf[java.util.Collection[String]]))).thenReturn(describeResult)
-
-    val result = AdminClientTestUtils.createPartitionsResult(testTopicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
-    when(adminClient.createPartitions(any(), any())).thenReturn(result)
-
-    val exception = assertThrows(classOf[ExecutionException],
-      () => topicService.alterTopic(new TopicCommandOptions(Array("--topic", testTopicName, "--partitions", "3"))))
-    assertTrue(exception.getCause.isInstanceOf[ThrottlingQuotaExceededException])
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateWithTopicNameCollision(quorum: String): Unit = {
+    adminClient.createTopics(
+      Collections.singletonList(new NewTopic("foo_bar", 1, 6.toShort))).all().get()
+    waitForTopicCreated("foo_bar")
 
-    verify(adminClient, times(1)).createPartitions(
-      argThat((_.get(testTopicName).totalCount() == 3): ArgumentMatcher[java.util.Map[String, NewPartitions]]),
-      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[CreatePartitionsOptions])
-    )
+    assertThrows(classOf[InvalidTopicException],
+      () => topicService.createTopic(new TopicCommandOptions(Array("--topic", "foo.bar"))))
   }
 }
diff --git a/core/src/test/scala/integration/kafka/api/AbstractConsumerTest.scala b/core/src/test/scala/integration/kafka/api/AbstractConsumerTest.scala
index 56bc47c79e956..23b56b8e91fd3 100644
--- a/core/src/test/scala/integration/kafka/api/AbstractConsumerTest.scala
+++ b/core/src/test/scala/integration/kafka/api/AbstractConsumerTest.scala
@@ -342,15 +342,16 @@ abstract class AbstractConsumerTest extends BaseRequestTest {
 
   protected class ConsumerAssignmentPoller(consumer: Consumer[Array[Byte], Array[Byte]],
                                            topicsToSubscribe: List[String],
-                                           partitionsToAssign: Set[TopicPartition])
+                                           partitionsToAssign: Set[TopicPartition],
+                                           userRebalanceListener: ConsumerRebalanceListener)
     extends ShutdownableThread("daemon-consumer-assignment", false) {
 
     def this(consumer: Consumer[Array[Byte], Array[Byte]], topicsToSubscribe: List[String]) = {
-      this(consumer, topicsToSubscribe, Set.empty[TopicPartition])
+      this(consumer, topicsToSubscribe, Set.empty[TopicPartition], null)
     }
 
     def this(consumer: Consumer[Array[Byte], Array[Byte]], partitionsToAssign: Set[TopicPartition]) = {
-      this(consumer, List.empty[String], partitionsToAssign)
+      this(consumer, List.empty[String], partitionsToAssign, null)
     }
 
     @volatile var thrownException: Option[Throwable] = None
@@ -363,10 +364,14 @@ abstract class AbstractConsumerTest extends BaseRequestTest {
     val rebalanceListener: ConsumerRebalanceListener = new ConsumerRebalanceListener {
       override def onPartitionsAssigned(partitions: util.Collection[TopicPartition]) = {
         partitionAssignment ++= partitions.toArray(new Array[TopicPartition](0))
+        if (userRebalanceListener != null)
+          userRebalanceListener.onPartitionsAssigned(partitions)
       }
 
       override def onPartitionsRevoked(partitions: util.Collection[TopicPartition]) = {
         partitionAssignment --= partitions.toArray(new Array[TopicPartition](0))
+        if (userRebalanceListener != null)
+          userRebalanceListener.onPartitionsRevoked(partitions)
       }
     }
 
diff --git a/core/src/test/scala/integration/kafka/api/AdminClientWithPoliciesIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AdminClientWithPoliciesIntegrationTest.scala
index ab75dc31fb37f..5b2213a65e962 100644
--- a/core/src/test/scala/integration/kafka/api/AdminClientWithPoliciesIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/AdminClientWithPoliciesIntegrationTest.scala
@@ -15,20 +15,24 @@ package kafka.api
 
 import java.util
 import java.util.Properties
-import java.util.concurrent.ExecutionException
 import kafka.integration.KafkaServerTestHarness
 import kafka.log.LogConfig
 import kafka.server.{Defaults, KafkaConfig}
-import kafka.utils.{Logging, TestUtils}
-import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, AlterConfigsOptions, Config, ConfigEntry}
+import kafka.utils.TestUtils.assertFutureExceptionTypeEquals
+import kafka.utils.{Logging, TestInfoUtils, TestUtils}
+import org.apache.kafka.clients.admin.AlterConfigOp.OpType
+import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, AlterConfigOp, AlterConfigsOptions, Config, ConfigEntry}
 import org.apache.kafka.common.config.{ConfigResource, TopicConfig}
-import org.apache.kafka.common.errors.{InvalidRequestException, PolicyViolationException}
+import org.apache.kafka.common.errors.{InvalidConfigurationException, InvalidRequestException, PolicyViolationException}
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.server.policy.AlterConfigPolicy
-import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertThrows, assertTrue}
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo, Timeout}
+import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertTrue}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo, Timeout}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.annotation.nowarn
+import scala.collection.mutable
 import scala.jdk.CollectionConverters._
 
 /**
@@ -45,7 +49,7 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
-    TestUtils.waitUntilBrokerMetadataIsPropagated(servers)
+    TestUtils.waitUntilBrokerMetadataIsPropagated(brokers)
   }
 
   @AfterEach
@@ -58,14 +62,25 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
   def createConfig: util.Map[String, Object] =
     Map[String, Object](AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG -> bootstrapServers()).asJava
 
-  override def generateConfigs = {
-    val configs = TestUtils.createBrokerConfigs(brokerCount, zkConnect)
-    configs.foreach(props => props.put(KafkaConfig.AlterConfigPolicyClassNameProp, classOf[Policy]))
+  override def generateConfigs: collection.Seq[KafkaConfig] = {
+    val configs = TestUtils.createBrokerConfigs(brokerCount, zkConnectOrNull)
+    configs.foreach(overrideNodeConfigs)
     configs.map(KafkaConfig.fromProps)
   }
 
-  @Test
-  def testValidAlterConfigs(): Unit = {
+  override def kraftControllerConfigs(): Seq[Properties] = {
+    val props = new Properties()
+    overrideNodeConfigs(props)
+    Seq(props)
+  }
+
+  private def overrideNodeConfigs(props: Properties): Unit = {
+    props.put(KafkaConfig.AlterConfigPolicyClassNameProp, classOf[Policy])
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testValidAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     // Create topics
     val topic1 = "describe-alter-configs-topic-1"
@@ -79,18 +94,20 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
     val topicResource2 = new ConfigResource(ConfigResource.Type.TOPIC, topic2)
     createTopic(topic2, 1, 1)
 
-    PlaintextAdminIntegrationTest.checkValidAlterConfigs(client, topicResource1, topicResource2)
+    PlaintextAdminIntegrationTest.checkValidAlterConfigs(client, this, topicResource1, topicResource2)
   }
 
-  @Test
-  def testInvalidAlterConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
-    PlaintextAdminIntegrationTest.checkInvalidAlterConfigs(zkClient, servers, client)
+    PlaintextAdminIntegrationTest.checkInvalidAlterConfigs(this, client)
   }
 
   @nowarn("cat=deprecation")
-  @Test
-  def testInvalidAlterConfigsDueToPolicy(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidAlterConfigsDueToPolicy(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -106,6 +123,14 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
     val topicResource3 = new ConfigResource(ConfigResource.Type.TOPIC, topic3)
     createTopic(topic3, 1, 1)
 
+    // Set a mutable broker config
+    val brokerResource = new ConfigResource(ConfigResource.Type.BROKER, brokers.head.config.brokerId.toString)
+    val brokerConfigs = Seq(new ConfigEntry(KafkaConfig.MessageMaxBytesProp, "50000")).asJava
+    val alterResult1 = client.alterConfigs(Map(brokerResource -> new Config(brokerConfigs)).asJava)
+    alterResult1.all.get
+    assertEquals(Set(KafkaConfig.MessageMaxBytesProp), validationsForResource(brokerResource).head.configs().keySet().asScala)
+    validations.clear()
+
     val topicConfigEntries1 = Seq(
       new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "0.9"),
       new ConfigEntry(LogConfig.MinInSyncReplicasProp, "2") // policy doesn't allow this
@@ -115,7 +140,6 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
 
     val topicConfigEntries3 = Seq(new ConfigEntry(LogConfig.MinInSyncReplicasProp, "-1")).asJava
 
-    val brokerResource = new ConfigResource(ConfigResource.Type.BROKER, servers.head.config.brokerId.toString)
     val brokerConfigEntries = Seq(new ConfigEntry(KafkaConfig.SslTruststorePasswordProp, "12313")).asJava
 
     // Alter configs: second is valid, the others are invalid
@@ -127,12 +151,16 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
     ).asJava)
 
     assertEquals(Set(topicResource1, topicResource2, topicResource3, brokerResource).asJava, alterResult.values.keySet)
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource1).get).getCause.isInstanceOf[PolicyViolationException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource1), classOf[PolicyViolationException])
     alterResult.values.get(topicResource2).get
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource3).get).getCause.isInstanceOf[InvalidRequestException])
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(brokerResource).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource3), classOf[InvalidConfigurationException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(brokerResource), classOf[InvalidRequestException])
+    assertTrue(validationsForResource(brokerResource).isEmpty,
+      "Should not see the broker resource in the AlterConfig policy when the broker configs are not being updated.")
+    validations.clear()
 
     // Verify that the second resource was updated and the others were not
+    ensureConsistentKRaftMetadata()
     var describeResult = client.describeConfigs(Seq(topicResource1, topicResource2, topicResource3, brokerResource).asJava)
     var configs = describeResult.all.get
     assertEquals(4, configs.size)
@@ -155,12 +183,16 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
     ).asJava, new AlterConfigsOptions().validateOnly(true))
 
     assertEquals(Set(topicResource1, topicResource2, topicResource3, brokerResource).asJava, alterResult.values.keySet)
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource1).get).getCause.isInstanceOf[PolicyViolationException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource1), classOf[PolicyViolationException])
     alterResult.values.get(topicResource2).get
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource3).get).getCause.isInstanceOf[InvalidRequestException])
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(brokerResource).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource3), classOf[InvalidConfigurationException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(brokerResource), classOf[InvalidRequestException])
+    assertTrue(validationsForResource(brokerResource).isEmpty,
+      "Should not see the broker resource in the AlterConfig policy when the broker configs are not being updated.")
+    validations.clear()
 
     // Verify that no resources are updated since validate_only = true
+    ensureConsistentKRaftMetadata()
     describeResult = client.describeConfigs(Seq(topicResource1, topicResource2, topicResource3, brokerResource).asJava)
     configs = describeResult.all.get
     assertEquals(4, configs.size)
@@ -171,28 +203,44 @@ class AdminClientWithPoliciesIntegrationTest extends KafkaServerTestHarness with
     assertEquals("0.8", configs.get(topicResource2).get(LogConfig.MinCleanableDirtyRatioProp).value)
 
     assertNull(configs.get(brokerResource).get(KafkaConfig.SslTruststorePasswordProp).value)
-  }
 
+    // Do an incremental alter config on the broker, ensure we don't see the broker config we set earlier in the policy
+    alterResult = client.incrementalAlterConfigs(Map(
+      brokerResource ->
+        Seq(new AlterConfigOp(
+          new ConfigEntry(KafkaConfig.MaxConnectionsProp, "9999"), OpType.SET)
+        ).asJavaCollection
+    ).asJava)
+    alterResult.all.get
+    assertEquals(Set(KafkaConfig.MaxConnectionsProp), validationsForResource(brokerResource).head.configs().keySet().asScala)
+  }
 
 }
 
 object AdminClientWithPoliciesIntegrationTest {
 
+  val validations = new mutable.ListBuffer[AlterConfigPolicy.RequestMetadata]()
+
+  def validationsForResource(resource: ConfigResource): Seq[AlterConfigPolicy.RequestMetadata] = {
+    validations.filter { req => req.resource().equals(resource) }.toSeq
+  }
+
   class Policy extends AlterConfigPolicy {
 
     var configs: Map[String, _] = _
     var closed = false
 
     def configure(configs: util.Map[String, _]): Unit = {
+      validations.clear()
       this.configs = configs.asScala.toMap
     }
 
     def validate(requestMetadata: AlterConfigPolicy.RequestMetadata): Unit = {
+      validations.append(requestMetadata)
       require(!closed, "Policy should not be closed")
       require(!configs.isEmpty, "configure should have been called with non empty configs")
       require(!requestMetadata.configs.isEmpty, "request configs should not be empty")
       require(requestMetadata.resource.name.nonEmpty, "resource name should not be empty")
-      require(requestMetadata.resource.name.contains("topic"))
       if (requestMetadata.configs.containsKey(TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG))
         throw new PolicyViolationException("Min in sync replicas cannot be updated")
     }
diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
index a4323bed2c0ba..a109ae8ce4c64 100644
--- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala
@@ -23,7 +23,7 @@ import kafka.log.LogConfig
 import kafka.security.authorizer.{AclAuthorizer, AclEntry}
 import kafka.security.authorizer.AclEntry.WildcardHost
 import kafka.server.{BaseRequestTest, KafkaConfig}
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import kafka.utils.TestUtils.waitUntilTrue
 import org.apache.kafka.clients.admin.{Admin, AlterConfigOp}
 import org.apache.kafka.clients.consumer._
@@ -85,9 +85,8 @@ object AuthorizerIntegrationTest {
   class PrincipalBuilder extends DefaultKafkaPrincipalBuilder(null, null) {
     override def build(context: AuthenticationContext): KafkaPrincipal = {
       context.listenerName match {
-        case BrokerListenerName => BrokerPrincipal
+        case BrokerListenerName | ControllerListenerName => BrokerPrincipal
         case ClientListenerName => ClientPrincipal
-        case ControllerListenerName => BrokerPrincipal
         case listenerName => throw new IllegalArgumentException(s"No principal mapped to listener $listenerName")
       }
     }
@@ -152,32 +151,32 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   consumerConfig.setProperty(ConsumerConfig.GROUP_ID_CONFIG, group)
 
   override def brokerPropertyOverrides(properties: Properties): Unit = {
+    properties.put(KafkaConfig.BrokerIdProp, brokerId.toString)
+    addNodeProperties(properties)
+  }
+
+  override def kraftControllerConfigs(): collection.Seq[Properties] = {
+    val controllerConfigs = super.kraftControllerConfigs()
+    controllerConfigs.foreach(addNodeProperties)
+    controllerConfigs
+  }
+
+  private def addNodeProperties(properties: Properties): Unit = {
     if (isKRaftTest()) {
       properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[StandardAuthorizer].getName)
-      properties.put(StandardAuthorizer.SUPER_USERS_CONFIG, BrokerPrincipal.toString())
+      properties.put(StandardAuthorizer.SUPER_USERS_CONFIG, BrokerPrincipal.toString)
     } else {
       properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[AclAuthorizer].getName)
     }
-    properties.put(KafkaConfig.BrokerIdProp, brokerId.toString)
+
     properties.put(KafkaConfig.OffsetsTopicPartitionsProp, "1")
     properties.put(KafkaConfig.OffsetsTopicReplicationFactorProp, "1")
     properties.put(KafkaConfig.TransactionsTopicPartitionsProp, "1")
     properties.put(KafkaConfig.TransactionsTopicReplicationFactorProp, "1")
     properties.put(KafkaConfig.TransactionsTopicMinISRProp, "1")
-    properties.put(BrokerSecurityConfigs.PRINCIPAL_BUILDER_CLASS_CONFIG,
-      classOf[PrincipalBuilder].getName)
+    properties.put(BrokerSecurityConfigs.PRINCIPAL_BUILDER_CLASS_CONFIG, classOf[PrincipalBuilder].getName)
   }
 
-  override def kraftControllerConfigs(): Seq[Properties] = {
-    val controllerConfigs = Seq(new Properties())
-    controllerConfigs.foreach { properties =>
-      properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[StandardAuthorizer].getName())
-      properties.put(StandardAuthorizer.SUPER_USERS_CONFIG, BrokerPrincipal.toString())
-      properties.put(BrokerSecurityConfigs.PRINCIPAL_BUILDER_CLASS_CONFIG,
-        classOf[PrincipalBuilder].getName)
-    }
-    controllerConfigs
-  }
 
   val requestKeyToError = (topicNames: Map[Uuid, String], version: Short) => Map[ApiKeys, Nothing => Errors](
     ApiKeys.METADATA -> ((resp: requests.MetadataResponse) => resp.errors.asScala.find(_._1 == topic).getOrElse(("test", Errors.NONE))._2),
@@ -549,7 +548,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
         .setLeader(brokerId)
         .setLeaderEpoch(Int.MaxValue)
         .setIsr(List(brokerId).asJava)
-        .setZkVersion(2)
+        .setPartitionEpoch(2)
         .setReplicas(Seq(brokerId).asJava)
         .setIsNew(false)).asJava,
       getTopicIds().asJava,
@@ -562,7 +561,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
         .setTopicName(tp.topic)
         .setPartitionStates(Seq(new StopReplicaPartitionState()
           .setPartitionIndex(tp.partition)
-          .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 2)
+          .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 2)
           .setDeletePartition(true)).asJava)
     ).asJava
     new StopReplicaRequest.Builder(ApiKeys.STOP_REPLICA.latestVersion, brokerId, Int.MaxValue,
@@ -752,7 +751,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizationWithTopicExisting(quorum: String): Unit = {
     //First create the topic so we have a valid topic ID
@@ -806,7 +805,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   /*
    * even if the topic doesn't exist, request APIs should not leak the topic name
    */
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizationWithTopicNotExisting(quorum: String): Unit = {
     val id = Uuid.randomUuid()
@@ -831,7 +830,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequests(requestKeyToRequest, false, topicNames)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @CsvSource(value = Array("zk,false", "zk,true", "kraft,false", "kraft,true"))
   def testTopicIdAuthorization(quorum: String, withTopicExisting: Boolean): Unit = {
     val topicId = if (withTopicExisting) {
@@ -884,7 +883,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   /*
    * even if the topic doesn't exist, request APIs should not leak the topic name
    */
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizationFetchV12WithTopicNotExisting(quorum: String): Unit = {
     val id = Uuid.ZERO_UUID
@@ -896,7 +895,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequests(requestKeyToRequest, false, topicNames)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreateTopicAuthorizationWithClusterCreate(quorum: String): Unit = {
     removeAllClientAcls()
@@ -909,7 +908,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequestAndVerifyResponseError(createTopicsRequest, resources, isAuthorized = true)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testFetchFollowerRequest(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -929,7 +928,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequestAndVerifyResponseError(request, resources, isAuthorized = true)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIncrementalAlterConfigsRequestRequiresClusterPermissionForBrokerLogger(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -953,7 +952,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequestAndVerifyResponseError(request, resources, isAuthorized = true)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetsForLeaderEpochClusterPermission(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -972,7 +971,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRequestAndVerifyResponseError(request, resources, isAuthorized = true)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testProduceWithNoTopicAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -980,7 +979,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => sendRecords(producer, numRecords, tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testProduceWithTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -989,7 +988,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => sendRecords(producer, numRecords, tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testProduceWithTopicRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -998,7 +997,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => sendRecords(producer, numRecords, tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testProduceWithTopicWrite(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1007,13 +1006,13 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRecords(producer, numRecords, tp)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePermissionOnTopicToWriteToNonExistentTopic(quorum: String): Unit = {
     testCreatePermissionNeededToWriteToNonExistentTopic(TOPIC)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePermissionOnClusterToWriteToNonExistentTopic(quorum: String): Unit = {
     testCreatePermissionNeededToWriteToNonExistentTopic(CLUSTER)
@@ -1032,7 +1031,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     sendRecords(producer, numRecords, tp)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsumeUsingAssignWithNoAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1047,7 +1046,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumeRecords(consumer))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testSimpleConsumeWithOffsetLookupAndNoGroupAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1067,7 +1066,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(group, e.groupId())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testSimpleConsumeWithExplicitSeekAndNoGroupAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1087,7 +1086,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumeRecords(consumer)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsumeWithoutTopicDescribeAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1105,7 +1104,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Collections.singleton(topic), e.unauthorizedTopics())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsumeWithTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1124,7 +1123,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Collections.singleton(topic), e.unauthorizedTopics())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsumeWithTopicWrite(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1143,7 +1142,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Collections.singleton(topic), e.unauthorizedTopics())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsumeWithTopicAndGroupRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1162,7 +1161,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   }
 
   @nowarn("cat=deprecation")
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionWithNoTopicAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1180,7 +1179,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertTrue(consumer.subscription.isEmpty)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionWithTopicDescribeOnlyAndGroupRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1199,7 +1198,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   }
 
   @nowarn("cat=deprecation")
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionWithTopicAndGroupRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1232,7 +1231,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   }
 
   @nowarn("cat=deprecation")
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionMatchingInternalTopic(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1262,7 +1261,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionMatchingInternalTopicWithDescribeOnlyPermission(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1288,7 +1287,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Collections.singleton(GROUP_METADATA_TOPIC_NAME), e.unauthorizedTopics())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPatternSubscriptionNotMatchingInternalTopic(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1307,7 +1306,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumeRecords(consumer)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePermissionOnTopicToReadFromNonExistentTopic(quorum: String): Unit = {
     testCreatePermissionNeededToReadFromNonExistentTopic("newTopic",
@@ -1315,7 +1314,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       TOPIC)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePermissionOnClusterToReadFromNonExistentTopic(quorum: String): Unit = {
     testCreatePermissionNeededToReadFromNonExistentTopic("newTopic",
@@ -1348,7 +1347,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }, "Partition metadata not propagated.")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePermissionMetadataRequestAutoCreate(quorum: String): Unit = {
     val readAcls = topicReadAcl(topicResource)
@@ -1370,14 +1369,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithNoAccess(quorum: String): Unit = {
     val consumer = createConsumer()
     assertThrows(classOf[GroupAuthorizationException], () => consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithNoTopicAccess(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, READ, ALLOW)), groupResource)
@@ -1385,7 +1384,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithTopicWrite(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1396,7 +1395,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1407,7 +1406,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithNoGroupAccess(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, READ, ALLOW)), topicResource)
@@ -1415,7 +1414,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[GroupAuthorizationException], () => consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCommitWithTopicAndGroupRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1425,7 +1424,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumer.commitSync(Map(tp -> new OffsetAndMetadata(5)).asJava)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchWithNoAccess(quorum: String): Unit = {
     val consumer = createConsumer()
@@ -1433,7 +1432,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumer.position(tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchWithNoGroupAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1443,7 +1442,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[GroupAuthorizationException], () => consumer.position(tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchWithNoTopicAccess(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, READ, ALLOW)), groupResource)
@@ -1452,7 +1451,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TopicAuthorizationException], () => consumer.position(tp))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchAllTopicPartitionsAuthorization(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1484,7 +1483,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(offset, offsetFetchResponse.partitionDataMap(group).get(tp).offset)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchMultipleGroupsAuthorization(quorum: String): Unit = {
     val groups: Seq[String] = (1 to 5).map(i => s"group$i")
@@ -1640,7 +1639,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     )
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1651,7 +1650,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumer.position(tp)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testOffsetFetchWithTopicAndGroupRead(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1662,14 +1661,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumer.position(tp)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testMetadataWithNoTopicAccess(quorum: String): Unit = {
     val consumer = createConsumer()
     assertThrows(classOf[TopicAuthorizationException], () => consumer.partitionsFor(topic))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testMetadataWithTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1678,14 +1677,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumer.partitionsFor(topic)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testListOffsetsWithNoTopicAccess(quorum: String): Unit = {
     val consumer = createConsumer()
     assertThrows(classOf[TopicAuthorizationException], () => consumer.endOffsets(Set(tp).asJava))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testListOffsetsWithTopicDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1694,7 +1693,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumer.endOffsets(Set(tp).asJava)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeGroupApiWithNoGroupAcl(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, DESCRIBE, ALLOW)), topicResource)
@@ -1702,7 +1701,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     TestUtils.assertFutureExceptionTypeEquals(result.describedGroups().get(group), classOf[GroupAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeGroupApiWithGroupDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1711,7 +1710,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     createAdminClient().describeConsumerGroups(Seq(group).asJava).describedGroups().get(group).get()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeGroupCliWithGroupDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1725,7 +1724,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     consumerGroupService.close()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testListGroupApiWithAndWithoutListGroupAcls(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1774,7 +1773,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     otherConsumer.close()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupApiWithDeleteGroupAcl(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1788,7 +1787,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     createAdminClient().deleteConsumerGroups(Seq(group).asJava).deletedGroups().get(group).get()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupApiWithNoDeleteGroupAcl(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1802,14 +1801,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     TestUtils.assertFutureExceptionTypeEquals(result.deletedGroups().get(group), classOf[GroupAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupApiWithNoDeleteGroupAcl2(quorum: String): Unit = {
     val result = createAdminClient().deleteConsumerGroups(Seq(group).asJava)
     TestUtils.assertFutureExceptionTypeEquals(result.deletedGroups().get(group), classOf[GroupAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupOffsetsWithAcl(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1825,7 +1824,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertNull(result.partitionResult(tp).get())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupOffsetsWithoutDeleteAcl(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1840,7 +1839,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     TestUtils.assertFutureExceptionTypeEquals(result.all(), classOf[GroupAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupOffsetsWithDeleteAclWithoutTopicAcl(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1861,21 +1860,21 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     TestUtils.assertFutureExceptionTypeEquals(result.partitionResult(tp), classOf[TopicAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteGroupOffsetsWithNoAcl(quorum: String): Unit = {
     val result = createAdminClient().deleteConsumerGroupOffsets(group, Set(tp).asJava)
     TestUtils.assertFutureExceptionTypeEquals(result.all(), classOf[GroupAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUnauthorizedDeleteTopicsWithoutDescribe(quorum: String): Unit = {
     val deleteResponse = connectAndReceive[DeleteTopicsResponse](deleteTopicsRequest)
     assertEquals(Errors.TOPIC_AUTHORIZATION_FAILED.code, deleteResponse.data.responses.find(topic).errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUnauthorizedDeleteTopicsWithDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1884,7 +1883,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Errors.TOPIC_AUTHORIZATION_FAILED.code, deleteResponse.data.responses.find(topic).errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteTopicsWithWildCardAuth(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1893,7 +1892,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Errors.NONE.code, deleteResponse.data.responses.find(topic).errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUnauthorizedDeleteRecordsWithoutDescribe(quorum: String): Unit = {
     val deleteRecordsResponse = connectAndReceive[DeleteRecordsResponse](deleteRecordsRequest)
@@ -1901,7 +1900,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       partitions.asScala.head.errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUnauthorizedDeleteRecordsWithDescribe(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1911,7 +1910,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       partitions.asScala.head.errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeleteRecordsWithWildCardAuth(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1921,14 +1920,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       partitions.asScala.head.errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUnauthorizedCreatePartitions(quorum: String): Unit = {
     val createPartitionsResponse = connectAndReceive[CreatePartitionsResponse](createPartitionsRequest)
     assertEquals(Errors.TOPIC_AUTHORIZATION_FAILED.code, createPartitionsResponse.data.results.asScala.head.errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreatePartitionsWithWildCardAuth(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1937,7 +1936,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Errors.NONE.code, createPartitionsResponse.data.results.asScala.head.errorCode)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testTransactionalProducerInitTransactionsNoWriteTransactionalIdAcl(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, DESCRIBE, ALLOW)), transactionalIdResource)
@@ -1945,14 +1944,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TransactionalIdAuthorizationException], () => producer.initTransactions())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testTransactionalProducerInitTransactionsNoDescribeTransactionalIdAcl(quorum: String): Unit = {
     val producer = buildTransactionalProducer()
     assertThrows(classOf[TransactionalIdAuthorizationException], () => producer.initTransactions())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testSendOffsetsWithNoConsumerGroupDescribeAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1968,7 +1967,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       () => producer.sendOffsetsToTransaction(Map(tp -> new OffsetAndMetadata(0L)).asJava, new ConsumerGroupMetadata(group)))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testSendOffsetsWithNoConsumerGroupWriteAccess(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -1983,7 +1982,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
       () => producer.sendOffsetsToTransaction(Map(tp -> new OffsetAndMetadata(0L)).asJava, new ConsumerGroupMetadata(group)))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIdempotentProducerNoIdempotentWriteAclInInitProducerId(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2022,7 +2021,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertClusterAuthFailure()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIdempotentProducerNoIdempotentWriteAclInProduce(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2051,7 +2050,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertTrue(e.getCause.isInstanceOf[TopicAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldInitTransactionsWhenAclSet(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, WRITE, ALLOW)), transactionalIdResource)
@@ -2059,7 +2058,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     producer.initTransactions()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testTransactionalProducerTopicAuthorizationExceptionInSendCallback(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2076,7 +2075,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Set(topic), e.unauthorizedTopics.asScala)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testTransactionalProducerTopicAuthorizationExceptionInCommit(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2094,7 +2093,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     })
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldThrowTransactionalIdAuthorizationExceptionWhenNoTransactionAccessDuringSend(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2109,7 +2108,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     JTestUtils.assertFutureThrows(future, classOf[TransactionalIdAuthorizationException])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldThrowTransactionalIdAuthorizationExceptionWhenNoTransactionAccessOnEndTransaction(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2124,7 +2123,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertThrows(classOf[TransactionalIdAuthorizationException], () => producer.commitTransaction())
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testListTransactionsAuthorization(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2158,7 +2157,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertListTransactionResult(expectedTransactionalIds = Set(transactionalId))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldNotIncludeUnauthorizedTopicsInDescribeTransactionsResponse(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2181,7 +2180,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(List.empty, transactionStateData.topics.asScala.toList)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldSuccessfullyAbortTransactionAfterTopicAuthorizationException(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2201,7 +2200,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     producer.abortTransaction()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldThrowTransactionalIdAuthorizationExceptionWhenNoTransactionAccessOnSendOffsetsToTxn(quorum: String): Unit = {
     addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WildcardHost, WRITE, ALLOW)), transactionalIdResource)
@@ -2217,7 +2216,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     })
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def shouldSendSuccessfullyWhenIdempotentAndHasCorrectACL(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2228,7 +2227,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   }
 
   // Verify that metadata request without topics works without any ACLs and returns cluster id
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testClusterId(quorum: String): Unit = {
     val request = new requests.MetadataRequest.Builder(List.empty.asJava, false).build()
@@ -2237,7 +2236,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertFalse(response.clusterId.isEmpty, "Cluster id not returned")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizeByResourceTypeMultipleAddAndRemove(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2255,7 +2254,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizeByResourceTypeIsolationUnrelatedDenyWontDominateAllow(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2278,7 +2277,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertIdempotentSendSuccess()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizeByResourceTypeDenyTakesPrecedence(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2291,7 +2290,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertIdempotentSendAuthorizationFailure()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizeByResourceTypeWildcardResourceDenyDominate(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2309,7 +2308,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertIdempotentSendAuthorizationFailure()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAuthorizeByResourceTypePrefixedResourceDenyDominate(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2323,7 +2322,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertIdempotentSendAuthorizationFailure()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testMetadataClusterAuthorizedOperationsWithoutDescribeCluster(quorum: String): Unit = {
     removeAllClientAcls()
@@ -2334,7 +2333,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testMetadataClusterAuthorizedOperationsWithDescribeAndAlterCluster(quorum: String): Unit = {
     removeAllClientAcls()
@@ -2355,7 +2354,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeTopicAclWithOperationAll(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2380,7 +2379,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Errors.NONE, topicResponse.error)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeTopicConfigsAclWithOperationAll(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2420,7 +2419,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeClusterClusterAuthorizedOperationsWithoutDescribeCluster(quorum: String): Unit = {
     removeAllClientAcls()
@@ -2430,7 +2429,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDescribeClusterClusterAuthorizedOperationsWithDescribeAndAlterCluster(quorum: String): Unit = {
     removeAllClientAcls()
@@ -2450,7 +2449,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testHostAddressBasedAcls(quorum: String): Unit = {
     createTopicWithBrokerPrincipal(topic)
@@ -2484,7 +2483,7 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
   }
 
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCreateAndCloseConsumerWithNoAccess(quorum: String): Unit = {
     val consumer = createConsumer()
@@ -2510,13 +2509,14 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     val aclEntryFilter = new AccessControlEntryFilter(clientPrincipalString, null, AclOperation.ANY, AclPermissionType.ANY)
     val aclFilter = new AclBindingFilter(ResourcePatternFilter.ANY, aclEntryFilter)
 
-    authorizerForWrite.deleteAcls(null, List(aclFilter).asJava).asScala.map(_.toCompletableFuture.get).flatMap { deletion =>
-      deletion.aclBindingDeleteResults().asScala.map(_.aclBinding.pattern).toSet
-    }.foreach { resource =>
-      (brokers.map(_.authorizer.get) ++ controllerServers.map(_.authorizer.get)).foreach { authorizer =>
-        TestUtils.waitAndVerifyAcls(Set.empty[AccessControlEntry], authorizer, resource, aclEntryFilter)
+    authorizerForWrite.deleteAcls(TestUtils.anonymousAuthorizableContext, List(aclFilter).asJava).asScala.
+      map(_.toCompletableFuture.get).flatMap { deletion =>
+        deletion.aclBindingDeleteResults().asScala.map(_.aclBinding.pattern).toSet
+      }.foreach { resource =>
+        (brokers.map(_.authorizer.get) ++ controllerServers.map(_.authorizer.get)).foreach { authorizer =>
+          TestUtils.waitAndVerifyAcls(Set.empty[AccessControlEntry], authorizer, resource, aclEntryFilter)
+        }
       }
-    }
   }
 
   private def sendRequestAndVerifyResponseError(request: AbstractRequest,
@@ -2573,14 +2573,6 @@ class AuthorizerIntegrationTest extends BaseRequestTest {
     }
   }
 
-  private def addAndVerifyAcls(acls: Set[AccessControlEntry], resource: ResourcePattern): Unit = {
-    TestUtils.addAndVerifyAcls(brokers, acls, resource, controllerServers)
-  }
-
-  private def removeAndVerifyAcls(acls: Set[AccessControlEntry], resource: ResourcePattern): Unit = {
-    TestUtils.removeAndVerifyAcls(brokers, acls, resource, controllerServers)
-  }
-
   private def consumeRecords(consumer: Consumer[Array[Byte], Array[Byte]],
                              numRecords: Int = 1,
                              startingOffset: Int = 0,
diff --git a/core/src/test/scala/integration/kafka/api/BaseAdminIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/BaseAdminIntegrationTest.scala
index e3a79114964a6..4f95654d541d7 100644
--- a/core/src/test/scala/integration/kafka/api/BaseAdminIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/BaseAdminIntegrationTest.scala
@@ -48,12 +48,15 @@ abstract class BaseAdminIntegrationTest extends IntegrationTestHarness with Logg
   def brokerCount = 3
   override def logDirCount = 2
 
+  var testInfo: TestInfo = null
+
   var client: Admin = _
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
+    this.testInfo = testInfo
     super.setUp(testInfo)
-    waitUntilBrokerMetadataIsPropagated(servers)
+    waitUntilBrokerMetadataIsPropagated(brokers)
   }
 
   @AfterEach
@@ -189,6 +192,15 @@ abstract class BaseAdminIntegrationTest extends IntegrationTestHarness with Logg
 
   override def modifyConfigs(configs: Seq[Properties]): Unit = {
     super.modifyConfigs(configs)
+    // For testCreateTopicsReturnsConfigs, set some static broker configurations so that we can
+    // verify that they show up in the "configs" output of CreateTopics.
+    if (testInfo.getTestMethod.toString.contains("testCreateTopicsReturnsConfigs")) {
+      configs.foreach(config => {
+        config.setProperty(KafkaConfig.LogRollTimeHoursProp, "2")
+        config.setProperty(KafkaConfig.LogRetentionTimeMinutesProp, "240")
+        config.setProperty(KafkaConfig.LogRollTimeJitterMillisProp, "123")
+      })
+    }
     configs.foreach { config =>
       config.setProperty(KafkaConfig.DeleteTopicEnableProp, "true")
       config.setProperty(KafkaConfig.GroupInitialRebalanceDelayMsProp, "0")
@@ -201,6 +213,18 @@ abstract class BaseAdminIntegrationTest extends IntegrationTestHarness with Logg
     }
   }
 
+  override def kraftControllerConfigs(): Seq[Properties] = {
+    val controllerConfig = new Properties()
+    if (testInfo.getTestMethod.toString.contains("testCreateTopicsReturnsConfigs")) {
+      // For testCreateTopicsReturnsConfigs, set the controller's ID to 1 so that the dynamic
+      // config we set for node 1 will apply to it.
+      controllerConfig.setProperty(KafkaConfig.NodeIdProp, "1")
+    }
+    val controllerConfigs = Seq(controllerConfig)
+    modifyConfigs(controllerConfigs)
+    controllerConfigs
+  }
+
   def createConfig: util.Map[String, Object] = {
     val config = new util.HashMap[String, Object]
     config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers())
diff --git a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
index 61870b073d845..ce3cd32afdea3 100644
--- a/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
+++ b/core/src/test/scala/integration/kafka/api/BaseProducerSendTest.scala
@@ -19,22 +19,24 @@ package kafka.api
 
 import java.time.Duration
 import java.nio.charset.StandardCharsets
-import java.util.Properties
+import java.util.{Collections, Properties}
 import java.util.concurrent.TimeUnit
-
 import kafka.integration.KafkaServerTestHarness
 import kafka.log.LogConfig
 import kafka.server.KafkaConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
+import org.apache.kafka.clients.admin.{Admin, NewPartitions}
 import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.kafka.clients.producer._
 import org.apache.kafka.common.errors.TimeoutException
-import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.network.{ListenerName, Mode}
 import org.apache.kafka.common.record.TimestampType
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.{KafkaException, TopicPartition}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.jdk.CollectionConverters._
 import scala.collection.mutable.Buffer
@@ -42,16 +44,17 @@ import scala.concurrent.ExecutionException
 
 abstract class BaseProducerSendTest extends KafkaServerTestHarness {
 
-  def generateConfigs = {
+  def generateConfigs: scala.collection.Seq[KafkaConfig] = {
     val overridingProps = new Properties()
     val numServers = 2
     overridingProps.put(KafkaConfig.NumPartitionsProp, 4.toString)
-    TestUtils.createBrokerConfigs(numServers, zkConnect, false, interBrokerSecurityProtocol = Some(securityProtocol),
+    TestUtils.createBrokerConfigs(numServers, zkConnectOrNull, false, interBrokerSecurityProtocol = Some(securityProtocol),
       trustStoreFile = trustStoreFile, saslProperties = serverSaslProperties).map(KafkaConfig.fromProps(_, overridingProps))
   }
 
   private var consumer: KafkaConsumer[Array[Byte], Array[Byte]] = _
   private val producers = Buffer[KafkaProducer[Array[Byte], Array[Byte]]]()
+  protected var admin: Admin = null
 
   protected val topic = "topic"
   private val numRecords = 100
@@ -59,6 +62,15 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
+
+    admin = TestUtils.createAdminClient(brokers, listenerName,
+        TestUtils.securityConfigs(Mode.CLIENT,
+          securityProtocol,
+          trustStoreFile,
+          "adminClient",
+          TestUtils.SslCertificateCn,
+          clientSaslProperties))
+
     consumer = TestUtils.createConsumer(
       bootstrapServers(listenerName = ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT)),
       securityProtocol = SecurityProtocol.PLAINTEXT
@@ -70,6 +82,7 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     consumer.close()
     // Ensure that all producers are closed since unclosed producers impact other tests when Kafka server ports are reused
     producers.foreach(_.close())
+    admin.close()
 
     super.tearDown()
   }
@@ -105,8 +118,9 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
    * 1. Send with null key/value/partition-id should be accepted; send with null topic should be rejected.
    * 2. Last message of the non-blocking send should return the correct offset metadata
    */
-  @Test
-  def testSendOffset(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendOffset(quorum: String): Unit = {
     val producer = createProducer()
     val partition = 0
 
@@ -134,7 +148,7 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
 
     try {
       // create topic
-      createTopic(topic, 1, 2)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2)
 
       // send a normal record
       val record0 = new ProducerRecord[Array[Byte], Array[Byte]](topic, partition, "key".getBytes(StandardCharsets.UTF_8),
@@ -166,8 +180,9 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testSendCompressedMessageWithCreateTime(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendCompressedMessageWithCreateTime(quorum: String): Unit = {
     val producer = createProducer(
       compressionType = "gzip",
       lingerMs = Int.MaxValue,
@@ -175,8 +190,9 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     sendAndVerifyTimestamp(producer, TimestampType.CREATE_TIME)
   }
 
-  @Test
-  def testSendNonCompressedMessageWithCreateTime(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendNonCompressedMessageWithCreateTime(quorum: String): Unit = {
     val producer = createProducer(lingerMs = Int.MaxValue, deliveryTimeoutMs = Int.MaxValue)
     sendAndVerifyTimestamp(producer, TimestampType.CREATE_TIME)
   }
@@ -186,7 +202,7 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
                               timeoutMs: Long = 20000L): Unit = {
     val partition = 0
     try {
-      createTopic(topic, 1, 2)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2)
 
       val futures = for (i <- 1 to numRecords) yield {
         val record = new ProducerRecord(topic, partition, s"key$i".getBytes(StandardCharsets.UTF_8),
@@ -241,7 +257,7 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
         topicProps.setProperty(LogConfig.MessageTimestampTypeProp, "LogAppendTime")
       else
         topicProps.setProperty(LogConfig.MessageTimestampTypeProp, "CreateTime")
-      createTopic(topic, 1, 2, topicProps)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2, topicConfig = topicProps)
 
       val recordAndFutures = for (i <- 1 to numRecords) yield {
         val record = new ProducerRecord(topic, partition, baseTimestamp + i, s"key$i".getBytes(StandardCharsets.UTF_8),
@@ -267,13 +283,14 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
    *
    * After close() returns, all messages should be sent with correct returned offset metadata
    */
-  @Test
-  def testClose(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testClose(quorum: String): Unit = {
     val producer = createProducer()
 
     try {
       // create topic
-      createTopic(topic, 1, 2)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2)
 
       // non-blocking send a list of records
       val record0 = new ProducerRecord[Array[Byte], Array[Byte]](topic, null, "key".getBytes(StandardCharsets.UTF_8),
@@ -300,12 +317,13 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
    *
    * The specified partition-id should be respected
    */
-  @Test
-  def testSendToPartition(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendToPartition(quorum: String): Unit = {
     val producer = createProducer()
 
     try {
-      createTopic(topic, 2, 2)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 2, 2)
       val partition = 1
 
       val now = System.currentTimeMillis()
@@ -345,14 +363,15 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     * Producer will attempt to send messages to the partition specified in each record, and should
     * succeed as long as the partition is included in the metadata.
     */
-  @Test
-  def testSendBeforeAndAfterPartitionExpansion(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendBeforeAndAfterPartitionExpansion(quorum: String): Unit = {
     val producer = createProducer(maxBlockMs = 5 * 1000L)
 
     // create topic
-    createTopic(topic, 1, 2)
-    val partition0 = 0
+    TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2)
 
+    val partition0 = 0
     var futures0 = (1 to numRecords).map { i =>
       producer.send(new ProducerRecord(topic, partition0, null, ("value" + i).getBytes(StandardCharsets.UTF_8)))
     }.map(_.get(30, TimeUnit.SECONDS))
@@ -369,13 +388,11 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
     val e = assertThrows(classOf[ExecutionException], () => producer.send(new ProducerRecord(topic, partition1, null, "value".getBytes(StandardCharsets.UTF_8))).get())
     assertEquals(classOf[TimeoutException], e.getCause.getClass)
 
-    val existingAssignment = zkClient.getFullReplicaAssignmentForTopics(Set(topic)).map {
-      case (topicPartition, assignment) => topicPartition.partition -> assignment
-    }
-    adminZkClient.addPartitions(topic, existingAssignment, adminZkClient.getBrokerMetadatas(), 2)
+    admin.createPartitions(Collections.singletonMap(topic, NewPartitions.increaseTo(2))).all().get()
+
     // read metadata from a broker and verify the new topic partitions exist
-    TestUtils.waitForPartitionMetadata(servers, topic, 0)
-    TestUtils.waitForPartitionMetadata(servers, topic, 1)
+    TestUtils.waitForPartitionMetadata(brokers, topic, 0)
+    TestUtils.waitForPartitionMetadata(brokers, topic, 1)
 
     // send records to the newly added partition after confirming that metadata have been updated.
     val futures1 = (1 to numRecords).map { i =>
@@ -404,11 +421,12 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
   /**
    * Test that flush immediately sends all accumulated requests.
    */
-  @Test
-  def testFlush(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFlush(quorum: String): Unit = {
     val producer = createProducer(lingerMs = Int.MaxValue, deliveryTimeoutMs = Int.MaxValue)
     try {
-      createTopic(topic, 2, 2)
+      TestUtils.createTopicWithAdmin(admin, topic, brokers, 2, 2)
       val record = new ProducerRecord[Array[Byte], Array[Byte]](topic,
         "value".getBytes(StandardCharsets.UTF_8))
       for (_ <- 0 until 50) {
@@ -425,9 +443,10 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
   /**
    * Test close with zero timeout from caller thread
    */
-  @Test
-  def testCloseWithZeroTimeoutFromCallerThread(): Unit = {
-    createTopic(topic, 2, 2)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCloseWithZeroTimeoutFromCallerThread(quorum: String): Unit = {
+    TestUtils.createTopicWithAdmin(admin, topic, brokers, 2, 2)
     val partition = 0
     consumer.assign(List(new TopicPartition(topic, partition)).asJava)
     val record0 = new ProducerRecord[Array[Byte], Array[Byte]](topic, partition, null,
@@ -450,9 +469,10 @@ abstract class BaseProducerSendTest extends KafkaServerTestHarness {
   /**
    * Test close with zero and non-zero timeout from sender thread
    */
-  @Test
-  def testCloseWithZeroTimeoutFromSenderThread(): Unit = {
-    createTopic(topic, 1, 2)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCloseWithZeroTimeoutFromSenderThread(quorum: String): Unit = {
+    TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2)
     val partition = 0
     consumer.assign(List(new TopicPartition(topic, partition)).asJava)
     val record = new ProducerRecord[Array[Byte], Array[Byte]](topic, partition, null, "value".getBytes(StandardCharsets.UTF_8))
diff --git a/core/src/test/scala/integration/kafka/api/BaseQuotaTest.scala b/core/src/test/scala/integration/kafka/api/BaseQuotaTest.scala
index 9f73236d0ba03..40d4cef7f82ee 100644
--- a/core/src/test/scala/integration/kafka/api/BaseQuotaTest.scala
+++ b/core/src/test/scala/integration/kafka/api/BaseQuotaTest.scala
@@ -19,9 +19,8 @@ import java.util.concurrent.TimeUnit
 import java.util.{Collections, HashMap, Properties}
 import com.yammer.metrics.core.{Histogram, Meter}
 import kafka.api.QuotaTestClients._
-import kafka.metrics.KafkaYammerMetrics
-import kafka.server.{ClientQuotaManager, ClientQuotaManagerConfig, KafkaConfig, KafkaServer, QuotaType}
-import kafka.utils.TestUtils
+import kafka.server.{ClientQuotaManager, ClientQuotaManagerConfig, KafkaBroker, KafkaConfig, QuotaType}
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.admin.Admin
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.clients.producer._
@@ -33,8 +32,11 @@ import org.apache.kafka.common.protocol.ApiKeys
 import org.apache.kafka.common.quota.ClientQuotaAlteration
 import org.apache.kafka.common.quota.ClientQuotaEntity
 import org.apache.kafka.common.security.auth.KafkaPrincipal
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.collection.Map
 import scala.jdk.CollectionConverters._
@@ -45,7 +47,7 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
 
   protected def producerClientId = "QuotasTestProducer-1"
   protected def consumerClientId = "QuotasTestConsumer-1"
-  protected def createQuotaTestClients(topic: String, leaderNode: KafkaServer): QuotaTestClients
+  protected def createQuotaTestClients(topic: String, leaderNode: KafkaBroker): QuotaTestClients
 
   this.serverConfig.setProperty(KafkaConfig.ControlledShutdownEnableProp, "false")
   this.serverConfig.setProperty(KafkaConfig.OffsetsTopicReplicationFactorProp, "2")
@@ -69,8 +71,8 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
   val defaultRequestQuota: Double = Long.MaxValue.toDouble
 
   val topic1 = "topic-1"
-  var leaderNode: KafkaServer = _
-  var followerNode: KafkaServer = _
+  var leaderNode: KafkaBroker = _
+  var followerNode: KafkaBroker = _
   var quotaTestClients: QuotaTestClients = _
 
   @BeforeEach
@@ -78,14 +80,15 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
     super.setUp(testInfo)
 
     val numPartitions = 1
-    val leaders = createTopic(topic1, numPartitions, brokerCount)
-    leaderNode = if (leaders(0) == servers.head.config.brokerId) servers.head else servers(1)
-    followerNode = if (leaders(0) != servers.head.config.brokerId) servers.head else servers(1)
+    val leaders = createTopic(topic1, numPartitions, brokerCount, adminClientConfig = adminClientConfig)
+    leaderNode = if (leaders(0) == brokers.head.config.brokerId) brokers.head else brokers(1)
+    followerNode = if (leaders(0) != brokers.head.config.brokerId) brokers.head else brokers(1)
     quotaTestClients = createQuotaTestClients(topic1, leaderNode)
   }
 
-  @Test
-  def testThrottledProducerConsumer(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testThrottledProducerConsumer(quorum: String): Unit = {
     val numRecords = 1000
     val produced = quotaTestClients.produceUntilThrottled(numRecords)
     quotaTestClients.verifyProduceThrottle(expectThrottle = true)
@@ -95,8 +98,9 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
     quotaTestClients.verifyConsumeThrottle(expectThrottle = true)
   }
 
-  @Test
-  def testProducerConsumerOverrideUnthrottled(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testProducerConsumerOverrideUnthrottled(quorum: String): Unit = {
     // Give effectively unlimited quota for producer and consumer
     val props = new Properties()
     props.put(QuotaConfigs.PRODUCER_BYTE_RATE_OVERRIDE_CONFIG, Long.MaxValue.toString)
@@ -114,8 +118,9 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
     quotaTestClients.verifyConsumeThrottle(expectThrottle = false)
   }
 
-  @Test
-  def testProducerConsumerOverrideLowerQuota(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testProducerConsumerOverrideLowerQuota(quorum: String): Unit = {
     // consumer quota is set such that consumer quota * default quota window (10 seconds) is less than
     // MAX_PARTITION_FETCH_BYTES_CONFIG, so that we can test consumer ability to fetch in this case
     // In this case, 250 * 10 < 4096
@@ -131,8 +136,9 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
     quotaTestClients.verifyConsumeThrottle(expectThrottle = true)
   }
 
-  @Test
-  def testQuotaOverrideDelete(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testQuotaOverrideDelete(quorum: String): Unit = {
     // Override producer and consumer quotas to unlimited
     quotaTestClients.overrideQuotas(Long.MaxValue, Long.MaxValue, Long.MaxValue.toDouble)
     quotaTestClients.waitForQuotaUpdate(Long.MaxValue, Long.MaxValue, Long.MaxValue.toDouble)
@@ -157,8 +163,9 @@ abstract class BaseQuotaTest extends IntegrationTestHarness {
     quotaTestClients.verifyConsumeThrottle(expectThrottle = true)
   }
 
-  @Test
-  def testThrottledRequest(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testThrottledRequest(quorum: String): Unit = {
     quotaTestClients.overrideQuotas(Long.MaxValue, Long.MaxValue, 0.1)
     quotaTestClients.waitForQuotaUpdate(Long.MaxValue, Long.MaxValue, 0.1)
 
@@ -189,7 +196,7 @@ object QuotaTestClients {
 }
 
 abstract class QuotaTestClients(topic: String,
-                                leaderNode: KafkaServer,
+                                leaderNode: KafkaBroker,
                                 producerClientId: String,
                                 consumerClientId: String,
                                 val producer: KafkaProducer[Array[Byte], Array[Byte]],
@@ -366,7 +373,7 @@ abstract class QuotaTestClients(topic: String,
     adminClient.alterClientQuotas(quotaAlterations.asJava).all().get()
   }
 
-  def waitForQuotaUpdate(producerQuota: Long, consumerQuota: Long, requestQuota: Double, server: KafkaServer = leaderNode): Unit = {
+  def waitForQuotaUpdate(producerQuota: Long, consumerQuota: Long, requestQuota: Double, server: KafkaBroker = leaderNode): Unit = {
     TestUtils.retry(10000) {
       val quotaManagers = server.dataPlaneRequestProcessor.quotas
       val overrideProducerQuota = quota(quotaManagers.produce, userPrincipal, producerClientId)
diff --git a/core/src/test/scala/integration/kafka/api/ClientIdQuotaTest.scala b/core/src/test/scala/integration/kafka/api/ClientIdQuotaTest.scala
index e4cebe2e9038a..d85d4b79a4f9d 100644
--- a/core/src/test/scala/integration/kafka/api/ClientIdQuotaTest.scala
+++ b/core/src/test/scala/integration/kafka/api/ClientIdQuotaTest.scala
@@ -14,7 +14,7 @@
 
 package kafka.api
 
-import kafka.server.KafkaServer
+import kafka.server.KafkaBroker
 import org.apache.kafka.common.security.auth.KafkaPrincipal
 import org.junit.jupiter.api.{BeforeEach, TestInfo}
 
@@ -35,7 +35,7 @@ class ClientIdQuotaTest extends BaseQuotaTest {
     quotaTestClients.waitForQuotaUpdate(defaultProducerQuota, defaultConsumerQuota, defaultRequestQuota)
   }
 
-  override def createQuotaTestClients(topic: String, leaderNode: KafkaServer): QuotaTestClients = {
+  override def createQuotaTestClients(topic: String, leaderNode: KafkaBroker): QuotaTestClients = {
     val producer = createProducer()
     val consumer = createConsumer()
     val adminClient = createAdminClient()
diff --git a/core/src/test/scala/integration/kafka/api/ConsumerBounceTest.scala b/core/src/test/scala/integration/kafka/api/ConsumerBounceTest.scala
index f675ef5314a8a..2ee6d43f0fc55 100644
--- a/core/src/test/scala/integration/kafka/api/ConsumerBounceTest.scala
+++ b/core/src/test/scala/integration/kafka/api/ConsumerBounceTest.scala
@@ -16,7 +16,6 @@ package kafka.api
 import java.time
 import java.util.concurrent._
 import java.util.{Collection, Collections, Properties}
-
 import kafka.server.KafkaConfig
 import kafka.utils.{Logging, ShutdownableThread, TestUtils}
 import org.apache.kafka.clients.consumer._
@@ -29,6 +28,7 @@ import org.apache.kafka.common.requests.{FindCoordinatorRequest, FindCoordinator
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Disabled, Test}
 
+import java.time.Duration
 import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
 import scala.collection.{Seq, mutable}
@@ -77,14 +77,12 @@ class ConsumerBounceTest extends AbstractConsumerTest with Logging {
   }
 
   @Test
-  @Disabled // To be re-enabled once we can make it less flaky (KAFKA-4801)
   def testConsumptionWithBrokerFailures(): Unit = consumeWithBrokerFailures(10)
 
   /*
    * 1. Produce a bunch of messages
    * 2. Then consume the messages while killing and restarting brokers at random
    */
-  @nowarn("cat=deprecation")
   def consumeWithBrokerFailures(numIters: Int): Unit = {
     val numRecords = 1000
     val producer = createProducer()
@@ -99,8 +97,7 @@ class ConsumerBounceTest extends AbstractConsumerTest with Logging {
     scheduler.start()
 
     while (scheduler.isRunning) {
-      val records = consumer.poll(100).asScala
-      assertEquals(Set(tp), consumer.assignment.asScala)
+      val records = consumer.poll(Duration.ofMillis(100)).asScala
 
       for (record <- records) {
         assertEquals(consumed, record.offset())
@@ -246,12 +243,8 @@ class ConsumerBounceTest extends AbstractConsumerTest with Logging {
     killBroker(findCoordinator(dynamicGroup))
     killBroker(findCoordinator(manualGroup))
 
-    val future1 = submitCloseAndValidate(consumer1, Long.MaxValue, None, gracefulCloseTimeMs)
-
-    val future2 = submitCloseAndValidate(consumer2, Long.MaxValue, None, gracefulCloseTimeMs)
-
-    future1.get
-    future2.get
+    submitCloseAndValidate(consumer1, Long.MaxValue, None, gracefulCloseTimeMs).get
+    submitCloseAndValidate(consumer2, Long.MaxValue, None, gracefulCloseTimeMs).get
 
     restartDeadBrokers()
     checkClosedState(dynamicGroup, 0)
@@ -299,6 +292,7 @@ class ConsumerBounceTest extends AbstractConsumerTest with Logging {
     * Then, 1 consumer should be left out of the group.
     */
   @Test
+  @Disabled // TODO: To be re-enabled once we can make it less flaky (KAFKA-13421)
   def testRollingBrokerRestartsWithSmallerMaxGroupSizeConfigDisruptsBigGroup(): Unit = {
     val group = "group-max-size-test"
     val topic = "group-max-size-test"
diff --git a/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationTest.scala b/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationTest.scala
index f025cc3471dac..2d63e77b119a5 100644
--- a/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationTest.scala
@@ -21,7 +21,7 @@ import java.util.Properties
 import kafka.server.KafkaConfig
 import kafka.utils.{JaasTestUtils, TestUtils}
 import kafka.zk.ConfigEntityChangeNotificationZNode
-import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, ScramCredentialInfo, UserScramCredentialAlteration, UserScramCredentialUpsertion, ScramMechanism => PublicScramMechanism}
+import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, CreateDelegationTokenOptions, ScramCredentialInfo, UserScramCredentialAlteration, UserScramCredentialUpsertion, ScramMechanism => PublicScramMechanism}
 import org.apache.kafka.common.config.SaslConfigs
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
 import org.apache.kafka.common.security.scram.internals.ScramMechanism
@@ -45,20 +45,27 @@ class DelegationTokenEndToEndAuthorizationTest extends EndToEndAuthorizationTest
   private val clientPassword = JaasTestUtils.KafkaScramPassword
 
   override val kafkaPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, JaasTestUtils.KafkaScramAdmin)
-  private val kafkaPassword = JaasTestUtils.KafkaScramAdminPassword
+  protected val kafkaPassword = JaasTestUtils.KafkaScramAdminPassword
 
-  private val privilegedAdminClientConfig = new Properties()
+  protected val privilegedAdminClientConfig = new Properties()
 
   this.serverConfig.setProperty(KafkaConfig.DelegationTokenSecretKeyProp, "testKey")
 
+  def createDelegationTokenOptions(): CreateDelegationTokenOptions = new CreateDelegationTokenOptions()
+
+  def configureTokenAclsBeforeServersStart(): Unit = { }
+
   override def configureSecurityBeforeServersStart(): Unit = {
     super.configureSecurityBeforeServersStart()
+    configureTokenAclsBeforeServersStart()
     zkClient.makeSurePersistentPathExists(ConfigEntityChangeNotificationZNode.path)
     // Create broker admin credentials before starting brokers
     createScramCredentials(zkConnect, kafkaPrincipal.getName, kafkaPassword)
   }
 
-  override def createPrivilegedAdminClient() = createScramAdminClient(kafkaClientSaslMechanism, kafkaPrincipal.getName, kafkaPassword)
+  override def createPrivilegedAdminClient(): Admin = createScramAdminClient(kafkaClientSaslMechanism, kafkaPrincipal.getName, kafkaPassword)
+
+  def createAdditionalCredentialsAfterServersStarted(): Unit = {}
 
   override def configureSecurityAfterServersStart(): Unit = {
     super.configureSecurityAfterServersStart()
@@ -67,6 +74,8 @@ class DelegationTokenEndToEndAuthorizationTest extends EndToEndAuthorizationTest
     createScramCredentialsViaPrivilegedAdminClient(clientPrincipal.getName, clientPassword)
     waitForUserScramCredentialToAppearOnAllBrokers(clientPrincipal.getName, kafkaClientSaslMechanism)
 
+    createAdditionalCredentialsAfterServersStarted()
+
     //create a token with "scram-user" credentials and a privileged token with scram-admin credentials
     val tokens = createDelegationTokens()
     val token = tokens._1
@@ -105,12 +114,36 @@ class DelegationTokenEndToEndAuthorizationTest extends EndToEndAuthorizationTest
     privilegedAdminClientConfig.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers())
   }
 
-  private def createDelegationTokens(): (DelegationToken, DelegationToken) = {
-    val adminClient = createScramAdminClient(kafkaClientSaslMechanism, clientPrincipal.getName, clientPassword)
+  def assertTokenOwner(owner: KafkaPrincipal, token: DelegationToken): Unit = {
+    assertEquals(owner, token.tokenInfo().owner())
+  }
+
+  def assertTokenRequester(requester: KafkaPrincipal, token: DelegationToken): Unit = {
+    assertEquals(requester, token.tokenInfo().tokenRequester())
+  }
+
+  def assertToken(token: DelegationToken): Unit = {
+    assertTokenOwner(clientPrincipal, token)
+    assertTokenRequester(clientPrincipal, token)
+  }
+
+  def createTokenRequesterAdminClient(): Admin = {
+    createScramAdminClient(kafkaClientSaslMechanism, clientPrincipal.getName, clientPassword)
+  }
+
+  def createDelegationTokens(): (DelegationToken, DelegationToken) = {
+    createDelegationTokens(createDelegationTokenOptions)
+  }
+
+  def createDelegationTokens(createDelegationTokenOptions: () => CreateDelegationTokenOptions, assert: Boolean = true): (DelegationToken, DelegationToken) = {
+    val adminClient = createTokenRequesterAdminClient()
     try {
       val privilegedAdminClient = createScramAdminClient(kafkaClientSaslMechanism, kafkaPrincipal.getName, kafkaPassword)
       try {
-        val token = adminClient.createDelegationToken().delegationToken().get()
+        val token = adminClient.createDelegationToken(createDelegationTokenOptions()).delegationToken().get()
+        if (assert) {
+          assertToken(token)
+        }
         val privilegedToken = privilegedAdminClient.createDelegationToken().delegationToken().get()
         //wait for tokens to reach all the brokers
         TestUtils.waitUntilTrue(() => servers.forall(server => server.tokenCache.tokens().size() == 2),
diff --git a/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationWithOwnerTest.scala b/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationWithOwnerTest.scala
new file mode 100644
index 0000000000000..3c034fc4c1906
--- /dev/null
+++ b/core/src/test/scala/integration/kafka/api/DelegationTokenEndToEndAuthorizationWithOwnerTest.scala
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.api
+
+import kafka.admin.AclCommand
+import kafka.utils.JaasTestUtils
+import org.apache.kafka.clients.admin.{Admin, CreateDelegationTokenOptions, DescribeDelegationTokenOptions}
+import org.apache.kafka.common.security.auth.KafkaPrincipal
+import org.apache.kafka.common.security.token.delegation.DelegationToken
+import org.junit.jupiter.api.Assertions.{assertThrows, assertTrue}
+import org.junit.jupiter.api.Test
+
+import java.util.Collections
+import scala.concurrent.ExecutionException
+import scala.jdk.CollectionConverters._
+
+class DelegationTokenEndToEndAuthorizationWithOwnerTest extends DelegationTokenEndToEndAuthorizationTest {
+
+  def createTokenForValidUserArgs: Array[String] = Array("--authorizer-properties",
+    s"zookeeper.connect=$zkConnect",
+    s"--add",
+    s"--user-principal=$clientPrincipal",
+    s"--operation=CreateTokens",
+    s"--allow-principal=$tokenRequesterPrincipal")
+
+  // tests the naive positive case for token requesting for others
+  def describeTokenForValidUserArgs: Array[String] = Array("--authorizer-properties",
+    s"zookeeper.connect=$zkConnect",
+    s"--add",
+    s"--user-principal=$clientPrincipal",
+    s"--operation=DescribeTokens",
+    s"--allow-principal=$tokenRequesterPrincipal")
+
+  // This permission is just there so that otherClientPrincipal shows up among the resources
+  def describeTokenForAdminArgs: Array[String] = Array("--authorizer-properties",
+    s"zookeeper.connect=$zkConnect",
+    s"--add",
+    s"--user-principal=$otherClientPrincipal",
+    s"--operation=DescribeTokens",
+    s"--allow-principal=$otherClientRequesterPrincipal")
+
+  override def createDelegationTokenOptions(): CreateDelegationTokenOptions = new CreateDelegationTokenOptions().owner(clientPrincipal)
+
+  private val tokenRequesterPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, JaasTestUtils.KafkaScramUser2)
+  private val tokenRequesterPassword = JaasTestUtils.KafkaScramPassword2
+
+  private val otherClientPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, "other-client-principal")
+  private val otherClientPassword = "other-client-password"
+
+  private val otherClientRequesterPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, "other-client-requester-principal")
+  private val otherClientRequesterPassword = "other-client-requester-password"
+
+  private val describeTokenFailPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, "describe-token-fail-principal")
+  private val describeTokenFailPassword = "describe-token-fail-password"
+
+  override def configureTokenAclsBeforeServersStart(): Unit = {
+    super.configureTokenAclsBeforeServersStart()
+    AclCommand.main(createTokenForValidUserArgs)
+    AclCommand.main(describeTokenForValidUserArgs)
+    AclCommand.main(describeTokenForAdminArgs)
+  }
+
+  override def createAdditionalCredentialsAfterServersStarted(): Unit = {
+    super.createAdditionalCredentialsAfterServersStarted()
+    createScramCredentialsViaPrivilegedAdminClient(tokenRequesterPrincipal.getName, tokenRequesterPassword)
+    createScramCredentialsViaPrivilegedAdminClient(otherClientPrincipal.getName, otherClientPassword)
+    createScramCredentialsViaPrivilegedAdminClient(otherClientRequesterPrincipal.getName, otherClientRequesterPassword)
+    createScramCredentialsViaPrivilegedAdminClient(describeTokenFailPrincipal.getName, describeTokenFailPassword)
+  }
+
+  override def assertToken(token: DelegationToken): Unit = {
+    assertTokenOwner(clientPrincipal, token)
+    assertTokenRequester(tokenRequesterPrincipal, token)
+  }
+
+  override def createTokenRequesterAdminClient(): Admin = {
+    createScramAdminClient(kafkaClientSaslMechanism, tokenRequesterPrincipal.getName, tokenRequesterPassword)
+  }
+
+  @Test
+  def testCreateTokenForOtherUserFails(): Unit = {
+    val thrown = assertThrows(classOf[ExecutionException], () => {
+      createDelegationTokens(() => new CreateDelegationTokenOptions().owner(otherClientPrincipal), assert = false)
+    })
+    assertTrue(thrown.getMessage.contains("Delegation Token authorization failed"))
+  }
+
+  @Test
+  def testDescribeTokenForOtherUserFails(): Unit = {
+    val describeTokenFailAdminClient = createScramAdminClient(kafkaClientSaslMechanism, describeTokenFailPrincipal.getName, describeTokenFailPassword)
+    val otherClientAdminClient = createScramAdminClient(kafkaClientSaslMechanism, otherClientPrincipal.getName, otherClientPassword)
+    try {
+      otherClientAdminClient.createDelegationToken().delegationToken().get()
+      val tokens = describeTokenFailAdminClient.describeDelegationToken(
+        new DescribeDelegationTokenOptions().owners(Collections.singletonList(otherClientPrincipal)))
+        .delegationTokens.get.asScala
+      assertTrue(tokens.isEmpty)
+    } finally {
+      describeTokenFailAdminClient.close()
+      otherClientAdminClient.close()
+    }
+  }
+
+  @Test
+  def testDescribeTokenForOtherUserPasses(): Unit = {
+    val adminClient = createTokenRequesterAdminClient()
+    try {
+      val tokens = adminClient.describeDelegationToken(
+        new DescribeDelegationTokenOptions().owners(Collections.singletonList(clientPrincipal)))
+        .delegationTokens.get.asScala
+      assertTrue(tokens.nonEmpty)
+      tokens.foreach(t => {
+        assertTrue(t.tokenInfo.owner.equals(clientPrincipal))
+        assertTrue(t.tokenInfo.tokenRequester.equals(tokenRequesterPrincipal))
+      })
+    } finally {
+      adminClient.close()
+    }
+  }
+}
diff --git a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala
index 7c8ac014ad112..eb9522a500036 100644
--- a/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/EndToEndAuthorizationTest.scala
@@ -18,12 +18,11 @@
 package kafka.api
 
 import com.yammer.metrics.core.Gauge
-
 import java.io.File
 import java.util.{Collections, Properties}
 import java.util.concurrent.ExecutionException
+
 import kafka.admin.AclCommand
-import kafka.metrics.KafkaYammerMetrics
 import kafka.security.authorizer.AclAuthorizer
 import kafka.security.authorizer.AclEntry.WildcardHost
 import kafka.server._
@@ -40,6 +39,7 @@ import org.apache.kafka.common.resource._
 import org.apache.kafka.common.resource.ResourceType._
 import org.apache.kafka.common.resource.PatternType.{LITERAL, PREFIXED}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 import org.junit.jupiter.params.ParameterizedTest
diff --git a/core/src/test/scala/integration/kafka/api/EndToEndClusterIdTest.scala b/core/src/test/scala/integration/kafka/api/EndToEndClusterIdTest.scala
index 2492903f535e8..25f7ce6a8c0c4 100644
--- a/core/src/test/scala/integration/kafka/api/EndToEndClusterIdTest.scala
+++ b/core/src/test/scala/integration/kafka/api/EndToEndClusterIdTest.scala
@@ -29,10 +29,12 @@ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, Produce
 import org.apache.kafka.common.{ClusterResource, ClusterResourceListener, TopicPartition}
 import org.apache.kafka.test.{TestUtils => _, _}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
 
 import scala.jdk.CollectionConverters._
 import org.apache.kafka.test.TestUtils.isValidClusterId
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 /** The test cases here verify the following conditions.
   * 1. The ProducerInterceptor receives the cluster id after the onSend() method is called and before onAcknowledgement() method is called.
@@ -99,7 +101,7 @@ class EndToEndClusterIdTest extends KafkaServerTestHarness {
   this.serverConfig.setProperty(KafkaConfig.MetricReporterClassesProp, classOf[MockBrokerMetricsReporter].getName)
 
   override def generateConfigs = {
-    val cfgs = TestUtils.createBrokerConfigs(serverCount, zkConnect, interBrokerSecurityProtocol = Some(securityProtocol),
+    val cfgs = TestUtils.createBrokerConfigs(serverCount, zkConnectOrNull, interBrokerSecurityProtocol = Some(securityProtocol),
       trustStoreFile = trustStoreFile, saslProperties = serverSaslProperties)
     cfgs.foreach(_ ++= serverConfig)
     cfgs.map(KafkaConfig.fromProps)
@@ -113,8 +115,9 @@ class EndToEndClusterIdTest extends KafkaServerTestHarness {
     createTopic(topic, 2, serverCount)
   }
 
-  @Test
-  def testEndToEnd(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testEndToEnd(quorum: String): Unit = {
     val appendStr = "mock"
     MockConsumerInterceptor.resetCounters()
     MockProducerInterceptor.resetCounters()
diff --git a/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala
index 2b380bfd2adf2..82e637ae00281 100644
--- a/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/GroupAuthorizerIntegrationTest.scala
@@ -14,12 +14,11 @@ package kafka.api
 
 import java.util.Properties
 import java.util.concurrent.ExecutionException
-
 import kafka.api.GroupAuthorizerIntegrationTest._
 import kafka.security.authorizer.AclAuthorizer
 import kafka.security.authorizer.AclEntry.WildcardHost
 import kafka.server.{BaseRequestTest, KafkaConfig}
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.consumer.ConsumerConfig
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.TopicPartition
@@ -30,8 +29,11 @@ import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.resource.{PatternType, Resource, ResourcePattern, ResourceType}
 import org.apache.kafka.common.security.auth.{AuthenticationContext, KafkaPrincipal}
 import org.apache.kafka.common.security.authenticator.DefaultKafkaPrincipalBuilder
+import org.apache.kafka.metadata.authorizer.StandardAuthorizer
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.jdk.CollectionConverters._
 
@@ -41,11 +43,12 @@ object GroupAuthorizerIntegrationTest {
 
   val BrokerListenerName = "BROKER"
   val ClientListenerName = "CLIENT"
+  val ControllerListenerName = "CONTROLLER"
 
   class GroupPrincipalBuilder extends DefaultKafkaPrincipalBuilder(null, null) {
     override def build(context: AuthenticationContext): KafkaPrincipal = {
       context.listenerName match {
-        case BrokerListenerName => BrokerPrincipal
+        case BrokerListenerName | ControllerListenerName => BrokerPrincipal
         case ClientListenerName => ClientPrincipal
         case listenerName => throw new IllegalArgumentException(s"No principal mapped to listener $listenerName")
       }
@@ -64,9 +67,25 @@ class GroupAuthorizerIntegrationTest extends BaseRequestTest {
   def brokerPrincipal: KafkaPrincipal = BrokerPrincipal
   def clientPrincipal: KafkaPrincipal = ClientPrincipal
 
+  override def kraftControllerConfigs(): collection.Seq[Properties] = {
+    val controllerConfigs = super.kraftControllerConfigs()
+    controllerConfigs.foreach(addNodeProperties)
+    controllerConfigs
+  }
+
   override def brokerPropertyOverrides(properties: Properties): Unit = {
-    properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[AclAuthorizer].getName)
     properties.put(KafkaConfig.BrokerIdProp, brokerId.toString)
+    addNodeProperties(properties)
+  }
+
+  private def addNodeProperties(properties: Properties): Unit = {
+    if (isKRaftTest()) {
+      properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[StandardAuthorizer].getName)
+      properties.put(StandardAuthorizer.SUPER_USERS_CONFIG, BrokerPrincipal.toString)
+    } else {
+      properties.put(KafkaConfig.AuthorizerClassNameProp, classOf[AclAuthorizer].getName)
+    }
+
     properties.put(KafkaConfig.OffsetsTopicPartitionsProp, "1")
     properties.put(KafkaConfig.OffsetsTopicReplicationFactorProp, "1")
     properties.put(KafkaConfig.TransactionsTopicPartitionsProp, "1")
@@ -80,11 +99,12 @@ class GroupAuthorizerIntegrationTest extends BaseRequestTest {
     doSetup(testInfo, createOffsetsTopic = false)
 
     // Allow inter-broker communication
-    TestUtils.addAndVerifyAcls(brokers,
+    addAndVerifyAcls(
       Set(createAcl(AclOperation.CLUSTER_ACTION, AclPermissionType.ALLOW, principal = BrokerPrincipal)),
-      new ResourcePattern(ResourceType.CLUSTER, Resource.CLUSTER_NAME, PatternType.LITERAL))
+      new ResourcePattern(ResourceType.CLUSTER, Resource.CLUSTER_NAME, PatternType.LITERAL)
+    )
 
-    TestUtils.createOffsetsTopic(zkClient, servers)
+    createOffsetsTopic(interBrokerListenerName)
   }
 
   private def createAcl(aclOperation: AclOperation,
@@ -93,12 +113,13 @@ class GroupAuthorizerIntegrationTest extends BaseRequestTest {
     new AccessControlEntry(principal.toString, WildcardHost, aclOperation, aclPermissionType)
   }
 
-  @Test
-  def testUnauthorizedProduceAndConsume(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testUnauthorizedProduceAndConsume(quorum: String): Unit = {
     val topic = "topic"
     val topicPartition = new TopicPartition("topic", 0)
 
-    createTopic(topic)
+    createTopic(topic, listenerName = interBrokerListenerName)
 
     val producer = createProducer()
     val produceException = assertThrows(classOf[ExecutionException],
@@ -113,22 +134,25 @@ class GroupAuthorizerIntegrationTest extends BaseRequestTest {
     assertEquals(Set(topic), consumeException.unauthorizedTopics.asScala)
   }
 
-  @Test
-  def testAuthorizedProduceAndConsume(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAuthorizedProduceAndConsume(quorum: String): Unit = {
     val topic = "topic"
     val topicPartition = new TopicPartition("topic", 0)
 
-    createTopic(topic)
+    createTopic(topic, listenerName = interBrokerListenerName)
 
-    TestUtils.addAndVerifyAcls(brokers,
+    addAndVerifyAcls(
       Set(createAcl(AclOperation.WRITE, AclPermissionType.ALLOW)),
-      new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL))
+      new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL)
+    )
     val producer = createProducer()
     producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic, "message".getBytes)).get()
 
-    TestUtils.addAndVerifyAcls(brokers,
+    addAndVerifyAcls(
       Set(createAcl(AclOperation.READ, AclPermissionType.ALLOW)),
-      new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL))
+      new ResourcePattern(ResourceType.TOPIC, topic, PatternType.LITERAL)
+    )
     val consumer = createConsumer(configsToRemove = List(ConsumerConfig.GROUP_ID_CONFIG))
     consumer.assign(List(topicPartition).asJava)
     TestUtils.pollUntilAtLeastNumRecords(consumer, numRecords = 1)
diff --git a/core/src/test/scala/integration/kafka/api/MetricsTest.scala b/core/src/test/scala/integration/kafka/api/MetricsTest.scala
index f5a3ae439f12a..612092f41eb42 100644
--- a/core/src/test/scala/integration/kafka/api/MetricsTest.scala
+++ b/core/src/test/scala/integration/kafka/api/MetricsTest.scala
@@ -13,11 +13,11 @@
 package kafka.api
 
 import java.util.{Locale, Properties}
+
 import kafka.log.LogConfig
 import kafka.server.{KafkaConfig, KafkaServer}
 import kafka.utils.{JaasTestUtils, TestUtils}
 import com.yammer.metrics.core.{Gauge, Histogram, Meter}
-import kafka.metrics.KafkaYammerMetrics
 import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
 import org.apache.kafka.common.{Metric, MetricName, TopicPartition}
@@ -26,6 +26,7 @@ import org.apache.kafka.common.errors.{InvalidTopicException, UnknownTopicOrPart
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.security.authenticator.TestJaasConfig
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 import org.junit.jupiter.api.Assertions._
 
diff --git a/core/src/test/scala/integration/kafka/api/PlaintextAdminIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextAdminIntegrationTest.scala
index fc14de187c931..203c04a68a7a9 100644
--- a/core/src/test/scala/integration/kafka/api/PlaintextAdminIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/PlaintextAdminIntegrationTest.scala
@@ -25,14 +25,16 @@ import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
 import java.util.concurrent.{CountDownLatch, ExecutionException, TimeUnit}
 import java.util.{Collections, Optional, Properties}
 import java.{time, util}
-
+import kafka.integration.KafkaServerTestHarness
 import kafka.log.LogConfig
 import kafka.security.authorizer.AclEntry
-import kafka.server.{Defaults, DynamicConfig, KafkaConfig, KafkaServer}
+import kafka.server.metadata.KRaftMetadataCache
+import kafka.server.{Defaults, DynamicConfig, KafkaConfig}
 import kafka.utils.TestUtils._
-import kafka.utils.{Log4jController, TestUtils}
-import kafka.zk.KafkaZkClient
+import kafka.utils.{Log4jController, TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.HostResolver
+import org.apache.kafka.clients.admin.AlterConfigOp.OpType
+import org.apache.kafka.clients.admin.ConfigEntry.ConfigSource
 import org.apache.kafka.clients.admin._
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
@@ -44,7 +46,9 @@ import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourceT
 import org.apache.kafka.common.utils.{Time, Utils}
 import org.apache.kafka.common.{ConsumerGroupState, ElectionType, TopicCollection, TopicPartition, TopicPartitionInfo, TopicPartitionReplica, Uuid}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import org.slf4j.LoggerFactory
 
 import scala.annotation.nowarn
@@ -74,7 +78,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
     brokerLoggerConfigResource = new ConfigResource(
-      ConfigResource.Type.BROKER_LOGGER, servers.head.config.brokerId.toString)
+      ConfigResource.Type.BROKER_LOGGER, brokers.head.config.brokerId.toString)
   }
 
   @AfterEach
@@ -83,15 +87,17 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     super.tearDown()
   }
 
-  @Test
-  def testClose(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testClose(quorum: String): Unit = {
     val client = Admin.create(createConfig)
     client.close()
     client.close() // double close has no effect
   }
 
-  @Test
-  def testListNodes(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListNodes(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val brokerStrs = bootstrapServers().split(",").toList.sorted
     var nodeStrs: List[String] = null
@@ -102,8 +108,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(brokerStrs.mkString(","), nodeStrs.mkString(","))
   }
 
-  @Test
-  def testAdminClientHandlingBadIPWithoutTimeout(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAdminClientHandlingBadIPWithoutTimeout(quorum: String): Unit = {
     val config = createConfig
     config.put(AdminClientConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG, "1000")
     val returnBadAddressFirst = new HostResolver {
@@ -116,8 +123,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     client.describeCluster().nodes().get()
   }
 
-  @Test
-  def testCreateExistingTopicsThrowTopicExistsException(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateExistingTopicsThrowTopicExistsException(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topic = "mytopic"
     val topics = Seq(topic)
@@ -126,14 +134,15 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     client.createTopics(newTopics.asJava).all.get()
     waitForTopics(client, topics, List())
 
-    val newTopicsWithInvalidRF = Seq(new NewTopic(topic, 1, (servers.size + 1).toShort))
+    val newTopicsWithInvalidRF = Seq(new NewTopic(topic, 1, (brokers.size + 1).toShort))
     val e = assertThrows(classOf[ExecutionException],
       () => client.createTopics(newTopicsWithInvalidRF.asJava, new CreateTopicsOptions().validateOnly(true)).all.get())
     assertTrue(e.getCause.isInstanceOf[TopicExistsException])
   }
 
-  @Test
-  def testDeleteTopicsWithIds(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteTopicsWithIds(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topics = Seq("mytopic", "mytopic2", "mytopic3")
     val newTopics = Seq(
@@ -150,15 +159,16 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     waitForTopics(client, List(), topics)
   }
 
-  @Test
-  def testMetadataRefresh(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // KRaft mode will be supported in KAFKA-13910
+  def testMetadataRefresh(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topics = Seq("mytopic")
     val newTopics = Seq(new NewTopic("mytopic", 3, 3.toShort))
     client.createTopics(newTopics.asJava).all.get()
     waitForTopics(client, expectedPresent = topics, expectedMissing = List())
 
-    val controller = servers.find(_.config.brokerId == TestUtils.waitUntilControllerElected(zkClient)).get
+    val controller = brokers.find(_.config.brokerId == brokers.flatMap(_.metadataCache.getControllerId).head).get
     controller.shutdown()
     controller.awaitShutdown()
     val topicDesc = client.describeTopics(topics.asJava).allTopicNames.get()
@@ -168,8 +178,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
   /**
     * describe should not auto create topics
     */
-  @Test
-  def testDescribeNonExistingTopic(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeNonExistingTopic(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val existingTopic = "existing-topic"
@@ -179,18 +190,23 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val nonExistingTopic = "non-existing"
     val results = client.describeTopics(Seq(nonExistingTopic, existingTopic).asJava).topicNameValues()
     assertEquals(existingTopic, results.get(existingTopic).get.name)
-    assertThrows(classOf[ExecutionException], () => results.get(nonExistingTopic).get).getCause.isInstanceOf[UnknownTopicOrPartitionException]
-    assertEquals(None, zkClient.getTopicPartitionCount(nonExistingTopic))
+    assertFutureExceptionTypeEquals(results.get(nonExistingTopic), classOf[UnknownTopicOrPartitionException])
+    if (!isKRaftTest()) {
+      assertEquals(None, zkClient.getTopicPartitionCount(nonExistingTopic))
+    }
   }
 
-  @Test
-  def testDescribeTopicsWithIds(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeTopicsWithIds(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val existingTopic = "existing-topic"
     client.createTopics(Seq(existingTopic).map(new NewTopic(_, 1, 1.toShort)).asJava).all.get()
     waitForTopics(client, Seq(existingTopic), List())
-    val existingTopicId = zkClient.getTopicIdsForTopics(Set(existingTopic)).values.head
+    ensureConsistentKRaftMetadata()
+
+    val existingTopicId = brokers.head.metadataCache.getTopicId(existingTopic)
 
     val nonExistingTopicId = Uuid.randomUuid()
 
@@ -199,37 +215,48 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertThrows(classOf[ExecutionException], () => results.get(nonExistingTopicId).get).getCause.isInstanceOf[UnknownTopicIdException]
   }
 
-  @Test
-  def testDescribeCluster(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeCluster(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val result = client.describeCluster
     val nodes = result.nodes.get()
     val clusterId = result.clusterId().get()
-    assertEquals(servers.head.dataPlaneRequestProcessor.clusterId, clusterId)
+    assertEquals(brokers.head.dataPlaneRequestProcessor.clusterId, clusterId)
     val controller = result.controller().get()
-    assertEquals(servers.head.dataPlaneRequestProcessor.metadataCache.getControllerId.
-      getOrElse(MetadataResponse.NO_CONTROLLER_ID), controller.id())
-    val brokers = bootstrapServers().split(",")
-    assertEquals(brokers.size, nodes.size)
+
+    if (isKRaftTest()) {
+      // In KRaft, we return a random brokerId as the current controller.
+      val brokerIds = brokers.map(_.config.brokerId).toSet
+      assertTrue(brokerIds.contains(controller.id))
+    } else {
+      assertEquals(brokers.head.dataPlaneRequestProcessor.metadataCache.getControllerId.
+        getOrElse(MetadataResponse.NO_CONTROLLER_ID), controller.id)
+    }
+
+    val brokerEndpoints = bootstrapServers().split(",")
+    assertEquals(brokerEndpoints.size, nodes.size)
     for (node <- nodes.asScala) {
       val hostStr = s"${node.host}:${node.port}"
-      assertTrue(brokers.contains(hostStr), s"Unknown host:port pair $hostStr in brokerVersionInfos")
+      assertTrue(brokerEndpoints.contains(hostStr), s"Unknown host:port pair $hostStr in brokerVersionInfos")
     }
   }
 
-  @Test
-  def testDescribeLogDirs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeLogDirs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topic = "topic"
     val leaderByPartition = createTopic(topic, numPartitions = 10)
     val partitionsByBroker = leaderByPartition.groupBy { case (_, leaderId) => leaderId }.map { case (k, v) =>
       k -> v.keys.toSeq
     }
-    val brokers = (0 until brokerCount).map(Integer.valueOf)
-    val logDirInfosByBroker = client.describeLogDirs(brokers.asJava).allDescriptions.get
+    ensureConsistentKRaftMetadata()
+    val brokerIds = (0 until brokerCount).map(Integer.valueOf)
+    val logDirInfosByBroker = client.describeLogDirs(brokerIds.asJava).allDescriptions.get
 
     (0 until brokerCount).foreach { brokerId =>
-      val server = servers.find(_.config.brokerId == brokerId).get
+      val server = brokers.find(_.config.brokerId == brokerId).get
       val expectedPartitions = partitionsByBroker(brokerId)
       val logDirInfos = logDirInfosByBroker.get(brokerId)
       val replicaInfos = logDirInfos.asScala.flatMap { case (_, logDirInfo) =>
@@ -238,6 +265,8 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
 
       assertEquals(expectedPartitions.toSet, replicaInfos.keys.map(_.partition).toSet)
       logDirInfos.forEach { (logDir, logDirInfo) =>
+        assertTrue(logDirInfo.totalBytes.isPresent)
+        assertTrue(logDirInfo.usableBytes.isPresent)
         logDirInfo.replicaInfos.asScala.keys.foreach(tp =>
           assertEquals(server.logManager.getLog(tp).get.dir.getParent, logDir)
         )
@@ -245,36 +274,39 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
   }
 
-  @Test
-  def testDescribeReplicaLogDirs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeReplicaLogDirs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topic = "topic"
     val leaderByPartition = createTopic(topic, numPartitions = 10)
     val replicas = leaderByPartition.map { case (partition, brokerId) =>
       new TopicPartitionReplica(topic, partition, brokerId)
     }.toSeq
+    ensureConsistentKRaftMetadata()
 
     val replicaDirInfos = client.describeReplicaLogDirs(replicas.asJavaCollection).all.get
     replicaDirInfos.forEach { (topicPartitionReplica, replicaDirInfo) =>
-      val server = servers.find(_.config.brokerId == topicPartitionReplica.brokerId()).get
+      val server = brokers.find(_.config.brokerId == topicPartitionReplica.brokerId()).get
       val tp = new TopicPartition(topicPartitionReplica.topic(), topicPartitionReplica.partition())
       assertEquals(server.logManager.getLog(tp).get.dir.getParent, replicaDirInfo.getCurrentReplicaLogDir)
     }
   }
 
-  @Test
-  def testAlterReplicaLogDirs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterReplicaLogDirs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topic = "topic"
     val tp = new TopicPartition(topic, 0)
-    val randomNums = servers.map(server => server -> Random.nextInt(2)).toMap
+    val randomNums = brokers.map(server => server -> Random.nextInt(2)).toMap
 
     // Generate two mutually exclusive replicaAssignment
-    val firstReplicaAssignment = servers.map { server =>
+    val firstReplicaAssignment = brokers.map { server =>
       val logDir = new File(server.config.logDirs(randomNums(server))).getAbsolutePath
       new TopicPartitionReplica(topic, 0, server.config.brokerId) -> logDir
     }.toMap
-    val secondReplicaAssignment = servers.map { server =>
+    val secondReplicaAssignment = brokers.map { server =>
       val logDir = new File(server.config.logDirs(1 - randomNums(server))).getAbsolutePath
       new TopicPartitionReplica(topic, 0, server.config.brokerId) -> logDir
     }.toMap
@@ -288,14 +320,15 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
 
     createTopic(topic, replicationFactor = brokerCount)
-    servers.foreach { server =>
+    ensureConsistentKRaftMetadata()
+    brokers.foreach { server =>
       val logDir = server.logManager.getLog(tp).get.dir.getParent
       assertEquals(firstReplicaAssignment(new TopicPartitionReplica(topic, 0, server.config.brokerId)), logDir)
     }
 
     // Verify that replica can be moved to the specified log directory after the topic has been created
     client.alterReplicaLogDirs(secondReplicaAssignment.asJava, new AlterReplicaLogDirsOptions).all.get
-    servers.foreach { server =>
+    brokers.foreach { server =>
       TestUtils.waitUntilTrue(() => {
         val logDir = server.logManager.getLog(tp).get.dir.getParent
         secondReplicaAssignment(new TopicPartitionReplica(topic, 0, server.config.brokerId)) == logDir
@@ -328,7 +361,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     try {
       TestUtils.waitUntilTrue(() => numMessages.get > 10, s"only $numMessages messages are produced before timeout. Producer future ${producerFuture.value}")
       client.alterReplicaLogDirs(firstReplicaAssignment.asJava, new AlterReplicaLogDirsOptions).all.get
-      servers.foreach { server =>
+      brokers.foreach { server =>
         TestUtils.waitUntilTrue(() => {
           val logDir = server.logManager.getLog(tp).get.dir.getParent
           firstReplicaAssignment(new TopicPartitionReplica(topic, 0, server.config.brokerId)) == logDir
@@ -343,15 +376,16 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val finalNumMessages = Await.result(producerFuture, Duration(20, TimeUnit.SECONDS))
 
     // Verify that all messages that are produced can be consumed
-    val consumerRecords = TestUtils.consumeTopicRecords(servers, topic, finalNumMessages,
+    val consumerRecords = TestUtils.consumeTopicRecords(brokers, topic, finalNumMessages,
       securityProtocol = securityProtocol, trustStoreFile = trustStoreFile)
     consumerRecords.zipWithIndex.foreach { case (consumerRecord, index) =>
       assertEquals(s"xxxxxxxxxxxxxxxxxxxx-$index", new String(consumerRecord.value))
     }
   }
 
-  @Test
-  def testDescribeAndAlterConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeAndAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -367,8 +401,8 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     createTopic(topic2)
 
     // Describe topics and broker
-    val brokerResource1 = new ConfigResource(ConfigResource.Type.BROKER, servers(1).config.brokerId.toString)
-    val brokerResource2 = new ConfigResource(ConfigResource.Type.BROKER, servers(2).config.brokerId.toString)
+    val brokerResource1 = new ConfigResource(ConfigResource.Type.BROKER, brokers(1).config.brokerId.toString)
+    val brokerResource2 = new ConfigResource(ConfigResource.Type.BROKER, brokers(2).config.brokerId.toString)
     val configResources = Seq(topicResource1, topicResource2, brokerResource1, brokerResource2)
     val describeResult = client.describeConfigs(configResources.asJava)
     val configs = describeResult.all.get
@@ -392,10 +426,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertFalse(maxMessageBytes2.isSensitive)
     assertFalse(maxMessageBytes2.isReadOnly)
 
-    assertEquals(servers(1).config.nonInternalValues.size, configs.get(brokerResource1).entries.size)
-    assertEquals(servers(1).config.brokerId.toString, configs.get(brokerResource1).get(KafkaConfig.BrokerIdProp).value)
+    assertEquals(brokers(1).config.nonInternalValues.size, configs.get(brokerResource1).entries.size)
+    assertEquals(brokers(1).config.brokerId.toString, configs.get(brokerResource1).get(KafkaConfig.BrokerIdProp).value)
     val listenerSecurityProtocolMap = configs.get(brokerResource1).get(KafkaConfig.ListenerSecurityProtocolMapProp)
-    assertEquals(servers(1).config.getString(KafkaConfig.ListenerSecurityProtocolMapProp), listenerSecurityProtocolMap.value)
+    assertEquals(brokers(1).config.getString(KafkaConfig.ListenerSecurityProtocolMapProp), listenerSecurityProtocolMap.value)
     assertEquals(KafkaConfig.ListenerSecurityProtocolMapProp, listenerSecurityProtocolMap.name)
     assertFalse(listenerSecurityProtocolMap.isDefault)
     assertFalse(listenerSecurityProtocolMap.isSensitive)
@@ -407,22 +441,23 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertTrue(truststorePassword.isSensitive)
     assertFalse(truststorePassword.isReadOnly)
     val compressionType = configs.get(brokerResource1).get(KafkaConfig.CompressionTypeProp)
-    assertEquals(servers(1).config.compressionType, compressionType.value)
+    assertEquals(brokers(1).config.compressionType, compressionType.value)
     assertEquals(KafkaConfig.CompressionTypeProp, compressionType.name)
     assertTrue(compressionType.isDefault)
     assertFalse(compressionType.isSensitive)
     assertFalse(compressionType.isReadOnly)
 
-    assertEquals(servers(2).config.nonInternalValues.size, configs.get(brokerResource2).entries.size)
-    assertEquals(servers(2).config.brokerId.toString, configs.get(brokerResource2).get(KafkaConfig.BrokerIdProp).value)
-    assertEquals(servers(2).config.logCleanerThreads.toString,
+    assertEquals(brokers(2).config.nonInternalValues.size, configs.get(brokerResource2).entries.size)
+    assertEquals(brokers(2).config.brokerId.toString, configs.get(brokerResource2).get(KafkaConfig.BrokerIdProp).value)
+    assertEquals(brokers(2).config.logCleanerThreads.toString,
       configs.get(brokerResource2).get(KafkaConfig.LogCleanerThreadsProp).value)
 
-    checkValidAlterConfigs(client, topicResource1, topicResource2)
+    checkValidAlterConfigs(client, this, topicResource1, topicResource2)
   }
 
-  @Test
-  def testCreatePartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreatePartitions(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -486,7 +521,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       var e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidPartitionsException when newCount is a decrease")
       assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException], desc)
-      assertEquals("Topic currently has 3 partitions, which is higher than the requested 1.", e.getCause.getMessage, desc)
+      var exceptionMsgStr = if (isKRaftTest()) {
+        "The topic create-partitions-topic-1 currently has 3 partition(s); 1 would not be an increase."
+      } else {
+        "Topic currently has 3 partitions, which is higher than the requested 1."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try a newCount which would be a noop (without assignment)
@@ -495,7 +535,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic2).get,
         () => s"$desc: Expect InvalidPartitionsException when requesting a noop")
       assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException], desc)
-      assertEquals("Topic already has 3 partitions.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "Topic already has 3 partition(s)."
+      } else {
+        "Topic already has 3 partitions."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic2, Some(3)), desc)
 
       // try a newCount which would be a noop (where the assignment matches current state)
@@ -503,7 +548,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
         NewPartitions.increaseTo(3, newPartition2Assignments)).asJava, option)
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic2).get)
       assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException], desc)
-      assertEquals("Topic already has 3 partitions.", e.getCause.getMessage, desc)
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic2, Some(3)), desc)
 
       // try a newCount which would be a noop (where the assignment doesn't match current state)
@@ -511,7 +556,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
         NewPartitions.increaseTo(3, newPartition2Assignments.asScala.reverse.toList.asJava)).asJava, option)
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic2).get)
       assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException], desc)
-      assertEquals("Topic already has 3 partitions.", e.getCause.getMessage, desc)
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic2, Some(3)), desc)
 
       // try a bad topic name
@@ -521,7 +566,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(unknownTopic).get,
         () => s"$desc: Expect InvalidTopicException when using an unknown topic")
       assertTrue(e.getCause.isInstanceOf[UnknownTopicOrPartitionException], desc)
-      assertEquals("The topic 'an-unknown-topic' does not exist.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "This server does not host this topic-partition."
+      } else {
+        "The topic 'an-unknown-topic' does not exist."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
 
       // try an invalid newCount
       alterResult = client.createPartitions(Map(topic1 ->
@@ -529,7 +579,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidPartitionsException when newCount is invalid")
       assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException], desc)
-      assertEquals("Topic currently has 3 partitions, which is higher than the requested -22.", e.getCause.getMessage,
+      exceptionMsgStr = if (isKRaftTest()) {
+        "The topic create-partitions-topic-1 currently has 3 partition(s); -22 would not be an increase."
+      } else {
+        "Topic currently has 3 partitions, which is higher than the requested -22."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage,
         desc)
       assertEquals(3, numPartitions(topic1), desc)
 
@@ -539,9 +594,14 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidPartitionsException when #brokers != replication factor")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Inconsistent replication factor between partitions, partition 0 has 1 " +
-        "while partitions [3] have replication factors [2], respectively.",
-        e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "The manual partition assignment includes a partition with 2 replica(s), but this is not " +
+          "consistent with previous partitions, which have 1 replica(s)."
+      } else {
+        "Inconsistent replication factor between partitions, partition 0 has 1 while partitions [3] " +
+          "have replication factors [2], respectively."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try #assignments < with the increase
@@ -550,7 +610,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when #assignments != newCount - oldCount")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Increasing the number of partitions by 3 but 1 assignments provided.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "Attempted to add 3 additional partition(s), but only 1 assignment(s) were specified."
+      } else {
+        "Increasing the number of partitions by 3 but 1 assignments provided."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try #assignments > with the increase
@@ -558,8 +623,13 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
         NewPartitions.increaseTo(4, asList(asList(1), asList(2)))).asJava, option)
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when #assignments != newCount - oldCount")
+      exceptionMsgStr = if (isKRaftTest()) {
+        "Attempted to add 1 additional partition(s), but only 2 assignment(s) were specified."
+      } else {
+        "Increasing the number of partitions by 1 but 2 assignments provided."
+      }
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Increasing the number of partitions by 1 but 2 assignments provided.", e.getCause.getMessage, desc)
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try with duplicate brokers in assignments
@@ -568,8 +638,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when assignments has duplicate brokers")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Duplicate brokers not allowed in replica assignment: 1, 1 for partition id 3.",
-        e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "The manual partition assignment includes the broker 1 more than once."
+      } else {
+        "Duplicate brokers not allowed in replica assignment: 1, 1 for partition id 3."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try assignments with differently sized inner lists
@@ -578,8 +652,14 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when assignments have differently sized inner lists")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Inconsistent replication factor between partitions, partition 0 has 1 " +
-        "while partitions [4] have replication factors [2], respectively.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "The manual partition assignment includes a partition with 2 replica(s), but this is not " +
+          "consistent with previous partitions, which have 1 replica(s)."
+      } else {
+        "Inconsistent replication factor between partitions, partition 0 has 1 " +
+          "while partitions [4] have replication factors [2], respectively."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try assignments with unknown brokers
@@ -588,7 +668,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when assignments contains an unknown broker")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Unknown broker(s) in replica assignment: 12.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "The manual partition assignment includes broker 12, but no such broker is registered."
+      } else {
+        "Unknown broker(s) in replica assignment: 12."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
 
       // try with empty assignments
@@ -597,7 +682,12 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
         () => s"$desc: Expect InvalidReplicaAssignmentException when assignments is empty")
       assertTrue(e.getCause.isInstanceOf[InvalidReplicaAssignmentException], desc)
-      assertEquals("Increasing the number of partitions by 1 but 0 assignments provided.", e.getCause.getMessage, desc)
+      exceptionMsgStr = if (isKRaftTest()) {
+        "Attempted to add 1 additional partition(s), but only 0 assignment(s) were specified."
+      } else {
+        "Increasing the number of partitions by 1 but 0 assignments provided."
+      }
+      assertEquals(exceptionMsgStr, e.getCause.getMessage, desc)
       assertEquals(3, numPartitions(topic1), desc)
     }
 
@@ -610,22 +700,35 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     TestUtils.waitUntilTrue(() => numPartitions(topic1) == 4, "Timed out waiting for new partitions to appear")
     var e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic2).get)
     assertTrue(e.getCause.isInstanceOf[InvalidPartitionsException])
-    assertEquals("Topic currently has 3 partitions, which is higher than the requested 2.", e.getCause.getMessage)
+    val exceptionMsgStr = if (isKRaftTest()) {
+      "The topic create-partitions-topic-2 currently has 3 partition(s); 2 would not be an increase."
+    } else {
+      "Topic currently has 3 partitions, which is higher than the requested 2."
+    }
+    assertEquals(exceptionMsgStr, e.getCause.getMessage)
     assertEquals(3, numPartitions(topic2))
 
-    // finally, try to add partitions to a topic queued for deletion
+    // Delete the topic. Verify addition of partitions to deleted topic is not possible. In
+    // Zookeeper mode, the topic is queued for deletion. In KRaft, the deletion occurs
+    // immediately and hence we have a different Exception thrown in the response.
     val deleteResult = client.deleteTopics(asList(topic1))
     deleteResult.topicNameValues.get(topic1).get
     alterResult = client.createPartitions(Map(topic1 ->
       NewPartitions.increaseTo(4)).asJava, validateOnly)
     e = assertThrows(classOf[ExecutionException], () => alterResult.values.get(topic1).get,
-      () => "Expect InvalidTopicException when the topic is queued for deletion")
-    assertTrue(e.getCause.isInstanceOf[InvalidTopicException])
-    assertEquals("The topic is queued for deletion.", e.getCause.getMessage)
+      () => "Expect InvalidTopicException or UnknownTopicOrPartitionException when the topic is queued for deletion")
+    if (isKRaftTest()) {
+      assertTrue(e.getCause.isInstanceOf[UnknownTopicOrPartitionException], e.toString)
+      assertEquals("This server does not host this topic-partition.", e.getCause.getMessage)
+    } else {
+      assertTrue(e.getCause.isInstanceOf[InvalidTopicException], e.toString)
+      assertEquals("The topic is queued for deletion.", e.getCause.getMessage)
+    }
   }
 
-  @Test
-  def testSeekAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSeekAfterDeleteRecords(quorum: String): Unit = {
     createTopic(topic, numPartitions = 2, replicationFactor = brokerCount)
 
     client = Admin.create(createConfig)
@@ -653,8 +756,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(10L, consumer.position(topicPartition))
   }
 
-  @Test
-  def testLogStartOffsetCheckpoint(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testLogStartOffsetCheckpoint(quorum: String): Unit = {
     createTopic(topic, numPartitions = 2, replicationFactor = brokerCount)
 
     client = Admin.create(createConfig)
@@ -692,8 +796,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }, s"Expected low watermark of the partition to be 5 but got ${lowWatermark.getOrElse("no response within the timeout")}")
   }
 
-  @Test
-  def testLogStartOffsetAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testLogStartOffsetAfterDeleteRecords(quorum: String): Unit = {
     createTopic(topic, numPartitions = 2, replicationFactor = brokerCount)
 
     client = Admin.create(createConfig)
@@ -709,25 +814,26 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(3L, lowWatermark)
 
     for (i <- 0 until brokerCount)
-      assertEquals(3, servers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
+      assertEquals(3, brokers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
   }
 
-  @Test
-  def testReplicaCanFetchFromLogStartOffsetAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testReplicaCanFetchFromLogStartOffsetAfterDeleteRecords(quorum: String): Unit = {
     val leaders = createTopic(topic, replicationFactor = brokerCount)
-    val followerIndex = if (leaders(0) != servers(0).config.brokerId) 0 else 1
+    val followerIndex = if (leaders(0) != brokers(0).config.brokerId) 0 else 1
 
     def waitForFollowerLog(expectedStartOffset: Long, expectedEndOffset: Long): Unit = {
-      TestUtils.waitUntilTrue(() => servers(followerIndex).replicaManager.localLog(topicPartition) != None,
+      TestUtils.waitUntilTrue(() => brokers(followerIndex).replicaManager.localLog(topicPartition) != None,
                               "Expected follower to create replica for partition")
 
       // wait until the follower discovers that log start offset moved beyond its HW
       TestUtils.waitUntilTrue(() => {
-        servers(followerIndex).replicaManager.localLog(topicPartition).get.logStartOffset == expectedStartOffset
+        brokers(followerIndex).replicaManager.localLog(topicPartition).get.logStartOffset == expectedStartOffset
       }, s"Expected follower to discover new log start offset $expectedStartOffset")
 
       TestUtils.waitUntilTrue(() => {
-        servers(followerIndex).replicaManager.localLog(topicPartition).get.logEndOffset == expectedEndOffset
+        brokers(followerIndex).replicaManager.localLog(topicPartition).get.logEndOffset == expectedEndOffset
       }, s"Expected follower to catch up to log end offset $expectedEndOffset")
     }
 
@@ -748,7 +854,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
 
     // after the new replica caught up, all replicas should have same log start offset
     for (i <- 0 until brokerCount)
-      assertEquals(3, servers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
+      assertEquals(3, brokers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
 
     // kill the same follower again, produce more records, and delete records beyond follower's LOE
     killBroker(followerIndex)
@@ -759,8 +865,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     waitForFollowerLog(expectedStartOffset=117L, expectedEndOffset=200L)
   }
 
-  @Test
-  def testAlterLogDirsAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterLogDirsAfterDeleteRecords(quorum: String): Unit = {
     client = Admin.create(createConfig)
     createTopic(topic, replicationFactor = brokerCount)
     val expectedLEO = 100
@@ -772,27 +879,28 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     result.all().get()
     // make sure we are in the expected state after delete records
     for (i <- 0 until brokerCount) {
-      assertEquals(3, servers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
-      assertEquals(expectedLEO, servers(i).replicaManager.localLog(topicPartition).get.logEndOffset)
+      assertEquals(3, brokers(i).replicaManager.localLog(topicPartition).get.logStartOffset)
+      assertEquals(expectedLEO, brokers(i).replicaManager.localLog(topicPartition).get.logEndOffset)
     }
 
     // we will create another dir just for one server
-    val futureLogDir = servers(0).config.logDirs(1)
-    val futureReplica = new TopicPartitionReplica(topic, 0, servers(0).config.brokerId)
+    val futureLogDir = brokers(0).config.logDirs(1)
+    val futureReplica = new TopicPartitionReplica(topic, 0, brokers(0).config.brokerId)
 
     // Verify that replica can be moved to the specified log directory
     client.alterReplicaLogDirs(Map(futureReplica -> futureLogDir).asJava).all.get
     TestUtils.waitUntilTrue(() => {
-      futureLogDir == servers(0).logManager.getLog(topicPartition).get.dir.getParent
+      futureLogDir == brokers(0).logManager.getLog(topicPartition).get.dir.getParent
     }, "timed out waiting for replica movement")
 
     // once replica moved, its LSO and LEO should match other replicas
-    assertEquals(3, servers.head.replicaManager.localLog(topicPartition).get.logStartOffset)
-    assertEquals(expectedLEO, servers.head.replicaManager.localLog(topicPartition).get.logEndOffset)
+    assertEquals(3, brokers.head.replicaManager.localLog(topicPartition).get.logStartOffset)
+    assertEquals(expectedLEO, brokers.head.replicaManager.localLog(topicPartition).get.logEndOffset)
   }
 
-  @Test
-  def testOffsetsForTimesAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testOffsetsForTimesAfterDeleteRecords(quorum: String): Unit = {
     createTopic(topic, numPartitions = 2, replicationFactor = brokerCount)
 
     client = Admin.create(createConfig)
@@ -813,8 +921,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertNull(consumer.offsetsForTimes(Map(topicPartition -> JLong.valueOf(0L)).asJava).get(topicPartition))
   }
 
-  @Test
-  def testConsumeAfterDeleteRecords(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConsumeAfterDeleteRecords(quorum: String): Unit = {
     val consumer = createConsumer()
     subscribeAndWaitForAssignment(topic, consumer)
 
@@ -836,8 +945,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     TestUtils.consumeRecords(consumer, 2)
   }
 
-  @Test
-  def testDeleteRecordsWithException(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteRecordsWithException(quorum: String): Unit = {
     val consumer = createConsumer()
     subscribeAndWaitForAssignment(topic, consumer)
 
@@ -861,8 +971,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(classOf[LeaderNotAvailableException], cause.getClass)
   }
 
-  @Test
-  def testDescribeConfigsForTopic(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeConfigsForTopic(quorum: String): Unit = {
     createTopic(topic, numPartitions = 2, replicationFactor = brokerCount)
     client = Admin.create(createConfig)
 
@@ -897,10 +1008,11 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     futures.foreach(_.get)
   }
 
-  @Test
-  def testInvalidAlterConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
-    checkInvalidAlterConfigs(zkClient, servers, client)
+    checkInvalidAlterConfigs(this, client)
   }
 
   /**
@@ -908,8 +1020,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
    * Also see [[kafka.api.SaslSslAdminIntegrationTest.testAclOperations()]] for tests of ACL operations
    * when the authorizer is enabled.
    */
-  @Test
-  def testAclOperations(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAclOperations(quorum: String): Unit = {
     val acl = new AclBinding(new ResourcePattern(ResourceType.TOPIC, "mytopic3", PatternType.LITERAL),
       new AccessControlEntry("User:ANONYMOUS", "*", AclOperation.DESCRIBE, AclPermissionType.ALLOW))
     client = Admin.create(createConfig)
@@ -924,8 +1037,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     * Test closing the AdminClient with a generous timeout.  Calls in progress should be completed,
     * since they can be done within the timeout.  New calls should receive timeouts.
     */
-  @Test
-  def testDelayedClose(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDelayedClose(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topics = Seq("mytopic", "mytopic2")
     val newTopics = topics.map(new NewTopic(_, 1, 1.toShort))
@@ -941,8 +1055,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     * Test closing the AdminClient with a timeout of 0, when there are calls with extremely long
     * timeouts in progress.  The calls should be aborted after the hard shutdown timeout elapses.
     */
-  @Test
-  def testForceClose(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testForceClose(quorum: String): Unit = {
     val config = createConfig
     config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, s"localhost:${TestUtils.IncorrectBrokerPort}")
     client = Admin.create(config)
@@ -958,8 +1073,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     * Check that a call with a timeout does not complete before the minimum timeout has elapsed,
     * even when the default request timeout is shorter.
     */
-  @Test
-  def testMinimumRequestTimeouts(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMinimumRequestTimeouts(quorum: String): Unit = {
     val config = createConfig
     config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, s"localhost:${TestUtils.IncorrectBrokerPort}")
     config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "0")
@@ -975,8 +1091,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
   /**
     * Test injecting timeouts for calls that are in flight.
     */
-  @Test
-  def testCallInFlightTimeouts(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCallInFlightTimeouts(quorum: String): Unit = {
     val config = createConfig
     config.put(AdminClientConfig.DEFAULT_API_TIMEOUT_MS_CONFIG, "100000000")
     config.put(AdminClientConfig.RETRIES_CONFIG, "0")
@@ -994,8 +1111,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
   /**
    * Test the consumer group APIs.
    */
-  @Test
-  def testConsumerGroups(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConsumerGroups(quorum: String): Unit = {
     val config = createConfig
     client = Admin.create(config)
     try {
@@ -1213,8 +1331,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
   }
 
-  @Test
-  def testDeleteConsumerGroupOffsets(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteConsumerGroupOffsets(quorum: String): Unit = {
     val config = createConfig
     client = Admin.create(config)
     try {
@@ -1285,8 +1404,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
   }
 
-  @Test
-  def testElectPreferredLeaders(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectPreferredLeaders(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val prefer0 = Seq(0, 1, 2)
@@ -1294,10 +1414,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val prefer2 = Seq(2, 0, 1)
 
     val partition1 = new TopicPartition("elect-preferred-leaders-topic-1", 0)
-    TestUtils.createTopic(zkClient, partition1.topic, Map[Int, Seq[Int]](partition1.partition -> prefer0), servers)
+    createTopicWithAssignment(partition1.topic, Map[Int, Seq[Int]](partition1.partition -> prefer0))
 
     val partition2 = new TopicPartition("elect-preferred-leaders-topic-2", 0)
-    TestUtils.createTopic(zkClient, partition2.topic, Map[Int, Seq[Int]](partition2.partition -> prefer0), servers)
+    createTopicWithAssignment(partition2.topic, Map[Int, Seq[Int]](partition2.partition -> prefer0))
 
     def preferredLeader(topicPartition: TopicPartition): Int = {
       val partitionMetadata = getTopicMetadata(client, topicPartition.topic).partitions.get(topicPartition.partition)
@@ -1306,19 +1426,18 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
 
     /** Changes the <i>preferred</i> leader without changing the <i>current</i> leader. */
-    def changePreferredLeader(newAssignment: Seq[Int]) = {
+    def changePreferredLeader(newAssignment: Seq[Int]): Unit = {
       val preferred = newAssignment.head
-      val prior1 = zkClient.getLeaderForPartition(partition1).get
-      val prior2 = zkClient.getLeaderForPartition(partition2).get
-
-      var m = Map.empty[TopicPartition, Seq[Int]]
+      val prior1 = brokers.head.metadataCache.getPartitionLeaderEndpoint(partition1.topic, partition1.partition(), listenerName).get.id()
+      val prior2 = brokers.head.metadataCache.getPartitionLeaderEndpoint(partition2.topic, partition2.partition(), listenerName).get.id()
 
+      var m = Map.empty[TopicPartition, Optional[NewPartitionReassignment]]
       if (prior1 != preferred)
-        m += partition1 -> newAssignment
+        m += partition1 -> Optional.of(new NewPartitionReassignment(newAssignment.map(Int.box).asJava))
       if (prior2 != preferred)
-        m += partition2 -> newAssignment
+        m += partition2 -> Optional.of(new NewPartitionReassignment(newAssignment.map(Int.box).asJava))
+      client.alterPartitionReassignments(m.asJava).all().get()
 
-      zkClient.createPartitionReassignment(m)
       TestUtils.waitUntilTrue(
         () => preferredLeader(partition1) == preferred && preferredLeader(partition2) == preferred,
         s"Expected preferred leader to become $preferred, but is ${preferredLeader(partition1)} and ${preferredLeader(partition2)}",
@@ -1334,7 +1453,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
 
     // Noop election
     var electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition1).asJava)
-    var exception = electResult.partitions.get.get(partition1).get
+    val exception = electResult.partitions.get.get(partition1).get
     assertEquals(classOf[ElectionNotNeededException], exception.getClass)
     TestUtils.assertLeader(client, partition1, 0)
 
@@ -1363,13 +1482,24 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertFalse(electResult.partitions.get.get(partition2).isPresent)
     TestUtils.assertLeader(client, partition2, 1)
 
+    def assertUnknownTopicOrPartition(
+      topicPartition: TopicPartition,
+      result: ElectLeadersResult
+    ): Unit = {
+      val exception = result.partitions.get.get(topicPartition).get
+      assertEquals(classOf[UnknownTopicOrPartitionException], exception.getClass)
+      if (isKRaftTest()) {
+        assertEquals(s"No such topic as ${topicPartition.topic()}", exception.getMessage)
+      } else {
+        assertEquals("The partition does not exist.", exception.getMessage)
+      }
+    }
+
     // unknown topic
     val unknownPartition = new TopicPartition("topic-does-not-exist", 0)
     electResult = client.electLeaders(ElectionType.PREFERRED, Set(unknownPartition).asJava)
     assertEquals(Set(unknownPartition).asJava, electResult.partitions.get.keySet)
-    exception = electResult.partitions.get.get(unknownPartition).get
-    assertEquals(classOf[UnknownTopicOrPartitionException], exception.getClass)
-    assertEquals("The partition does not exist.", exception.getMessage)
+    assertUnknownTopicOrPartition(unknownPartition, electResult)
     TestUtils.assertLeader(client, partition1, 1)
     TestUtils.assertLeader(client, partition2, 1)
 
@@ -1381,9 +1511,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(Set(unknownPartition, partition1).asJava, electResult.partitions.get.keySet)
     TestUtils.assertLeader(client, partition1, 2)
     TestUtils.assertLeader(client, partition2, 1)
-    exception = electResult.partitions.get.get(unknownPartition).get
-    assertEquals(classOf[UnknownTopicOrPartitionException], exception.getClass)
-    assertEquals("The partition does not exist.", exception.getMessage)
+    assertUnknownTopicOrPartition(unknownPartition, electResult)
 
     // elect preferred leader for partition 2
     electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition2).asJava)
@@ -1394,41 +1522,48 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     // Now change the preferred leader to 1
     changePreferredLeader(prefer1)
     // but shut it down...
-    servers(1).shutdown()
+    brokers(1).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1, partition2), Set(1))
 
+    def assertPreferredLeaderNotAvailable(
+      topicPartition: TopicPartition,
+      result: ElectLeadersResult
+    ): Unit = {
+      val exception = result.partitions.get.get(topicPartition).get
+      assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
+      if (isKRaftTest()) {
+        assertTrue(exception.getMessage.contains(
+          "The preferred leader was not available."),
+          s"Unexpected message: ${exception.getMessage}")
+      } else {
+        assertTrue(exception.getMessage.contains(
+          s"Failed to elect leader for partition $topicPartition under strategy PreferredReplicaPartitionLeaderElectionStrategy"),
+          s"Unexpected message: ${exception.getMessage}")
+      }
+    }
+
     // ... now what happens if we try to elect the preferred leader and it's down?
     val shortTimeout = new ElectLeadersOptions().timeoutMs(10000)
     electResult = client.electLeaders(ElectionType.PREFERRED, Set(partition1).asJava, shortTimeout)
     assertEquals(Set(partition1).asJava, electResult.partitions.get.keySet)
-    exception = electResult.partitions.get.get(partition1).get
-    assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
-    assertTrue(exception.getMessage.contains(
-      "Failed to elect leader for partition elect-preferred-leaders-topic-1-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"),
-      s"Wrong message ${exception.getMessage}")
+
+    assertPreferredLeaderNotAvailable(partition1, electResult)
     TestUtils.assertLeader(client, partition1, 2)
 
     // preferred leader unavailable with null argument
     electResult = client.electLeaders(ElectionType.PREFERRED, null, shortTimeout)
+    assertTrue(Set(partition1, partition2).subsetOf(electResult.partitions.get.keySet.asScala))
 
-    exception = electResult.partitions.get.get(partition1).get
-    assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
-    assertTrue(exception.getMessage.contains(
-      "Failed to elect leader for partition elect-preferred-leaders-topic-1-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"),
-      s"Wrong message ${exception.getMessage}")
-
-    exception = electResult.partitions.get.get(partition2).get
-    assertEquals(classOf[PreferredLeaderNotAvailableException], exception.getClass)
-    assertTrue(exception.getMessage.contains(
-      "Failed to elect leader for partition elect-preferred-leaders-topic-2-0 under strategy PreferredReplicaPartitionLeaderElectionStrategy"),
-      s"Wrong message ${exception.getMessage}")
-
+    assertPreferredLeaderNotAvailable(partition1, electResult)
     TestUtils.assertLeader(client, partition1, 2)
+
+    assertPreferredLeaderNotAvailable(partition2, electResult)
     TestUtils.assertLeader(client, partition2, 2)
   }
 
-  @Test
-  def testElectUncleanLeadersForOnePartition(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersForOnePartition(quorum: String): Unit = {
     // Case: unclean leader election with one topic partition
     client = Admin.create(createConfig)
 
@@ -1437,23 +1572,24 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val assignment1 = Seq(broker1, broker2)
 
     val partition1 = new TopicPartition("unclean-test-topic-1", 0)
-    TestUtils.createTopic(zkClient, partition1.topic, Map[Int, Seq[Int]](partition1.partition -> assignment1), servers)
+    createTopicWithAssignment(partition1.topic, Map[Int, Seq[Int]](partition1.partition -> assignment1))
 
     TestUtils.assertLeader(client, partition1, broker1)
 
-    servers(broker2).shutdown()
+    brokers(broker2).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1), Set(broker2))
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertNoLeader(client, partition1)
-    servers(broker2).startup()
+    brokers(broker2).startup()
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, Set(partition1).asJava)
     assertFalse(electResult.partitions.get.get(partition1).isPresent)
     TestUtils.assertLeader(client, partition1, broker2)
   }
 
-  @Test
-  def testElectUncleanLeadersForManyPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersForManyPartitions(quorum: String): Unit = {
     // Case: unclean leader election with many topic partitions
     client = Admin.create(createConfig)
 
@@ -1466,22 +1602,20 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val partition1 = new TopicPartition(topic, 0)
     val partition2 = new TopicPartition(topic, 1)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(partition1.partition -> assignment1, partition2.partition -> assignment2),
-      servers
+      Map(partition1.partition -> assignment1, partition2.partition -> assignment2)
     )
 
     TestUtils.assertLeader(client, partition1, broker1)
     TestUtils.assertLeader(client, partition2, broker1)
 
-    servers(broker2).shutdown()
+    brokers(broker2).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1, partition2), Set(broker2))
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertNoLeader(client, partition1)
     TestUtils.assertNoLeader(client, partition2)
-    servers(broker2).startup()
+    brokers(broker2).startup()
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, Set(partition1, partition2).asJava)
     assertFalse(electResult.partitions.get.get(partition1).isPresent)
@@ -1490,8 +1624,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     TestUtils.assertLeader(client, partition2, broker2)
   }
 
-  @Test
-  def testElectUncleanLeadersForAllPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersForAllPartitions(quorum: String): Unit = {
     // Case: noop unclean leader election and valid unclean leader election for all partitions
     client = Admin.create(createConfig)
 
@@ -1505,22 +1640,20 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val partition1 = new TopicPartition(topic, 0)
     val partition2 = new TopicPartition(topic, 1)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(partition1.partition -> assignment1, partition2.partition -> assignment2),
-      servers
+      Map(partition1.partition -> assignment1, partition2.partition -> assignment2)
     )
 
     TestUtils.assertLeader(client, partition1, broker1)
     TestUtils.assertLeader(client, partition2, broker1)
 
-    servers(broker2).shutdown()
+    brokers(broker2).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1), Set(broker2))
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertNoLeader(client, partition1)
     TestUtils.assertLeader(client, partition2, broker3)
-    servers(broker2).startup()
+    brokers(broker2).startup()
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, null)
     assertFalse(electResult.partitions.get.get(partition1).isPresent)
@@ -1529,8 +1662,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     TestUtils.assertLeader(client, partition2, broker3)
   }
 
-  @Test
-  def testElectUncleanLeadersForUnknownPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersForUnknownPartitions(quorum: String): Unit = {
     // Case: unclean leader election for unknown topic
     client = Admin.create(createConfig)
 
@@ -1542,11 +1676,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val unknownPartition = new TopicPartition(topic, 1)
     val unknownTopic = new TopicPartition("unknown-topic", 0)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(0 -> assignment1),
-      servers
+      Map(0 -> assignment1)
     )
 
     TestUtils.assertLeader(client, new TopicPartition(topic, 0), broker1)
@@ -1556,8 +1688,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertTrue(electResult.partitions.get.get(unknownTopic).get.isInstanceOf[UnknownTopicOrPartitionException])
   }
 
-  @Test
-  def testElectUncleanLeadersWhenNoLiveBrokers(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersWhenNoLiveBrokers(quorum: String): Unit = {
     // Case: unclean leader election with no live brokers
     client = Admin.create(createConfig)
 
@@ -1568,26 +1701,25 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val topic = "unclean-test-topic-1"
     val partition1 = new TopicPartition(topic, 0)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(partition1.partition -> assignment1),
-      servers
+      Map(partition1.partition -> assignment1)
     )
 
     TestUtils.assertLeader(client, partition1, broker1)
 
-    servers(broker2).shutdown()
+    brokers(broker2).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1), Set(broker2))
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertNoLeader(client, partition1)
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, Set(partition1).asJava)
     assertTrue(electResult.partitions.get.get(partition1).get.isInstanceOf[EligibleLeadersNotAvailableException])
   }
 
-  @Test
-  def testElectUncleanLeadersNoop(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersNoop(quorum: String): Unit = {
     // Case: noop unclean leader election with explicit topic partitions
     client = Admin.create(createConfig)
 
@@ -1598,25 +1730,24 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val topic = "unclean-test-topic-1"
     val partition1 = new TopicPartition(topic, 0)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(partition1.partition -> assignment1),
-      servers
+      Map(partition1.partition -> assignment1)
     )
 
     TestUtils.assertLeader(client, partition1, broker1)
 
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertLeader(client, partition1, broker2)
-    servers(broker1).startup()
+    brokers(broker1).startup()
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, Set(partition1).asJava)
     assertTrue(electResult.partitions.get.get(partition1).get.isInstanceOf[ElectionNotNeededException])
   }
 
-  @Test
-  def testElectUncleanLeadersAndNoop(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testElectUncleanLeadersAndNoop(quorum: String): Unit = {
     // Case: one noop unclean leader election and one valid unclean leader election
     client = Admin.create(createConfig)
 
@@ -1630,22 +1761,20 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val partition1 = new TopicPartition(topic, 0)
     val partition2 = new TopicPartition(topic, 1)
 
-    TestUtils.createTopic(
-      zkClient,
+    createTopicWithAssignment(
       topic,
-      Map(partition1.partition -> assignment1, partition2.partition -> assignment2),
-      servers
+      Map(partition1.partition -> assignment1, partition2.partition -> assignment2)
     )
 
     TestUtils.assertLeader(client, partition1, broker1)
     TestUtils.assertLeader(client, partition2, broker1)
 
-    servers(broker2).shutdown()
+    brokers(broker2).shutdown()
     TestUtils.waitForBrokersOutOfIsr(client, Set(partition1), Set(broker2))
-    servers(broker1).shutdown()
+    brokers(broker1).shutdown()
     TestUtils.assertNoLeader(client, partition1)
     TestUtils.assertLeader(client, partition2, broker3)
-    servers(broker2).startup()
+    brokers(broker2).startup()
 
     val electResult = client.electLeaders(ElectionType.UNCLEAN, Set(partition1, partition2).asJava)
     assertFalse(electResult.partitions.get.get(partition1).isPresent)
@@ -1654,8 +1783,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     TestUtils.assertLeader(client, partition2, broker3)
   }
 
-  @Test
-  def testListReassignmentsDoesNotShowNonReassigningPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListReassignmentsDoesNotShowNonReassigningPartitions(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -1670,8 +1800,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(0, allReassignmentsMap.size())
   }
 
-  @Test
-  def testListReassignmentsDoesNotShowDeletedPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListReassignmentsDoesNotShowDeletedPartitions(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val topic = "list-reassignments-no-reassignments"
@@ -1684,8 +1815,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(0, allReassignmentsMap.size())
   }
 
-  @Test
-  def testValidIncrementalAlterConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testValidIncrementalAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -1722,6 +1854,8 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(Set(topic1Resource, topic2Resource).asJava, alterResult.values.keySet)
     alterResult.all.get
 
+    ensureConsistentKRaftMetadata()
+
     // Verify that topics were updated correctly
     var describeResult = client.describeConfigs(Seq(topic1Resource, topic2Resource).asJava)
     var configs = describeResult.all.get
@@ -1736,7 +1870,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals("lz4", configs.get(topic2Resource).get(LogConfig.CompressionTypeProp).value)
     assertEquals("delete,compact", configs.get(topic2Resource).get(LogConfig.CleanupPolicyProp).value)
 
-    //verify subtract operation, including from an empty property
+    // verify subtract operation, including from an empty property
     topic1AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.CleanupPolicyProp, LogConfig.Compact), AlterConfigOp.OpType.SUBTRACT),
       new AlterConfigOp(new ConfigEntry(LogConfig.LeaderReplicationThrottledReplicasProp, "0"), AlterConfigOp.OpType.SUBTRACT)
@@ -1754,6 +1888,8 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(Set(topic1Resource, topic2Resource).asJava, alterResult.values.keySet)
     alterResult.all.get
 
+    ensureConsistentKRaftMetadata()
+
     // Verify that topics were updated correctly
     describeResult = client.describeConfigs(Seq(topic1Resource, topic2Resource).asJava)
     configs = describeResult.all.get
@@ -1781,7 +1917,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
 
     assertEquals("delete", configs.get(topic1Resource).get(LogConfig.CleanupPolicyProp).value)
 
-    //Alter topics with validateOnly=true with invalid configs
+    // Alter topics with validateOnly=true with invalid configs
     topic1AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.CompressionTypeProp, "zip"), AlterConfigOp.OpType.SET)
     ).asJava
@@ -1790,12 +1926,59 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       topic1Resource -> topic1AlterConfigs
     ).asJava, new AlterConfigsOptions().validateOnly(true))
 
-    assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidRequestException],
-      Some("Invalid config value for resource"))
+    if (isKRaftTest()) {
+      assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidConfigurationException],
+        Some("Invalid value zip for configuration compression.type"))
+    } else {
+      assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidConfigurationException],
+        Some("Invalid config value for resource"))
+    }
   }
 
-  @Test
-  def testIncrementalAlterConfigsDeleteAndSetBrokerConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAppendAlreadyExistsConfigsAndSubtractNotExistsConfigs(quorum: String): Unit = {
+    client = Admin.create(createConfig)
+
+    // Create topics
+    val topic = "incremental-alter-configs-topic"
+    val topicResource = new ConfigResource(ConfigResource.Type.TOPIC, topic)
+
+    val appendValues = s"0:${brokers.head.config.brokerId}"
+    val subtractValues = brokers.tail.map(broker => s"0:${broker.config.brokerId}").mkString(",")
+    assertNotEquals("", subtractValues)
+
+    val topicCreateConfigs = new Properties
+    topicCreateConfigs.setProperty(LogConfig.LeaderReplicationThrottledReplicasProp, appendValues)
+    createTopic(topic, numPartitions = 1, replicationFactor = 1, topicCreateConfigs)
+
+    // Append value that is already present
+    val topicAppendConfigs = Seq(
+      new AlterConfigOp(new ConfigEntry(LogConfig.LeaderReplicationThrottledReplicasProp, appendValues), AlterConfigOp.OpType.APPEND),
+    ).asJavaCollection
+
+    val appendResult = client.incrementalAlterConfigs(Map(topicResource -> topicAppendConfigs).asJava)
+    appendResult.all.get
+
+    // Subtract values that are not present
+    val topicSubtractConfigs = Seq(
+      new AlterConfigOp(new ConfigEntry(LogConfig.LeaderReplicationThrottledReplicasProp, subtractValues), AlterConfigOp.OpType.SUBTRACT)
+    ).asJavaCollection
+    val subtractResult = client.incrementalAlterConfigs(Map(topicResource -> topicSubtractConfigs).asJava)
+    subtractResult.all.get
+
+    ensureConsistentKRaftMetadata()
+
+    // Verify that topics were updated correctly
+    val describeResult = client.describeConfigs(Seq(topicResource).asJava)
+    val configs = describeResult.all.get
+
+    assertEquals(appendValues, configs.get(topicResource).get(LogConfig.LeaderReplicationThrottledReplicasProp).value)
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testIncrementalAlterConfigsDeleteAndSetBrokerConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val broker0Resource = new ConfigResource(ConfigResource.Type.BROKER, "0")
     client.incrementalAlterConfigs(Map(broker0Resource ->
@@ -1806,9 +1989,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       ).asJavaCollection).asJava).all().get()
     TestUtils.waitUntilTrue(() => {
       val broker0Configs = client.describeConfigs(Seq(broker0Resource).asJava).
-        all().get().get(broker0Resource).entries().asScala.map {
-        case entry => (entry.name, entry.value)
-      }.toMap
+        all().get().get(broker0Resource).entries().asScala.map(entry => (entry.name, entry.value)).toMap
       ("123".equals(broker0Configs.getOrElse(DynamicConfig.Broker.LeaderReplicationThrottledRateProp, "")) &&
         "456".equals(broker0Configs.getOrElse(DynamicConfig.Broker.FollowerReplicationThrottledRateProp, "")))
     }, "Expected to see the broker properties we just set", pause=25)
@@ -1822,17 +2003,16 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       ).asJavaCollection).asJava).all().get()
     TestUtils.waitUntilTrue(() => {
       val broker0Configs = client.describeConfigs(Seq(broker0Resource).asJava).
-        all().get().get(broker0Resource).entries().asScala.map {
-        case entry => (entry.name, entry.value)
-      }.toMap
+        all().get().get(broker0Resource).entries().asScala.map(entry => (entry.name, entry.value)).toMap
       ("".equals(broker0Configs.getOrElse(DynamicConfig.Broker.LeaderReplicationThrottledRateProp, "")) &&
         "654".equals(broker0Configs.getOrElse(DynamicConfig.Broker.FollowerReplicationThrottledRateProp, "")) &&
         "987".equals(broker0Configs.getOrElse(DynamicConfig.Broker.ReplicaAlterLogDirsIoMaxBytesPerSecondProp, "")))
     }, "Expected to see the broker properties we just modified", pause=25)
   }
 
-  @Test
-  def testIncrementalAlterConfigsDeleteBrokerConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testIncrementalAlterConfigsDeleteBrokerConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val broker0Resource = new ConfigResource(ConfigResource.Type.BROKER, "0")
     client.incrementalAlterConfigs(Map(broker0Resource ->
@@ -1845,9 +2025,7 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       ).asJavaCollection).asJava).all().get()
     TestUtils.waitUntilTrue(() => {
       val broker0Configs = client.describeConfigs(Seq(broker0Resource).asJava).
-        all().get().get(broker0Resource).entries().asScala.map {
-        case entry => (entry.name, entry.value)
-      }.toMap
+        all().get().get(broker0Resource).entries().asScala.map(entry => (entry.name, entry.value)).toMap
       ("123".equals(broker0Configs.getOrElse(DynamicConfig.Broker.LeaderReplicationThrottledRateProp, "")) &&
         "456".equals(broker0Configs.getOrElse(DynamicConfig.Broker.FollowerReplicationThrottledRateProp, "")) &&
         "789".equals(broker0Configs.getOrElse(DynamicConfig.Broker.ReplicaAlterLogDirsIoMaxBytesPerSecondProp, "")))
@@ -1862,17 +2040,16 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       ).asJavaCollection).asJava).all().get()
     TestUtils.waitUntilTrue(() => {
       val broker0Configs = client.describeConfigs(Seq(broker0Resource).asJava).
-        all().get().get(broker0Resource).entries().asScala.map {
-        case entry => (entry.name, entry.value)
-      }.toMap
+        all().get().get(broker0Resource).entries().asScala.map(entry => (entry.name, entry.value)).toMap
       ("".equals(broker0Configs.getOrElse(DynamicConfig.Broker.LeaderReplicationThrottledRateProp, "")) &&
         "".equals(broker0Configs.getOrElse(DynamicConfig.Broker.FollowerReplicationThrottledRateProp, "")) &&
         "".equals(broker0Configs.getOrElse(DynamicConfig.Broker.ReplicaAlterLogDirsIoMaxBytesPerSecondProp, "")))
     }, "Expected to see the broker properties we just removed to be deleted", pause=25)
   }
 
-  @Test
-  def testInvalidIncrementalAlterConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidIncrementalAlterConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     // Create topics
@@ -1884,14 +2061,14 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     val topic2Resource = new ConfigResource(ConfigResource.Type.TOPIC, topic2)
     createTopic(topic2)
 
-    //Add duplicate Keys for topic1
+    // Add duplicate Keys for topic1
     var topic1AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "0.75"), AlterConfigOp.OpType.SET),
       new AlterConfigOp(new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "0.65"), AlterConfigOp.OpType.SET),
       new AlterConfigOp(new ConfigEntry(LogConfig.CompressionTypeProp, "gzip"), AlterConfigOp.OpType.SET) // valid entry
     ).asJavaCollection
 
-    //Add valid config for topic2
+    // Add valid config for topic2
     var topic2AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "0.9"), AlterConfigOp.OpType.SET)
     ).asJavaCollection
@@ -1902,12 +2079,13 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     ).asJava)
     assertEquals(Set(topic1Resource, topic2Resource).asJava, alterResult.values.keySet)
 
-    //InvalidRequestException error for topic1
+    // InvalidRequestException error for topic1
     assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidRequestException],
       Some("Error due to duplicate config keys"))
 
-    //operation should succeed for topic2
+    // Operation should succeed for topic2
     alterResult.values().get(topic2Resource).get()
+    ensureConsistentKRaftMetadata()
 
     // Verify that topic1 is not config not updated, and topic2 config is updated
     val describeResult = client.describeConfigs(Seq(topic1Resource, topic2Resource).asJava)
@@ -1915,10 +2093,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(2, configs.size)
 
     assertEquals(Defaults.LogCleanerMinCleanRatio.toString, configs.get(topic1Resource).get(LogConfig.MinCleanableDirtyRatioProp).value)
-    assertEquals(Defaults.CompressionType.toString, configs.get(topic1Resource).get(LogConfig.CompressionTypeProp).value)
+    assertEquals(Defaults.CompressionType, configs.get(topic1Resource).get(LogConfig.CompressionTypeProp).value)
     assertEquals("0.9", configs.get(topic2Resource).get(LogConfig.MinCleanableDirtyRatioProp).value)
 
-    //check invalid use of append/subtract operation types
+    // Check invalid use of append/subtract operation types
     topic1AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.CompressionTypeProp, "gzip"), AlterConfigOp.OpType.APPEND)
     ).asJavaCollection
@@ -1933,14 +2111,21 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     ).asJava)
     assertEquals(Set(topic1Resource, topic2Resource).asJava, alterResult.values.keySet)
 
-    assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidRequestException],
-      Some("Config value append is not allowed for config"))
-
-    assertFutureExceptionTypeEquals(alterResult.values().get(topic2Resource), classOf[InvalidRequestException],
-      Some("Config value subtract is not allowed for config"))
-
-
-    //try to add invalid config
+    assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidConfigurationException],
+      if (isKRaftTest()) {
+        Some("Can't APPEND to key compression.type because its type is not LIST.")
+      } else {
+        Some("Config value append is not allowed for config")
+      })
+
+    assertFutureExceptionTypeEquals(alterResult.values().get(topic2Resource), classOf[InvalidConfigurationException],
+      if (isKRaftTest()) {
+        Some("Can't SUBTRACT to key compression.type because its type is not LIST.")
+      } else {
+        Some("Config value subtract is not allowed for config")
+      })
+
+    // Try to add invalid config
     topic1AlterConfigs = Seq(
       new AlterConfigOp(new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "1.1"), AlterConfigOp.OpType.SET)
     ).asJavaCollection
@@ -1950,12 +2135,17 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     ).asJava)
     assertEquals(Set(topic1Resource).asJava, alterResult.values.keySet)
 
-    assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidRequestException],
-      Some("Invalid config value for resource"))
+    assertFutureExceptionTypeEquals(alterResult.values().get(topic1Resource), classOf[InvalidConfigurationException],
+      if (isKRaftTest()) {
+        Some("Invalid value 1.1 for configuration min.cleanable.dirty.ratio: Value must be no more than 1")
+      } else {
+        Some("Invalid config value for resource")
+      })
   }
 
-  @Test
-  def testInvalidAlterPartitionReassignments(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidAlterPartitionReassignments(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val topic = "alter-reassignments-topic-1"
     val tp1 = new TopicPartition(topic, 0)
@@ -1993,8 +2183,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertFutureExceptionTypeEquals(invalidReplicaResult.get(tp3), classOf[InvalidReplicaAssignmentException])
   }
 
-  @Test
-  def testLongTopicNames(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testLongTopicNames(quorum: String): Unit = {
     val client = Admin.create(createConfig)
     val longTopicName = String.join("", Collections.nCopies(249, "x"));
     val invalidTopicName = String.join("", Collections.nCopies(250, "x"));
@@ -2006,17 +2197,24 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertTrue(results.containsKey(invalidTopicName))
     assertFutureExceptionTypeEquals(results.get(invalidTopicName), classOf[InvalidTopicException])
     assertFutureExceptionTypeEquals(client.alterReplicaLogDirs(
-      Map(new TopicPartitionReplica(longTopicName, 0, 0) -> servers(0).config.logDirs(0)).asJava).all(),
+      Map(new TopicPartitionReplica(longTopicName, 0, 0) -> brokers(0).config.logDirs(0)).asJava).all(),
       classOf[InvalidTopicException])
     client.close()
   }
 
   // Verify that createTopics and alterConfigs fail with null values
-  @Test
-  def testNullConfigs(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testNullConfigs(quorum: String): Unit = {
 
     def validateLogConfig(compressionType: String): Unit = {
-      val logConfig = zkClient.getLogConfigs(Set(topic), Collections.emptyMap[String, AnyRef])._1(topic)
+      val logConfig = if (isKRaftTest()) {
+        ensureConsistentKRaftMetadata()
+        val topicProps = brokers.head.metadataCache.asInstanceOf[KRaftMetadataCache].topicConfig(topic)
+        LogConfig.fromProps(Collections.emptyMap[String, AnyRef], topicProps)
+      } else {
+        zkClient.getLogConfigs(Set(topic), Collections.emptyMap[String, AnyRef])._1(topic)
+      }
 
       assertEquals(compressionType, logConfig.originals.get(LogConfig.CompressionTypeProp))
       assertNull(logConfig.originals.get(LogConfig.RetentionBytesProp))
@@ -2029,10 +2227,11 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       LogConfig.CompressionTypeProp -> "producer"
     ).asJava
     val newTopic = new NewTopic(topic, 2, brokerCount.toShort)
-    val e1 = assertThrows(classOf[ExecutionException],
-      () => client.createTopics(Collections.singletonList(newTopic.configs(invalidConfigs))).all.get())
-    assertTrue(e1.getCause.isInstanceOf[InvalidRequestException],
-      s"Unexpected exception ${e1.getCause.getClass}")
+    assertFutureExceptionTypeEquals(
+      client.createTopics(Collections.singletonList(newTopic.configs(invalidConfigs))).all,
+      classOf[InvalidConfigurationException],
+      Some("Null value not supported for topic configs: retention.bytes")
+    )
 
     val validConfigs = Map[String, String](LogConfig.CompressionTypeProp -> "producer").asJava
     client.createTopics(Collections.singletonList(newTopic.configs(validConfigs))).all.get()
@@ -2044,15 +2243,17 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
       new AlterConfigOp(new ConfigEntry(LogConfig.RetentionBytesProp, null), AlterConfigOp.OpType.SET),
       new AlterConfigOp(new ConfigEntry(LogConfig.CompressionTypeProp, "lz4"), AlterConfigOp.OpType.SET)
     )
-    val e2 = assertThrows(classOf[ExecutionException],
-      () => client.incrementalAlterConfigs(Map(topicResource -> alterOps.asJavaCollection).asJava).all.get)
-    assertTrue(e2.getCause.isInstanceOf[InvalidRequestException],
-      s"Unexpected exception ${e2.getCause.getClass}")
+    assertFutureExceptionTypeEquals(
+      client.incrementalAlterConfigs(Map(topicResource -> alterOps.asJavaCollection).asJava).all,
+      classOf[InvalidRequestException],
+      Some("Null value not supported for : retention.bytes")
+    )
     validateLogConfig(compressionType = "producer")
   }
 
-  @Test
-  def testDescribeConfigsForLog4jLogLevels(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDescribeConfigsForLog4jLogLevels(quorum: String): Unit = {
     client = Admin.create(createConfig)
     LoggerFactory.getLogger("kafka.cluster.Replica").trace("Message to create the logger")
     val loggerConfig = describeBrokerLoggers()
@@ -2061,15 +2262,16 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     // we expect the log level to be inherited from the first ancestor with a level configured
     assertEquals(kafkaLogLevel, logCleanerLogLevelConfig.value())
     assertEquals("kafka.cluster.Replica", logCleanerLogLevelConfig.name())
-    assertEquals(ConfigEntry.ConfigSource.DYNAMIC_BROKER_LOGGER_CONFIG, logCleanerLogLevelConfig.source())
+    assertEquals(ConfigSource.DYNAMIC_BROKER_LOGGER_CONFIG, logCleanerLogLevelConfig.source())
     assertEquals(false, logCleanerLogLevelConfig.isReadOnly)
     assertEquals(false, logCleanerLogLevelConfig.isSensitive)
     assertTrue(logCleanerLogLevelConfig.synonyms().isEmpty)
   }
 
-  @Test
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
   @Disabled // To be re-enabled once KAFKA-8779 is resolved
-  def testIncrementalAlterConfigsForLog4jLogLevels(): Unit = {
+  def testIncrementalAlterConfigsForLog4jLogLevels(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val initialLoggerConfig = describeBrokerLoggers()
@@ -2131,9 +2333,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     * 4. Change ROOT logger to ERROR
     * 5. Ensure the kafka.controller.KafkaController logger's level is ERROR (the curent root logger level)
     */
-  @Test
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
   @Disabled // To be re-enabled once KAFKA-8779 is resolved
-  def testIncrementalAlterConfigsForLog4jLogLevelsCanResetLoggerToCurrentRoot(): Unit = {
+  def testIncrementalAlterConfigsForLog4jLogLevelsCanResetLoggerToCurrentRoot(quorum: String): Unit = {
     client = Admin.create(createConfig)
     // step 1 - configure root logger
     val initialRootLogLevel = LogLevelConfig.TRACE_LOG_LEVEL
@@ -2173,9 +2376,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertEquals(newRootLogLevel, newRootLoggerConfig.get("kafka.controller.KafkaController").value())
   }
 
-  @Test
-  @Disabled // To be re-enabled once KAFKA-8779 is resolved
-  def testIncrementalAlterConfigsForLog4jLogLevelsCannotResetRootLogger(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  @Disabled // Zk to be re-enabled once KAFKA-8779 is resolved
+  def testIncrementalAlterConfigsForLog4jLogLevelsCannotResetRootLogger(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val deleteRootLoggerEntry = Seq(
       new AlterConfigOp(new ConfigEntry(Log4jController.ROOT_LOGGER, ""), AlterConfigOp.OpType.DELETE)
@@ -2184,9 +2388,10 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     assertTrue(assertThrows(classOf[ExecutionException], () => alterBrokerLoggers(deleteRootLoggerEntry)).getCause.isInstanceOf[InvalidRequestException])
   }
 
-  @Test
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
   @Disabled // To be re-enabled once KAFKA-8779 is resolved
-  def testIncrementalAlterConfigsForLog4jLogLevelsDoesNotWorkWithInvalidConfigs(): Unit = {
+  def testIncrementalAlterConfigsForLog4jLogLevelsDoesNotWorkWithInvalidConfigs(quorum: String): Unit = {
     client = Admin.create(createConfig)
     val validLoggerName = "kafka.server.KafkaRequestHandler"
     val expectedValidLoggerLogLevel = describeBrokerLoggers().get(validLoggerName)
@@ -2228,9 +2433,9 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     * The AlterConfigs API is deprecated and should not support altering log levels
     */
   @nowarn("cat=deprecation")
-  @Test
-  @Disabled // To be re-enabled once KAFKA-8779 is resolved
-  def testAlterConfigsForLog4jLogLevelsDoesNotWork(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("kraft")) // Zk to be re-enabled once KAFKA-8779 is resolved
+  def testAlterConfigsForLog4jLogLevelsDoesNotWork(quorum: String): Unit = {
     client = Admin.create(createConfig)
 
     val alterLogLevelsEntries = Seq(
@@ -2275,12 +2480,96 @@ class PlaintextAdminIntegrationTest extends BaseAdminIntegrationTest {
     }
   }
 
+  /**
+   * Test that createTopics returns the dynamic configurations of the topics that were created.
+   *
+   * Note: this test requires some custom static broker and controller configurations, which are set up in
+   * BaseAdminIntegrationTest.modifyConfigs and BaseAdminIntegrationTest.kraftControllerConfigs.
+   */
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCreateTopicsReturnsConfigs(quorum: String): Unit = {
+    client = Admin.create(super.createConfig)
+
+    val alterMap = new util.HashMap[ConfigResource, util.Collection[AlterConfigOp]]
+    alterMap.put(new ConfigResource(ConfigResource.Type.BROKER, ""), util.Arrays.asList(
+      new AlterConfigOp(new ConfigEntry(KafkaConfig.LogRetentionTimeMillisProp, "10800000"), OpType.SET)))
+    (brokers.map(_.config) ++ controllerServers.map(_.config)).foreach { case config =>
+      alterMap.put(new ConfigResource(ConfigResource.Type.BROKER, config.nodeId.toString()),
+        util.Arrays.asList(new AlterConfigOp(new ConfigEntry(
+          KafkaConfig.LogCleanerDeleteRetentionMsProp, "34"), OpType.SET)))
+    }
+    client.incrementalAlterConfigs(alterMap).all().get()
+    waitUntilTrue(() => brokers.forall(_.config.originals.getOrDefault(
+      KafkaConfig.LogCleanerDeleteRetentionMsProp, "").toString.equals("34")),
+      s"Timed out waiting for change to ${KafkaConfig.LogCleanerDeleteRetentionMsProp}",
+      waitTimeMs = 60000L)
+
+    val newTopics = Seq(new NewTopic("foo", Map((0: Integer) -> Seq[Integer](1, 2).asJava,
+      (1: Integer) -> Seq[Integer](2, 0).asJava).asJava).
+      configs(Collections.singletonMap(LogConfig.IndexIntervalBytesProp, "9999999")),
+      new NewTopic("bar", 3, 3.toShort),
+      new NewTopic("baz", Option.empty[Integer].asJava, Option.empty[java.lang.Short].asJava)
+    )
+    val result = client.createTopics(newTopics.asJava)
+    result.all.get()
+    waitForTopics(client, newTopics.map(_.name()).toList, List())
+
+    assertEquals(2, result.numPartitions("foo").get())
+    assertEquals(2, result.replicationFactor("foo").get())
+    assertEquals(3, result.numPartitions("bar").get())
+    assertEquals(3, result.replicationFactor("bar").get())
+    assertEquals(configs.head.numPartitions, result.numPartitions("baz").get())
+    assertEquals(configs.head.defaultReplicationFactor, result.replicationFactor("baz").get())
+
+    val topicConfigs = result.config("foo").get()
+
+    // From the topic configuration defaults.
+    assertEquals(new ConfigEntry(LogConfig.CleanupPolicyProp, "delete",
+      ConfigSource.DEFAULT_CONFIG, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.CleanupPolicyProp))
+
+    // From dynamic cluster config via the synonym LogRetentionTimeHoursProp.
+    assertEquals(new ConfigEntry(LogConfig.RetentionMsProp, "10800000",
+      ConfigSource.DYNAMIC_DEFAULT_BROKER_CONFIG, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.RetentionMsProp))
+
+    // From dynamic broker config via LogCleanerDeleteRetentionMsProp.
+    assertEquals(new ConfigEntry(LogConfig.DeleteRetentionMsProp, "34",
+      ConfigSource.DYNAMIC_BROKER_CONFIG, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.DeleteRetentionMsProp))
+
+    // From static broker config by SegmentJitterMsProp.
+    assertEquals(new ConfigEntry(LogConfig.SegmentJitterMsProp, "123",
+      ConfigSource.STATIC_BROKER_CONFIG, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.SegmentJitterMsProp))
+
+    // From static broker config by the synonym LogRollTimeHoursProp.
+    val segmentMsPropType = if (isKRaftTest()) {
+      ConfigSource.STATIC_BROKER_CONFIG
+    } else {
+      ConfigSource.DEFAULT_CONFIG
+    }
+    assertEquals(new ConfigEntry(LogConfig.SegmentMsProp, "7200000",
+      segmentMsPropType, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.SegmentMsProp))
+
+    // From the dynamic topic config.
+    assertEquals(new ConfigEntry(LogConfig.IndexIntervalBytesProp, "9999999",
+      ConfigSource.DYNAMIC_TOPIC_CONFIG, false, false, Collections.emptyList(), null, null),
+      topicConfigs.get(LogConfig.IndexIntervalBytesProp))
+  }
 }
 
 object PlaintextAdminIntegrationTest {
 
   @nowarn("cat=deprecation")
-  def checkValidAlterConfigs(client: Admin, topicResource1: ConfigResource, topicResource2: ConfigResource): Unit = {
+  def checkValidAlterConfigs(
+    admin: Admin,
+    test: KafkaServerTestHarness,
+    topicResource1: ConfigResource,
+    topicResource2: ConfigResource
+  ): Unit = {
     // Alter topics
     var topicConfigEntries1 = Seq(
       new ConfigEntry(LogConfig.FlushMsProp, "1000")
@@ -2291,7 +2580,7 @@ object PlaintextAdminIntegrationTest {
       new ConfigEntry(LogConfig.CompressionTypeProp, "lz4")
     ).asJava
 
-    var alterResult = client.alterConfigs(Map(
+    var alterResult = admin.alterConfigs(Map(
       topicResource1 -> new Config(topicConfigEntries1),
       topicResource2 -> new Config(topicConfigEntries2)
     ).asJava)
@@ -2300,7 +2589,8 @@ object PlaintextAdminIntegrationTest {
     alterResult.all.get
 
     // Verify that topics were updated correctly
-    var describeResult = client.describeConfigs(Seq(topicResource1, topicResource2).asJava)
+    test.ensureConsistentKRaftMetadata()
+    var describeResult = admin.describeConfigs(Seq(topicResource1, topicResource2).asJava)
     var configs = describeResult.all.get
 
     assertEquals(2, configs.size)
@@ -2323,7 +2613,7 @@ object PlaintextAdminIntegrationTest {
       new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "0.3")
     ).asJava
 
-    alterResult = client.alterConfigs(Map(
+    alterResult = admin.alterConfigs(Map(
       topicResource1 -> new Config(topicConfigEntries1),
       topicResource2 -> new Config(topicConfigEntries2)
     ).asJava, new AlterConfigsOptions().validateOnly(true))
@@ -2332,7 +2622,8 @@ object PlaintextAdminIntegrationTest {
     alterResult.all.get
 
     // Verify that topics were not updated due to validateOnly = true
-    describeResult = client.describeConfigs(Seq(topicResource1, topicResource2).asJava)
+    test.ensureConsistentKRaftMetadata()
+    describeResult = admin.describeConfigs(Seq(topicResource1, topicResource2).asJava)
     configs = describeResult.all.get
 
     assertEquals(2, configs.size)
@@ -2343,15 +2634,18 @@ object PlaintextAdminIntegrationTest {
   }
 
   @nowarn("cat=deprecation")
-  def checkInvalidAlterConfigs(zkClient: KafkaZkClient, servers: Seq[KafkaServer], client: Admin): Unit = {
+  def checkInvalidAlterConfigs(
+    test: KafkaServerTestHarness,
+    admin: Admin
+  ): Unit = {
     // Create topics
     val topic1 = "invalid-alter-configs-topic-1"
     val topicResource1 = new ConfigResource(ConfigResource.Type.TOPIC, topic1)
-    TestUtils.createTopic(zkClient, topic1, 1, 1, servers)
+    createTopicWithAdmin(admin, topic1, test.brokers, numPartitions = 1, replicationFactor = 1)
 
     val topic2 = "invalid-alter-configs-topic-2"
     val topicResource2 = new ConfigResource(ConfigResource.Type.TOPIC, topic2)
-    TestUtils.createTopic(zkClient, topic2, 1, 1, servers)
+    createTopicWithAdmin(admin, topic2, test.brokers, numPartitions = 1, replicationFactor = 1)
 
     val topicConfigEntries1 = Seq(
       new ConfigEntry(LogConfig.MinCleanableDirtyRatioProp, "1.1"), // this value is invalid as it's above 1.0
@@ -2360,23 +2654,24 @@ object PlaintextAdminIntegrationTest {
 
     var topicConfigEntries2 = Seq(new ConfigEntry(LogConfig.CompressionTypeProp, "snappy")).asJava
 
-    val brokerResource = new ConfigResource(ConfigResource.Type.BROKER, servers.head.config.brokerId.toString)
+    val brokerResource = new ConfigResource(ConfigResource.Type.BROKER, test.brokers.head.config.brokerId.toString)
     val brokerConfigEntries = Seq(new ConfigEntry(KafkaConfig.ZkConnectProp, "localhost:2181")).asJava
 
     // Alter configs: first and third are invalid, second is valid
-    var alterResult = client.alterConfigs(Map(
+    var alterResult = admin.alterConfigs(Map(
       topicResource1 -> new Config(topicConfigEntries1),
       topicResource2 -> new Config(topicConfigEntries2),
       brokerResource -> new Config(brokerConfigEntries)
     ).asJava)
 
     assertEquals(Set(topicResource1, topicResource2, brokerResource).asJava, alterResult.values.keySet)
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource1).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource1), classOf[InvalidConfigurationException])
     alterResult.values.get(topicResource2).get
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(brokerResource).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(brokerResource), classOf[InvalidRequestException])
 
     // Verify that first and third resources were not updated and second was updated
-    var describeResult = client.describeConfigs(Seq(topicResource1, topicResource2, brokerResource).asJava)
+    test.ensureConsistentKRaftMetadata()
+    var describeResult = admin.describeConfigs(Seq(topicResource1, topicResource2, brokerResource).asJava)
     var configs = describeResult.all.get
     assertEquals(3, configs.size)
 
@@ -2392,19 +2687,20 @@ object PlaintextAdminIntegrationTest {
     // Alter configs with validateOnly = true: first and third are invalid, second is valid
     topicConfigEntries2 = Seq(new ConfigEntry(LogConfig.CompressionTypeProp, "gzip")).asJava
 
-    alterResult = client.alterConfigs(Map(
+    alterResult = admin.alterConfigs(Map(
       topicResource1 -> new Config(topicConfigEntries1),
       topicResource2 -> new Config(topicConfigEntries2),
       brokerResource -> new Config(brokerConfigEntries)
     ).asJava, new AlterConfigsOptions().validateOnly(true))
 
     assertEquals(Set(topicResource1, topicResource2, brokerResource).asJava, alterResult.values.keySet)
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(topicResource1).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(topicResource1), classOf[InvalidConfigurationException])
     alterResult.values.get(topicResource2).get
-    assertTrue(assertThrows(classOf[ExecutionException], () => alterResult.values.get(brokerResource).get).getCause.isInstanceOf[InvalidRequestException])
+    assertFutureExceptionTypeEquals(alterResult.values.get(brokerResource), classOf[InvalidRequestException])
 
     // Verify that no resources are updated since validate_only = true
-    describeResult = client.describeConfigs(Seq(topicResource1, topicResource2, brokerResource).asJava)
+    test.ensureConsistentKRaftMetadata()
+    describeResult = admin.describeConfigs(Seq(topicResource1, topicResource2, brokerResource).asJava)
     configs = describeResult.all.get
     assertEquals(3, configs.size)
 
@@ -2417,5 +2713,4 @@ object PlaintextAdminIntegrationTest {
 
     assertEquals(Defaults.CompressionType, configs.get(brokerResource).get(KafkaConfig.CompressionTypeProp).value)
   }
-
 }
diff --git a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala
index 4ede241b0c57a..5dc7c2ada1e0d 100644
--- a/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala
+++ b/core/src/test/scala/integration/kafka/api/PlaintextConsumerTest.scala
@@ -37,7 +37,11 @@ import kafka.server.QuotaType
 import kafka.server.KafkaServer
 import org.apache.kafka.clients.admin.NewPartitions
 import org.apache.kafka.clients.admin.NewTopic
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.locks.ReentrantLock
 import scala.collection.mutable
 
 /* We have some tests in this class instead of `BaseConsumerTest` in order to keep the build time under control. */
@@ -969,6 +973,89 @@ class PlaintextConsumerTest extends BaseConsumerTest {
     }
   }
 
+  @ParameterizedTest
+  @ValueSource(strings = Array(
+    "org.apache.kafka.clients.consumer.CooperativeStickyAssignor",
+    "org.apache.kafka.clients.consumer.RangeAssignor"))
+  def testRebalanceAndRejoin(assignmentStrategy: String): Unit = {
+    // create 2 consumers
+    this.consumerConfig.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "rebalance-and-rejoin-group")
+    this.consumerConfig.setProperty(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, assignmentStrategy)
+    this.consumerConfig.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true")
+    val consumer1 = createConsumer()
+    val consumer2 = createConsumer()
+
+    // create a new topic, have 2 partitions
+    val topic = "topic1"
+    val producer = createProducer()
+    val expectedAssignment = createTopicAndSendRecords(producer, topic, 2, 100)
+
+    assertEquals(0, consumer1.assignment().size)
+    assertEquals(0, consumer2.assignment().size)
+
+    val lock = new ReentrantLock()
+    var generationId1 = -1
+    var memberId1 = ""
+    val customRebalanceListener = new ConsumerRebalanceListener {
+      override def onPartitionsRevoked(partitions: util.Collection[TopicPartition]): Unit = {
+      }
+      override def onPartitionsAssigned(partitions: util.Collection[TopicPartition]): Unit = {
+        if (!lock.tryLock(3000, TimeUnit.MILLISECONDS)) {
+          fail(s"Time out while awaiting for lock.")
+        }
+        try {
+          generationId1 = consumer1.groupMetadata().generationId()
+          memberId1 = consumer1.groupMetadata().memberId()
+        } finally {
+          lock.unlock()
+        }
+      }
+    }
+    val consumerPoller1 = new ConsumerAssignmentPoller(consumer1, List(topic), Set.empty, customRebalanceListener)
+    consumerPoller1.start()
+    TestUtils.waitUntilTrue(() => consumerPoller1.consumerAssignment() == expectedAssignment,
+      s"Timed out while awaiting expected assignment change to $expectedAssignment.")
+
+    // Since the consumer1 already completed the rebalance,
+    // the `onPartitionsAssigned` rebalance listener will be invoked to set the generationId and memberId
+    var stableGeneration = -1
+    var stableMemberId1 = ""
+    if (!lock.tryLock(3000, TimeUnit.MILLISECONDS)) {
+      fail(s"Time out while awaiting for lock.")
+    }
+    try {
+      stableGeneration = generationId1
+      stableMemberId1 = memberId1
+    } finally {
+      lock.unlock()
+    }
+
+    val consumerPoller2 = subscribeConsumerAndStartPolling(consumer2, List(topic))
+    TestUtils.waitUntilTrue(() => consumerPoller1.consumerAssignment().size == 1,
+      s"Timed out while awaiting expected assignment size change to 1.")
+    TestUtils.waitUntilTrue(() => consumerPoller2.consumerAssignment().size == 1,
+      s"Timed out while awaiting expected assignment size change to 1.")
+
+    if (!lock.tryLock(3000, TimeUnit.MILLISECONDS)) {
+      fail(s"Time out while awaiting for lock.")
+    }
+    try {
+      if (assignmentStrategy.equals(classOf[CooperativeStickyAssignor].getName)) {
+        // cooperative rebalance should rebalance twice before finally stable
+        assertEquals(stableGeneration + 2, generationId1)
+      } else {
+        // eager rebalance should rebalance once before finally stable
+        assertEquals(stableGeneration + 1, generationId1)
+      }
+      assertEquals(stableMemberId1, memberId1)
+    } finally {
+      lock.unlock()
+    }
+
+    consumerPoller1.shutdown()
+    consumerPoller2.shutdown()
+  }
+
   /**
    * This test re-uses BaseConsumerTest's consumers.
    * As a result, it is testing the default assignment strategy set by BaseConsumerTest
diff --git a/core/src/test/scala/integration/kafka/api/PlaintextProducerSendTest.scala b/core/src/test/scala/integration/kafka/api/PlaintextProducerSendTest.scala
index 06ff201e0b2e6..c25eb184b3e13 100644
--- a/core/src/test/scala/integration/kafka/api/PlaintextProducerSendTest.scala
+++ b/core/src/test/scala/integration/kafka/api/PlaintextProducerSendTest.scala
@@ -19,22 +19,23 @@ package kafka.api
 
 import java.util.Properties
 import java.util.concurrent.{ExecutionException, Future, TimeUnit}
-
 import kafka.log.LogConfig
 import kafka.server.Defaults
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.producer.{BufferExhaustedException, KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
 import org.apache.kafka.common.errors.{InvalidTimestampException, RecordTooLargeException, SerializationException, TimeoutException}
 import org.apache.kafka.common.record.{DefaultRecord, DefaultRecordBatch, Records, TimestampType}
 import org.apache.kafka.common.serialization.ByteArraySerializer
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 
 class PlaintextProducerSendTest extends BaseProducerSendTest {
 
-  @Test
-  def testWrongSerializer(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testWrongSerializer(quorum: String): Unit = {
     val producerProps = new Properties()
     producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers())
     producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
@@ -44,8 +45,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
     assertThrows(classOf[SerializationException], () => producer.send(record))
   }
 
-  @Test
-  def testBatchSizeZero(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testBatchSizeZero(quorum: String): Unit = {
     val producer = createProducer(
       lingerMs = Int.MaxValue,
       deliveryTimeoutMs = Int.MaxValue,
@@ -53,8 +55,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
     sendAndVerify(producer)
   }
 
-  @Test
-  def testSendCompressedMessageWithLogAppendTime(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendCompressedMessageWithLogAppendTime(quorum: String): Unit = {
     val producer = createProducer(
       compressionType = "gzip",
       lingerMs = Int.MaxValue,
@@ -62,8 +65,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
     sendAndVerifyTimestamp(producer, TimestampType.LOG_APPEND_TIME)
   }
 
-  @Test
-  def testSendNonCompressedMessageWithLogAppendTime(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendNonCompressedMessageWithLogAppendTime(quorum: String): Unit = {
     val producer = createProducer(lingerMs = Int.MaxValue, deliveryTimeoutMs = Int.MaxValue)
     sendAndVerifyTimestamp(producer, TimestampType.LOG_APPEND_TIME)
   }
@@ -73,8 +77,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
    *
    * The topic should be created upon sending the first message
    */
-  @Test
-  def testAutoCreateTopic(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAutoCreateTopic(quorum: String): Unit = {
     val producer = createProducer()
     try {
       // Send a message to auto-create the topic
@@ -82,18 +87,18 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
       assertEquals(0L, producer.send(record).get.offset, "Should have offset 0")
 
       // double check that the topic is created with leader elected
-      TestUtils.waitUntilLeaderIsElectedOrChanged(zkClient, topic, 0)
-
+      TestUtils.waitUntilLeaderIsElectedOrChangedWithAdmin(admin, topic, 0)
     } finally {
       producer.close()
     }
   }
 
-  @Test
-  def testSendWithInvalidCreateTime(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendWithInvalidCreateTime(quorum: String): Unit = {
     val topicProps = new Properties()
     topicProps.setProperty(LogConfig.MessageTimestampDifferenceMaxMsProp, "1000")
-    createTopic(topic, 1, 2, topicProps)
+    TestUtils.createTopicWithAdmin(admin, topic, brokers, 1, 2, topicConfig = topicProps)
 
     val producer = createProducer()
     try {
@@ -118,8 +123,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
   // Test that producer with max.block.ms=0 can be used to send in non-blocking mode
   // where requests are failed immediately without blocking if metadata is not available
   // or buffer is full.
-  @Test
-  def testNonBlockingProducer(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testNonBlockingProducer(quorum: String): Unit = {
 
     def send(producer: KafkaProducer[Array[Byte],Array[Byte]]): Future[RecordMetadata] = {
       producer.send(new ProducerRecord(topic, 0, "key".getBytes, new Array[Byte](1000)))
@@ -173,8 +179,9 @@ class PlaintextProducerSendTest extends BaseProducerSendTest {
     verifySendSuccess(future2)               // previous batch should be completed and sent now
   }
 
-  @Test
-  def testSendRecordBatchWithMaxRequestSizeAndHigher(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendRecordBatchWithMaxRequestSizeAndHigher(quorum: String): Unit = {
     val producerProps = new Properties()
     producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers())
     val producer = registerProducer(new KafkaProducer(producerProps, new ByteArraySerializer, new ByteArraySerializer))
diff --git a/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala
index ccdfe7d3d3676..07d9ccb024f9a 100755
--- a/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala
+++ b/core/src/test/scala/integration/kafka/api/ProducerCompressionTest.scala
@@ -17,19 +17,19 @@
 
 package kafka.api.test
 
-import kafka.server.{KafkaConfig, KafkaServer}
+import kafka.server.{KafkaBroker, KafkaConfig, QuorumTestHarness}
 import kafka.utils.TestUtils
-import kafka.server.QuorumTestHarness
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.serialization.ByteArraySerializer
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
 import org.junit.jupiter.params.ParameterizedTest
-import org.junit.jupiter.params.provider.{Arguments, MethodSource}
+import org.junit.jupiter.params.provider.CsvSource
 
 import java.util.{Collections, Properties}
-import scala.jdk.CollectionConverters._
 
 class ProducerCompressionTest extends QuorumTestHarness {
 
@@ -37,18 +37,18 @@ class ProducerCompressionTest extends QuorumTestHarness {
   private val topic = "topic"
   private val numRecords = 2000
 
-  private var server: KafkaServer = null
+  private var broker: KafkaBroker = null
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
-    val props = TestUtils.createBrokerConfig(brokerId, zkConnect)
-    server = TestUtils.createServer(KafkaConfig.fromProps(props))
+    val props = TestUtils.createBrokerConfig(brokerId, zkConnectOrNull)
+    broker = createBroker(new KafkaConfig(props))
   }
 
   @AfterEach
   override def tearDown(): Unit = {
-    TestUtils.shutdownServers(Seq(server))
+    TestUtils.shutdownServers(Seq(broker))
     super.tearDown()
   }
 
@@ -58,11 +58,18 @@ class ProducerCompressionTest extends QuorumTestHarness {
    * Compressed messages should be able to sent and consumed correctly
    */
   @ParameterizedTest
-  @MethodSource(Array("parameters"))
-  def testCompression(compression: String): Unit = {
+  @CsvSource(value = Array(
+    "kraft,none",
+    "kraft,gzip",
+    "kraft,snappy",
+    "kraft,lz4",
+    "kraft,zstd",
+    "zk,gzip"
+  ))
+  def testCompression(quorum: String, compression: String): Unit = {
 
     val producerProps = new Properties()
-    val bootstrapServers = TestUtils.plaintextBootstrapServers(Seq(server))
+    val bootstrapServers = TestUtils.plaintextBootstrapServers(Seq(broker))
     producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
     producerProps.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, compression)
     producerProps.put(ProducerConfig.BATCH_SIZE_CONFIG, "66000")
@@ -72,7 +79,13 @@ class ProducerCompressionTest extends QuorumTestHarness {
 
     try {
       // create topic
-      TestUtils.createTopic(zkClient, topic, 1, 1, List(server))
+      val admin = TestUtils.createAdminClient(Seq(broker),
+        ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT))
+      try {
+        TestUtils.createTopicWithAdmin(admin, topic, Seq(broker))
+      } finally {
+        admin.close()
+      }
       val partition = 0
 
       // prepare the messages
@@ -103,15 +116,3 @@ class ProducerCompressionTest extends QuorumTestHarness {
     }
   }
 }
-
-object ProducerCompressionTest {
-  def parameters: java.util.stream.Stream[Arguments] = {
-    Seq(
-      Arguments.of("none"),
-      Arguments.of("gzip"),
-      Arguments.of("snappy"),
-      Arguments.of("lz4"),
-      Arguments.of("zstd")
-    ).asJava.stream()
-  }
-}
diff --git a/core/src/test/scala/integration/kafka/api/SslAdminIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/SslAdminIntegrationTest.scala
index b9180815c6f05..d04a09023cc3b 100644
--- a/core/src/test/scala/integration/kafka/api/SslAdminIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/SslAdminIntegrationTest.scala
@@ -15,8 +15,8 @@ package kafka.api
 import java.io.File
 import java.util
 import java.util.concurrent._
+
 import com.yammer.metrics.core.Gauge
-import kafka.metrics.KafkaYammerMetrics
 import kafka.security.authorizer.AclAuthorizer
 import kafka.server.KafkaConfig
 import kafka.utils.TestUtils
@@ -26,6 +26,7 @@ import org.apache.kafka.common.protocol.ApiKeys
 import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourceType}
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
 import org.apache.kafka.server.authorizer._
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotNull, assertTrue}
 import org.junit.jupiter.api.{AfterEach, Test}
 
diff --git a/core/src/test/scala/integration/kafka/api/TransactionsExpirationTest.scala b/core/src/test/scala/integration/kafka/api/TransactionsExpirationTest.scala
index 91e7ae2cc1884..ddf3a9746028f 100644
--- a/core/src/test/scala/integration/kafka/api/TransactionsExpirationTest.scala
+++ b/core/src/test/scala/integration/kafka/api/TransactionsExpirationTest.scala
@@ -21,7 +21,7 @@ import java.util.Properties
 
 import kafka.integration.KafkaServerTestHarness
 import kafka.server.KafkaConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import kafka.utils.TestUtils.consumeRecords
 import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.kafka.clients.producer.KafkaProducer
@@ -68,7 +68,7 @@ class TransactionsExpirationTest extends KafkaServerTestHarness {
     super.tearDown()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testBumpTransactionalEpochAfterInvalidProducerIdMapping(quorum: String): Unit = {
     producer.initTransactions()
diff --git a/core/src/test/scala/integration/kafka/api/TransactionsTest.scala b/core/src/test/scala/integration/kafka/api/TransactionsTest.scala
index 3df602a233609..c4d676a3ce7dd 100644
--- a/core/src/test/scala/integration/kafka/api/TransactionsTest.scala
+++ b/core/src/test/scala/integration/kafka/api/TransactionsTest.scala
@@ -22,26 +22,26 @@ import java.nio.charset.StandardCharsets
 import java.time.Duration
 import java.util.concurrent.TimeUnit
 import java.util.{Optional, Properties}
-
-import kafka.integration.KafkaServerTestHarness
 import kafka.server.KafkaConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import kafka.utils.TestUtils.consumeRecords
 import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerGroupMetadata, KafkaConsumer, OffsetAndMetadata}
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
 import org.apache.kafka.common.errors.{InvalidProducerEpochException, ProducerFencedException, TimeoutException}
 import org.apache.kafka.common.TopicPartition
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
-import scala.collection.Seq
 import scala.collection.mutable.Buffer
 import scala.concurrent.ExecutionException
 
-class TransactionsTest extends KafkaServerTestHarness {
-  val numServers = 3
+class TransactionsTest extends IntegrationTestHarness {
+  override def brokerCount = 3
+
   val transactionalProducerCount = 2
   val transactionalConsumerCount = 1
   val nonTransactionalConsumerCount = 1
@@ -54,17 +54,26 @@ class TransactionsTest extends KafkaServerTestHarness {
   val transactionalConsumers = Buffer[KafkaConsumer[Array[Byte], Array[Byte]]]()
   val nonTransactionalConsumers = Buffer[KafkaConsumer[Array[Byte], Array[Byte]]]()
 
-  override def generateConfigs: Seq[KafkaConfig] = {
-    TestUtils.createBrokerConfigs(numServers, zkConnect).map(KafkaConfig.fromProps(_, serverProps()))
-  }
+  serverConfig.put(KafkaConfig.AutoCreateTopicsEnableProp, false.toString)
+  // Set a smaller value for the number of partitions for the __consumer_offsets topic
+  // so that the creation of that topic/partition(s) and subsequent leader assignment doesn't take relatively long
+  serverConfig.put(KafkaConfig.OffsetsTopicPartitionsProp, 1.toString)
+  serverConfig.put(KafkaConfig.TransactionsTopicPartitionsProp, 3.toString)
+  serverConfig.put(KafkaConfig.TransactionsTopicReplicationFactorProp, 2.toString)
+  serverConfig.put(KafkaConfig.TransactionsTopicMinISRProp, 2.toString)
+  serverConfig.put(KafkaConfig.ControlledShutdownEnableProp, true.toString)
+  serverConfig.put(KafkaConfig.UncleanLeaderElectionEnableProp, false.toString)
+  serverConfig.put(KafkaConfig.AutoLeaderRebalanceEnableProp, false.toString)
+  serverConfig.put(KafkaConfig.GroupInitialRebalanceDelayMsProp, "0")
+  serverConfig.put(KafkaConfig.TransactionsAbortTimedOutTransactionCleanupIntervalMsProp, "200")
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
     val topicConfig = new Properties()
     topicConfig.put(KafkaConfig.MinInSyncReplicasProp, 2.toString)
-    createTopic(topic1, numPartitions, numServers, topicConfig)
-    createTopic(topic2, numPartitions, numServers, topicConfig)
+    createTopic(topic1, numPartitions, brokerCount, topicConfig)
+    createTopic(topic2, numPartitions, brokerCount, topicConfig)
 
     for (_ <- 0 until transactionalProducerCount)
       createTransactionalProducer("transactional-producer")
@@ -82,8 +91,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     super.tearDown()
   }
 
-  @Test
-  def testBasicTransactions() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testBasicTransactions(quorum: String): Unit = {
     val producer = transactionalProducers.head
     val consumer = transactionalConsumers.head
     val unCommittedConsumer = nonTransactionalConsumers.head
@@ -116,8 +126,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testReadCommittedConsumerShouldNotSeeUndecidedData(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testReadCommittedConsumerShouldNotSeeUndecidedData(quorum: String): Unit = {
     val producer1 = transactionalProducers.head
     val producer2 = createTransactionalProducer("other")
     val readCommittedConsumer = transactionalConsumers.head
@@ -183,8 +194,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     assertNull(readCommittedOffsetsForTimes.get(tp2))
   }
 
-  @Test
-  def testDelayedFetchIncludesAbortedTransaction(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDelayedFetchIncludesAbortedTransaction(quorum: String): Unit = {
     val producer1 = transactionalProducers.head
     val producer2 = createTransactionalProducer("other")
 
@@ -228,14 +240,16 @@ class TransactionsTest extends KafkaServerTestHarness {
   }
 
   @nowarn("cat=deprecation")
-  @Test
-  def testSendOffsetsWithGroupId() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendOffsetsWithGroupId(quorum: String): Unit = {
     sendOffset((producer, groupId, consumer) =>
       producer.sendOffsetsToTransaction(TestUtils.consumerPositions(consumer).asJava, groupId))
   }
 
-  @Test
-  def testSendOffsetsWithGroupMetadata() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendOffsetsWithGroupMetadata(quorum: String): Unit = {
     sendOffset((producer, _, consumer) =>
       producer.sendOffsetsToTransaction(TestUtils.consumerPositions(consumer).asJava, consumer.groupMetadata()))
   }
@@ -255,7 +269,7 @@ class TransactionsTest extends KafkaServerTestHarness {
     val consumerGroupId = "foobar-consumer-group"
     val numSeedMessages = 500
 
-    TestUtils.seedTopicWithNumberedRecords(topic1, numSeedMessages, servers)
+    TestUtils.seedTopicWithNumberedRecords(topic1, numSeedMessages, brokers)
 
     val producer = transactionalProducers.head
 
@@ -307,8 +321,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     assertEquals(valueSeq.size, valueSet.size, s"Expected ${valueSeq.size} unique messages in $topic2.")
   }
 
-  @Test
-  def testFencingOnCommit() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFencingOnCommit(quorum: String): Unit = {
     val producer1 = transactionalProducers(0)
     val producer2 = transactionalProducers(1)
     val consumer = transactionalConsumers(0)
@@ -336,8 +351,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testFencingOnSendOffsets() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFencingOnSendOffsets(quorum: String): Unit = {
     val producer1 = transactionalProducers(0)
     val producer2 = transactionalProducers(1)
     val consumer = transactionalConsumers(0)
@@ -367,8 +383,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testOffsetMetadataInSendOffsetsToTransaction() = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testOffsetMetadataInSendOffsetsToTransaction(quorum: String): Unit = {
     val tp = new TopicPartition(topic1, 0)
     val groupId = "group"
 
@@ -392,24 +409,28 @@ class TransactionsTest extends KafkaServerTestHarness {
     TestUtils.waitUntilTrue(() => offsetAndMetadata.equals(consumer.committed(Set(tp).asJava).get(tp)), "cannot read committed offset")
   }
 
-  @Test
-  def testInitTransactionsTimeout(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInitTransactionsTimeout(quorum: String): Unit = {
     testTimeout(false, producer => producer.initTransactions())
   }
 
-  @Test
-  def testSendOffsetsToTransactionTimeout(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testSendOffsetsToTransactionTimeout(quorum: String): Unit = {
     testTimeout(true, producer => producer.sendOffsetsToTransaction(
       Map(new TopicPartition(topic1, 0) -> new OffsetAndMetadata(0)).asJava, new ConsumerGroupMetadata("test-group")))
   }
 
-  @Test
-  def testCommitTransactionTimeout(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCommitTransactionTimeout(quorum: String): Unit = {
     testTimeout(true, producer => producer.commitTransaction())
   }
 
-  @Test
-  def testAbortTransactionTimeout(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAbortTransactionTimeout(quorum: String): Unit = {
     testTimeout(true, producer => producer.abortTransaction())
   }
 
@@ -422,14 +443,15 @@ class TransactionsTest extends KafkaServerTestHarness {
       producer.send(new ProducerRecord[Array[Byte], Array[Byte]](topic1, "foo".getBytes, "bar".getBytes))
     }
 
-    for  (i <- servers.indices) killBroker(i)
+    for  (i <- brokers.indices) killBroker(i)
 
     assertThrows(classOf[TimeoutException], () => timeoutProcess(producer))
     producer.close(Duration.ZERO)
   }
 
-  @Test
-  def testFencingOnSend(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFencingOnSend(quorum: String): Unit = {
     val producer1 = transactionalProducers(0)
     val producer2 = transactionalProducers(1)
     val consumer = transactionalConsumers(0)
@@ -451,8 +473,8 @@ class TransactionsTest extends KafkaServerTestHarness {
       val result = producer1.send(TestUtils.producerRecordWithExpectedTransactionStatus(topic1, null, "1", "5", willBeCommitted = false))
       val recordMetadata = result.get()
       error(s"Missed a producer fenced exception when writing to ${recordMetadata.topic}-${recordMetadata.partition}. Grab the logs!!")
-      servers.foreach { server =>
-        error(s"log dirs: ${server.logManager.liveLogDirs.map(_.getAbsolutePath).head}")
+      brokers.foreach { broker =>
+        error(s"log dirs: ${broker.logManager.liveLogDirs.map(_.getAbsolutePath).head}")
       }
       fail("Should not be able to send messages from a fenced producer.")
     } catch {
@@ -472,8 +494,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testFencingOnAddPartitions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFencingOnAddPartitions(quorum: String): Unit = {
     val producer1 = transactionalProducers(0)
     val producer2 = transactionalProducers(1)
     val consumer = transactionalConsumers(0)
@@ -498,8 +521,8 @@ class TransactionsTest extends KafkaServerTestHarness {
       val result =  producer1.send(TestUtils.producerRecordWithExpectedTransactionStatus(topic1, null, "1", "5", willBeCommitted = false))
       val recordMetadata = result.get()
       error(s"Missed a producer fenced exception when writing to ${recordMetadata.topic}-${recordMetadata.partition}. Grab the logs!!")
-      servers.foreach { server =>
-        error(s"log dirs: ${server.logManager.liveLogDirs.map(_.getAbsolutePath).head}")
+      brokers.foreach { broker =>
+        error(s"log dirs: ${broker.logManager.liveLogDirs.map(_.getAbsolutePath).head}")
       }
       fail("Should not be able to send messages from a fenced producer.")
     } catch {
@@ -518,8 +541,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testFencingOnTransactionExpiration(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFencingOnTransactionExpiration(quorum: String): Unit = {
     val producer = createTransactionalProducer("expiringProducer", transactionTimeoutMs = 100)
 
     producer.initTransactions()
@@ -560,8 +584,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     assertTrue(transactionalRecords.isEmpty)
   }
 
-  @Test
-  def testMultipleMarkersOneLeader(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMultipleMarkersOneLeader(quorum: String): Unit = {
     val firstProducer = transactionalProducers.head
     val consumer = transactionalConsumers.head
     val unCommittedConsumer = nonTransactionalConsumers.head
@@ -570,7 +595,7 @@ class TransactionsTest extends KafkaServerTestHarness {
     val topicConfig = new Properties()
     topicConfig.put(KafkaConfig.MinInSyncReplicasProp, 2.toString)
 
-    createTopic(topicWith10Partitions, 10, numServers, topicConfig)
+    createTopic(topicWith10Partitions, 10, brokerCount, topicConfig)
     createTopic(topicWith10PartitionsAndOneReplica, 10, 1, new Properties())
 
     firstProducer.initTransactions()
@@ -599,16 +624,18 @@ class TransactionsTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testConsecutivelyRunInitTransactions(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConsecutivelyRunInitTransactions(quorum: String): Unit = {
     val producer = createTransactionalProducer(transactionalId = "normalProducer")
 
     producer.initTransactions()
     assertThrows(classOf[IllegalStateException], () => producer.initTransactions())
   }
 
-  @Test
-  def testBumpTransactionalEpoch(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testBumpTransactionalEpoch(quorum: String): Unit = {
     val producer = createTransactionalProducer("transactionalProducer",
       deliveryTimeoutMs = 5000, requestTimeoutMs = 5000)
     val consumer = transactionalConsumers.head
@@ -616,7 +643,7 @@ class TransactionsTest extends KafkaServerTestHarness {
       // Create a topic with RF=1 so that a single broker failure will render it unavailable
       val testTopic = "test-topic"
       createTopic(testTopic, numPartitions, 1, new Properties)
-      val partitionLeader = TestUtils.waitUntilLeaderIsKnown(servers, new TopicPartition(testTopic, 0))
+      val partitionLeader = TestUtils.waitUntilLeaderIsKnown(brokers, new TopicPartition(testTopic, 0))
 
       producer.initTransactions()
 
@@ -625,7 +652,7 @@ class TransactionsTest extends KafkaServerTestHarness {
       producer.commitTransaction()
 
       var producerStateEntry =
-        servers(partitionLeader).logManager.getLog(new TopicPartition(testTopic, 0)).get.producerStateManager.activeProducers.head._2
+        brokers(partitionLeader).logManager.getLog(new TopicPartition(testTopic, 0)).get.producerStateManager.activeProducers.head._2
       val producerId = producerStateEntry.producerId
       val initialProducerEpoch = producerStateEntry.producerEpoch
 
@@ -658,15 +685,16 @@ class TransactionsTest extends KafkaServerTestHarness {
       // get here without having bumped the epoch. If bumping the epoch is possible, the producer will attempt to, so
       // check there that the epoch has actually increased
       producerStateEntry =
-        servers(partitionLeader).logManager.getLog(new TopicPartition(testTopic, 0)).get.producerStateManager.activeProducers(producerId)
+        brokers(partitionLeader).logManager.getLog(new TopicPartition(testTopic, 0)).get.producerStateManager.activeProducers(producerId)
       assertTrue(producerStateEntry.producerEpoch > initialProducerEpoch)
     } finally {
       producer.close(Duration.ZERO)
     }
   }
 
-  @Test
-  def testFailureToFenceEpoch(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFailureToFenceEpoch(quorum: String): Unit = {
     val producer1 = transactionalProducers.head
     val producer2 = createTransactionalProducer("transactional-producer", maxBlockMs = 1000)
 
@@ -676,9 +704,9 @@ class TransactionsTest extends KafkaServerTestHarness {
     producer1.send(TestUtils.producerRecordWithExpectedTransactionStatus(topic1, 0, "4", "4", willBeCommitted = true))
     producer1.commitTransaction()
 
-    val partitionLeader = TestUtils.waitUntilLeaderIsKnown(servers, new TopicPartition(topic1, 0))
+    val partitionLeader = TestUtils.waitUntilLeaderIsKnown(brokers, new TopicPartition(topic1, 0))
     var producerStateEntry =
-      servers(partitionLeader).logManager.getLog(new TopicPartition(topic1, 0)).get.producerStateManager.activeProducers.head._2
+      brokers(partitionLeader).logManager.getLog(new TopicPartition(topic1, 0)).get.producerStateManager.activeProducers.head._2
     val producerId = producerStateEntry.producerId
     val initialProducerEpoch = producerStateEntry.producerEpoch
 
@@ -720,7 +748,7 @@ class TransactionsTest extends KafkaServerTestHarness {
 
     // Check that the epoch only increased by 1
     producerStateEntry =
-      servers(partitionLeader).logManager.getLog(new TopicPartition(topic1, 0)).get.producerStateManager.activeProducers(producerId)
+      brokers(partitionLeader).logManager.getLog(new TopicPartition(topic1, 0)).get.producerStateManager.activeProducers(producerId)
     assertEquals((initialProducerEpoch + 1).toShort, producerStateEntry.producerEpoch)
   }
 
@@ -732,23 +760,6 @@ class TransactionsTest extends KafkaServerTestHarness {
     producer.flush()
   }
 
-  private def serverProps() = {
-    val serverProps = new Properties()
-    serverProps.put(KafkaConfig.AutoCreateTopicsEnableProp, false.toString)
-    // Set a smaller value for the number of partitions for the __consumer_offsets topic
-    // so that the creation of that topic/partition(s) and subsequent leader assignment doesn't take relatively long
-    serverProps.put(KafkaConfig.OffsetsTopicPartitionsProp, 1.toString)
-    serverProps.put(KafkaConfig.TransactionsTopicPartitionsProp, 3.toString)
-    serverProps.put(KafkaConfig.TransactionsTopicReplicationFactorProp, 2.toString)
-    serverProps.put(KafkaConfig.TransactionsTopicMinISRProp, 2.toString)
-    serverProps.put(KafkaConfig.ControlledShutdownEnableProp, true.toString)
-    serverProps.put(KafkaConfig.UncleanLeaderElectionEnableProp, false.toString)
-    serverProps.put(KafkaConfig.AutoLeaderRebalanceEnableProp, false.toString)
-    serverProps.put(KafkaConfig.GroupInitialRebalanceDelayMsProp, "0")
-    serverProps.put(KafkaConfig.TransactionsAbortTimedOutTransactionCleanupIntervalMsProp, "200")
-    serverProps
-  }
-
   private def createReadCommittedConsumer(group: String = "group",
                                           maxPollRecords: Int = 500,
                                           props: Properties = new Properties) = {
@@ -774,11 +785,14 @@ class TransactionsTest extends KafkaServerTestHarness {
                                           maxBlockMs: Long = 60000,
                                           deliveryTimeoutMs: Int = 120000,
                                           requestTimeoutMs: Int = 30000): KafkaProducer[Array[Byte], Array[Byte]] = {
-    val producer = TestUtils.createTransactionalProducer(transactionalId, servers,
+    val producer = TestUtils.createTransactionalProducer(
+      transactionalId,
+      brokers,
       transactionTimeoutMs = transactionTimeoutMs,
       maxBlockMs = maxBlockMs,
       deliveryTimeoutMs = deliveryTimeoutMs,
-      requestTimeoutMs = requestTimeoutMs)
+      requestTimeoutMs = requestTimeoutMs
+    )
     transactionalProducers += producer
     producer
   }
diff --git a/core/src/test/scala/integration/kafka/api/TransactionsWithMaxInFlightOneTest.scala b/core/src/test/scala/integration/kafka/api/TransactionsWithMaxInFlightOneTest.scala
index eacc58e76cc59..5dd82b6b224bc 100644
--- a/core/src/test/scala/integration/kafka/api/TransactionsWithMaxInFlightOneTest.scala
+++ b/core/src/test/scala/integration/kafka/api/TransactionsWithMaxInFlightOneTest.scala
@@ -18,15 +18,16 @@
 package kafka.api
 
 import java.util.Properties
-
 import kafka.integration.KafkaServerTestHarness
 import kafka.server.KafkaConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import kafka.utils.TestUtils.consumeRecords
 import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.kafka.clients.producer.KafkaProducer
 import org.junit.jupiter.api.Assertions.assertEquals
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.collection.Seq
 import scala.collection.mutable.Buffer
@@ -37,7 +38,7 @@ import scala.jdk.CollectionConverters._
  * A single broker is used to verify edge cases where different requests are queued on the same connection.
  */
 class TransactionsWithMaxInFlightOneTest extends KafkaServerTestHarness {
-  val numServers = 1
+  val numBrokers = 1
 
   val topic1 = "topic1"
   val topic2 = "topic2"
@@ -47,7 +48,7 @@ class TransactionsWithMaxInFlightOneTest extends KafkaServerTestHarness {
   val transactionalConsumers = Buffer[KafkaConsumer[Array[Byte], Array[Byte]]]()
 
   override def generateConfigs: Seq[KafkaConfig] = {
-    TestUtils.createBrokerConfigs(numServers, zkConnect).map(KafkaConfig.fromProps(_, serverProps()))
+    TestUtils.createBrokerConfigs(numBrokers, zkConnectOrNull).map(KafkaConfig.fromProps(_, serverProps()))
   }
 
   @BeforeEach
@@ -55,8 +56,8 @@ class TransactionsWithMaxInFlightOneTest extends KafkaServerTestHarness {
     super.setUp(testInfo)
     val topicConfig = new Properties()
     topicConfig.put(KafkaConfig.MinInSyncReplicasProp, 1.toString)
-    createTopic(topic1, numPartitions, numServers, topicConfig)
-    createTopic(topic2, numPartitions, numServers, topicConfig)
+    createTopic(topic1, numPartitions, numBrokers, topicConfig)
+    createTopic(topic2, numPartitions, numBrokers, topicConfig)
 
     createTransactionalProducer("transactional-producer")
     createReadCommittedConsumer("transactional-group")
@@ -69,10 +70,11 @@ class TransactionsWithMaxInFlightOneTest extends KafkaServerTestHarness {
     super.tearDown()
   }
 
-  @Test
-  def testTransactionalProducerSingleBrokerMaxInFlightOne(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testTransactionalProducerSingleBrokerMaxInFlightOne(quorum: String): Unit = {
     // We want to test with one broker to verify multiple requests queued on a connection
-    assertEquals(1, servers.size)
+    assertEquals(1, brokers.size)
 
     val producer = transactionalProducers.head
     val consumer = transactionalConsumers.head
@@ -124,7 +126,7 @@ class TransactionsWithMaxInFlightOneTest extends KafkaServerTestHarness {
   }
 
   private def createTransactionalProducer(transactionalId: String): KafkaProducer[Array[Byte], Array[Byte]] = {
-    val producer = TestUtils.createTransactionalProducer(transactionalId, servers, maxInFlight = 1)
+    val producer = TestUtils.createTransactionalProducer(transactionalId, brokers, maxInFlight = 1)
     transactionalProducers += producer
     producer
   }
diff --git a/core/src/test/scala/integration/kafka/api/UserClientIdQuotaTest.scala b/core/src/test/scala/integration/kafka/api/UserClientIdQuotaTest.scala
index 83c70dacf3505..362054657659b 100644
--- a/core/src/test/scala/integration/kafka/api/UserClientIdQuotaTest.scala
+++ b/core/src/test/scala/integration/kafka/api/UserClientIdQuotaTest.scala
@@ -42,7 +42,7 @@ class UserClientIdQuotaTest extends BaseQuotaTest {
     quotaTestClients.waitForQuotaUpdate(defaultProducerQuota, defaultConsumerQuota, defaultRequestQuota)
   }
 
-  override def createQuotaTestClients(topic: String, leaderNode: KafkaServer): QuotaTestClients = {
+  override def createQuotaTestClients(topic: String, leaderNode: KafkaBroker): QuotaTestClients = {
     val producer = createProducer()
     val consumer = createConsumer()
     val adminClient = createAdminClient()
diff --git a/core/src/test/scala/integration/kafka/api/UserQuotaTest.scala b/core/src/test/scala/integration/kafka/api/UserQuotaTest.scala
index ffbaebb9c9b7c..fd1639e43bf64 100644
--- a/core/src/test/scala/integration/kafka/api/UserQuotaTest.scala
+++ b/core/src/test/scala/integration/kafka/api/UserQuotaTest.scala
@@ -14,13 +14,13 @@
 
 package kafka.api
 
-import java.io.File
-
-import kafka.server.KafkaServer
+import kafka.server.KafkaBroker
 import kafka.utils.JaasTestUtils
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
 import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
 
+import java.io.File
+
 class UserQuotaTest extends BaseQuotaTest with SaslSetup {
 
   override protected def securityProtocol = SecurityProtocol.SASL_SSL
@@ -49,7 +49,7 @@ class UserQuotaTest extends BaseQuotaTest with SaslSetup {
     closeSasl()
   }
 
-  override def createQuotaTestClients(topic: String, leaderNode: KafkaServer): QuotaTestClients = {
+  override def createQuotaTestClients(topic: String, leaderNode: KafkaBroker): QuotaTestClients = {
     val producer = createProducer()
     val consumer = createConsumer()
     val adminClient = createAdminClient()
diff --git a/core/src/test/scala/integration/kafka/coordinator/transaction/ProducerIdsIntegrationTest.scala b/core/src/test/scala/integration/kafka/coordinator/transaction/ProducerIdsIntegrationTest.scala
index be9f159b86263..7d3203e93095e 100644
--- a/core/src/test/scala/integration/kafka/coordinator/transaction/ProducerIdsIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/coordinator/transaction/ProducerIdsIntegrationTest.scala
@@ -26,6 +26,7 @@ import org.apache.kafka.common.message.InitProducerIdRequestData
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.record.RecordBatch
 import org.apache.kafka.common.requests.{InitProducerIdRequest, InitProducerIdResponse}
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.extension.ExtendWith
@@ -43,9 +44,9 @@ class ProducerIdsIntegrationTest {
   }
 
   @ClusterTests(Array(
-    new ClusterTest(clusterType = Type.ZK, brokers = 3, ibp = "2.8"),
-    new ClusterTest(clusterType = Type.ZK, brokers = 3, ibp = "3.0-IV0"),
-    new ClusterTest(clusterType = Type.KRAFT, brokers = 3, ibp = "3.0-IV0")
+    new ClusterTest(clusterType = Type.ZK, brokers = 3, metadataVersion = MetadataVersion.IBP_2_8_IV1),
+    new ClusterTest(clusterType = Type.ZK, brokers = 3, metadataVersion = MetadataVersion.IBP_3_0_IV0),
+    new ClusterTest(clusterType = Type.KRAFT, brokers = 3, metadataVersion = MetadataVersion.IBP_3_0_IV1)
   ))
   def testUniqueProducerIds(clusterInstance: ClusterInstance): Unit = {
     verifyUniqueIds(clusterInstance)
diff --git a/core/src/test/scala/integration/kafka/server/DelayedFetchTest.scala b/core/src/test/scala/integration/kafka/server/DelayedFetchTest.scala
index 581af29beceb7..dce5a2eaee75f 100644
--- a/core/src/test/scala/integration/kafka/server/DelayedFetchTest.scala
+++ b/core/src/test/scala/integration/kafka/server/DelayedFetchTest.scala
@@ -17,13 +17,14 @@
 package kafka.server
 
 import java.util.Optional
+
 import scala.collection.Seq
 import kafka.cluster.Partition
 import kafka.log.LogOffsetSnapshot
 import org.apache.kafka.common.{TopicIdPartition, Uuid}
 import org.apache.kafka.common.errors.{FencedLeaderEpochException, NotLeaderOrFollowerException}
 import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
-import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record.MemoryRecords
 import org.apache.kafka.common.requests.FetchRequest
 import org.junit.jupiter.api.Test
@@ -47,7 +48,7 @@ class DelayedFetchTest {
     val fetchStatus = FetchPartitionStatus(
       startOffsetMetadata = LogOffsetMetadata(fetchOffset),
       fetchInfo = new FetchRequest.PartitionData(Uuid.ZERO_UUID, fetchOffset, logStartOffset, maxBytes, currentLeaderEpoch))
-    val fetchMetadata = buildFetchMetadata(replicaId, topicIdPartition, fetchStatus)
+    val fetchParams = buildFollowerFetchParams(replicaId, maxWaitMs = 500)
 
     var fetchResultOpt: Option[FetchPartitionData] = None
     def callback(responses: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
@@ -55,12 +56,12 @@ class DelayedFetchTest {
     }
 
     val delayedFetch = new DelayedFetch(
-      delayMs = 500,
-      fetchMetadata = fetchMetadata,
+      params = fetchParams,
+      fetchPartitionStatus = Seq(topicIdPartition -> fetchStatus),
       replicaManager = replicaManager,
       quota = replicaQuota,
-      clientMetadata = None,
-      responseCallback = callback)
+      responseCallback = callback
+    )
 
     val partition: Partition = mock(classOf[Partition])
 
@@ -72,7 +73,7 @@ class DelayedFetchTest {
         .thenThrow(new FencedLeaderEpochException("Requested epoch has been fenced"))
     when(replicaManager.isAddingReplica(any(), anyInt())).thenReturn(false)
 
-    expectReadFromReplica(replicaId, topicIdPartition, fetchStatus.fetchInfo, Errors.FENCED_LEADER_EPOCH)
+    expectReadFromReplica(fetchParams, topicIdPartition, fetchStatus.fetchInfo, Errors.FENCED_LEADER_EPOCH)
 
     assertTrue(delayedFetch.tryComplete())
     assertTrue(delayedFetch.isCompleted)
@@ -93,7 +94,7 @@ class DelayedFetchTest {
     val fetchStatus = FetchPartitionStatus(
       startOffsetMetadata = LogOffsetMetadata(fetchOffset),
       fetchInfo = new FetchRequest.PartitionData(Uuid.ZERO_UUID, fetchOffset, logStartOffset, maxBytes, currentLeaderEpoch))
-    val fetchMetadata = buildFetchMetadata(replicaId, topicIdPartition, fetchStatus)
+    val fetchParams = buildFollowerFetchParams(replicaId, maxWaitMs = 500)
 
     var fetchResultOpt: Option[FetchPartitionData] = None
     def callback(responses: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
@@ -101,16 +102,16 @@ class DelayedFetchTest {
     }
 
     val delayedFetch = new DelayedFetch(
-      delayMs = 500,
-      fetchMetadata = fetchMetadata,
+      params = fetchParams,
+      fetchPartitionStatus = Seq(topicIdPartition -> fetchStatus),
       replicaManager = replicaManager,
       quota = replicaQuota,
-      clientMetadata = None,
-      responseCallback = callback)
+      responseCallback = callback
+    )
 
     when(replicaManager.getPartitionOrException(topicIdPartition.topicPartition))
       .thenThrow(new NotLeaderOrFollowerException(s"Replica for $topicIdPartition not available"))
-    expectReadFromReplica(replicaId, topicIdPartition, fetchStatus.fetchInfo, Errors.NOT_LEADER_OR_FOLLOWER)
+    expectReadFromReplica(fetchParams, topicIdPartition, fetchStatus.fetchInfo, Errors.NOT_LEADER_OR_FOLLOWER)
     when(replicaManager.isAddingReplica(any(), anyInt())).thenReturn(false)
 
     assertTrue(delayedFetch.tryComplete())
@@ -130,7 +131,7 @@ class DelayedFetchTest {
     val fetchStatus = FetchPartitionStatus(
       startOffsetMetadata = LogOffsetMetadata(fetchOffset),
       fetchInfo = new FetchRequest.PartitionData(topicIdPartition.topicId, fetchOffset, logStartOffset, maxBytes, currentLeaderEpoch, lastFetchedEpoch))
-    val fetchMetadata = buildFetchMetadata(replicaId, topicIdPartition, fetchStatus)
+    val fetchParams = buildFollowerFetchParams(replicaId, maxWaitMs = 500)
 
     var fetchResultOpt: Option[FetchPartitionData] = None
     def callback(responses: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
@@ -138,12 +139,12 @@ class DelayedFetchTest {
     }
 
     val delayedFetch = new DelayedFetch(
-      delayMs = 500,
-      fetchMetadata = fetchMetadata,
+      params = fetchParams,
+      fetchPartitionStatus = Seq(topicIdPartition -> fetchStatus),
       replicaManager = replicaManager,
       quota = replicaQuota,
-      clientMetadata = None,
-      responseCallback = callback)
+      responseCallback = callback
+    )
 
     val partition: Partition = mock(classOf[Partition])
     when(replicaManager.getPartitionOrException(topicIdPartition.topicPartition)).thenReturn(partition)
@@ -159,40 +160,40 @@ class DelayedFetchTest {
         .setLeaderEpoch(lastFetchedEpoch.get)
         .setEndOffset(fetchOffset - 1))
     when(replicaManager.isAddingReplica(any(), anyInt())).thenReturn(false)
-    expectReadFromReplica(replicaId, topicIdPartition, fetchStatus.fetchInfo, Errors.NONE)
+    expectReadFromReplica(fetchParams, topicIdPartition, fetchStatus.fetchInfo, Errors.NONE)
 
     assertTrue(delayedFetch.tryComplete())
     assertTrue(delayedFetch.isCompleted)
     assertTrue(fetchResultOpt.isDefined)
   }
 
-  private def buildFetchMetadata(replicaId: Int,
-                                 topicIdPartition: TopicIdPartition,
-                                 fetchStatus: FetchPartitionStatus): FetchMetadata = {
-    FetchMetadata(fetchMinBytes = 1,
-      fetchMaxBytes = maxBytes,
-      hardMaxBytesLimit = false,
-      fetchOnlyLeader = true,
-      fetchIsolation = FetchLogEnd,
-      isFromFollower = true,
+  private def buildFollowerFetchParams(
+    replicaId: Int,
+    maxWaitMs: Int
+  ): FetchParams = {
+    FetchParams(
+      requestVersion = ApiKeys.FETCH.latestVersion,
       replicaId = replicaId,
-      fetchPartitionStatus = Seq((topicIdPartition, fetchStatus)))
+      maxWaitMs = maxWaitMs,
+      minBytes = 1,
+      maxBytes = maxBytes,
+      isolation = FetchLogEnd,
+      clientMetadata = None
+    )
   }
 
-  private def expectReadFromReplica(replicaId: Int,
-                                    topicIdPartition: TopicIdPartition,
-                                    fetchPartitionData: FetchRequest.PartitionData,
-                                    error: Errors): Unit = {
+  private def expectReadFromReplica(
+    fetchParams: FetchParams,
+    topicIdPartition: TopicIdPartition,
+    fetchPartitionData: FetchRequest.PartitionData,
+    error: Errors
+  ): Unit = {
     when(replicaManager.readFromLocalLog(
-      replicaId = replicaId,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchLogEnd,
-      fetchMaxBytes = maxBytes,
-      hardMaxBytesLimit = false,
+      fetchParams,
       readPartitionInfo = Seq((topicIdPartition, fetchPartitionData)),
-      clientMetadata = None,
-      quota = replicaQuota))
-      .thenReturn(Seq((topicIdPartition, buildReadResult(error))))
+      quota = replicaQuota,
+      readFromPurgatory = true
+    )).thenReturn(Seq((topicIdPartition, buildReadResult(error))))
   }
 
   private def buildReadResult(error: Errors): LogReadResult = {
diff --git a/core/src/test/scala/integration/kafka/server/DynamicBrokerReconfigurationTest.scala b/core/src/test/scala/integration/kafka/server/DynamicBrokerReconfigurationTest.scala
index 0cc58628103da..295ad061211b8 100644
--- a/core/src/test/scala/integration/kafka/server/DynamicBrokerReconfigurationTest.scala
+++ b/core/src/test/scala/integration/kafka/server/DynamicBrokerReconfigurationTest.scala
@@ -26,19 +26,17 @@ import java.time.Duration
 import java.util
 import java.util.{Collections, Properties}
 import java.util.concurrent._
-
 import javax.management.ObjectName
 import com.yammer.metrics.core.MetricName
 import kafka.admin.ConfigCommand
 import kafka.api.{KafkaSasl, SaslSetup}
 import kafka.controller.{ControllerBrokerStateInfo, ControllerChannelManager}
-import kafka.log.{CleanerConfig, LogConfig}
+import kafka.log.{CleanerConfig, LogConfig, UnifiedLog}
 import kafka.message.ProducerCompressionCodec
-import kafka.metrics.KafkaYammerMetrics
 import kafka.network.{Processor, RequestChannel}
-import kafka.server.QuorumTestHarness
 import kafka.utils._
 import kafka.utils.Implicits._
+import kafka.utils.TestUtils.TestControllerRequestCompletionHandler
 import kafka.zk.ConfigEntityChangeNotificationZNode
 import org.apache.kafka.clients.CommonClientConfigs
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType
@@ -53,18 +51,23 @@ import org.apache.kafka.common.config.types.Password
 import org.apache.kafka.common.config.provider.FileConfigProvider
 import org.apache.kafka.common.errors.{AuthenticationException, InvalidRequestException}
 import org.apache.kafka.common.internals.Topic
-import org.apache.kafka.common.metrics.Quota
-import org.apache.kafka.common.metrics.{KafkaMetric, MetricsReporter}
+import org.apache.kafka.common.message.MetadataRequestData
+import org.apache.kafka.common.metrics.{KafkaMetric, MetricsContext, MetricsReporter, Quota}
 import org.apache.kafka.common.network.{ListenerName, Mode}
 import org.apache.kafka.common.network.CertStores.{KEYSTORE_PROPS, TRUSTSTORE_PROPS}
 import org.apache.kafka.common.record.TimestampType
+import org.apache.kafka.common.requests.MetadataRequest
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.security.scram.ScramCredential
 import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.test.{TestSslUtils, TestUtils => JTestUtils}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
+import java.util.concurrent.atomic.AtomicInteger
 import scala.annotation.nowarn
 import scala.collection._
 import scala.collection.mutable.ArrayBuffer
@@ -80,7 +83,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
 
   import DynamicBrokerReconfigurationTest._
 
-  private val servers = new ArrayBuffer[KafkaServer]
+  private val servers = new ArrayBuffer[KafkaBroker]
   private val numServers = 3
   private val numPartitions = 10
   private val producers = new ArrayBuffer[KafkaProducer[String, String]]
@@ -111,15 +114,22 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
 
     (0 until numServers).foreach { brokerId =>
 
-      val props = TestUtils.createBrokerConfig(brokerId, zkConnect)
+      val props = if (isKRaftTest()) {
+        val properties = TestUtils.createBrokerConfig(brokerId, null)
+        properties.put(KafkaConfig.AdvertisedListenersProp, s"$SecureInternal://localhost:0, $SecureExternal://localhost:0")
+        properties
+      } else {
+        val properties = TestUtils.createBrokerConfig(brokerId, zkConnect)
+        properties.put(KafkaConfig.ZkEnableSecureAclsProp, "true")
+        properties
+      }
       props ++= securityProps(sslProperties1, TRUSTSTORE_PROPS)
       // Ensure that we can support multiple listeners per security protocol and multiple security protocols
       props.put(KafkaConfig.ListenersProp, s"$SecureInternal://localhost:0, $SecureExternal://localhost:0")
-      props.put(KafkaConfig.ListenerSecurityProtocolMapProp, s"$SecureInternal:SSL, $SecureExternal:SASL_SSL")
+      props.put(KafkaConfig.ListenerSecurityProtocolMapProp, s"$SecureInternal:SSL, $SecureExternal:SASL_SSL, CONTROLLER:$controllerListenerSecurityProtocol")
       props.put(KafkaConfig.InterBrokerListenerNameProp, SecureInternal)
       props.put(KafkaConfig.SslClientAuthProp, "requested")
       props.put(KafkaConfig.SaslMechanismInterBrokerProtocolProp, "PLAIN")
-      props.put(KafkaConfig.ZkEnableSecureAclsProp, "true")
       props.put(KafkaConfig.SaslEnabledMechanismsProp, kafkaServerSaslMechanisms.mkString(","))
       props.put(KafkaConfig.LogSegmentBytesProp, "2000") // low value to test log rolling on config update
       props.put(KafkaConfig.NumReplicaFetchersProp, "2") // greater than one to test reducing threads
@@ -138,17 +148,21 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       props ++= securityProps(sslProperties1, KEYSTORE_PROPS, listenerPrefix(SecureExternal))
 
       val kafkaConfig = KafkaConfig.fromProps(props)
-      configureDynamicKeystoreInZooKeeper(kafkaConfig, sslProperties1)
+      if (!isKRaftTest()) {
+        configureDynamicKeystoreInZooKeeper(kafkaConfig, sslProperties1)
+      }
 
-      servers += TestUtils.createServer(kafkaConfig)
+      servers += createBroker(kafkaConfig)
     }
 
-    TestUtils.createTopic(zkClient, topic, numPartitions, replicationFactor = numServers, servers)
-    TestUtils.createTopic(zkClient, Topic.GROUP_METADATA_TOPIC_NAME, servers.head.config.offsetsTopicPartitions,
-      replicationFactor = numServers, servers, servers.head.groupCoordinator.offsetsTopicConfigs)
-
     createAdminClient(SecurityProtocol.SSL, SecureInternal)
 
+    TestUtils.createTopicWithAdmin(adminClients.head, topic, servers, numPartitions, replicationFactor = numServers)
+    TestUtils.createTopicWithAdmin(adminClients.head, Topic.GROUP_METADATA_TOPIC_NAME, servers,
+      numPartitions = servers.head.config.offsetsTopicPartitions,
+      replicationFactor = numServers,
+      topicConfig = servers.head.groupCoordinator.offsetsTopicConfigs)
+
     TestMetricsReporter.testReporters.clear()
   }
 
@@ -166,8 +180,9 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     closeSasl()
   }
 
-  @Test
-  def testConfigDescribeUsingAdminClient(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConfigDescribeUsingAdminClient(quorum: String): Unit = {
 
     def verifyConfig(configName: String, configEntry: ConfigEntry, isSensitive: Boolean, isReadOnly: Boolean,
                      expectedProps: Properties): Unit = {
@@ -226,9 +241,12 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     val adminClient = adminClients.head
     alterSslKeystoreUsingConfigCommand(sslProperties1, SecureExternal)
 
-    val configDesc = describeConfig(adminClient)
-    verifySslConfig("listener.name.external.", sslProperties1, configDesc)
-    verifySslConfig("", invalidSslProperties, configDesc)
+    val configDesc = TestUtils.tryUntilNoAssertionError() {
+      val describeConfigsResult = describeConfig(adminClient)
+      verifySslConfig("listener.name.external.", sslProperties1, describeConfigsResult)
+      verifySslConfig("", invalidSslProperties, describeConfigsResult)
+      describeConfigsResult
+    }
 
     // Verify a few log configs with and without synonyms
     val expectedProps = new Properties
@@ -262,8 +280,9 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     assertEquals(List((KafkaConfig.LogCleanerThreadsProp, ConfigSource.DEFAULT_CONFIG)), synonymsList(logCleanerThreads))
   }
 
-  @Test
-  def testUpdatesUsingConfigProvider(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testUpdatesUsingConfigProvider(quorum: String): Unit = {
     val PollingIntervalVal = f"$${file:polling.interval:interval}"
     val PollingIntervalUpdateVal = f"$${file:polling.interval:updinterval}"
     val SslTruststoreTypeVal = f"$${file:ssl.truststore.type:storetype}"
@@ -309,11 +328,13 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       assertFalse(reporter.kafkaMetrics.isEmpty, "No metrics found")
     }
 
-    // fetch from ZK, values should be unresolved
-    val props = fetchBrokerConfigsFromZooKeeper(servers.head)
-    assertTrue(props.getProperty(TestMetricsReporter.PollingIntervalProp) == PollingIntervalVal, "polling interval is not updated in ZK")
-    assertTrue(props.getProperty(configPrefix+KafkaConfig.SslTruststoreTypeProp) == SslTruststoreTypeVal, "store type is not updated in ZK")
-    assertTrue(props.getProperty(configPrefix+KafkaConfig.SslKeystorePasswordProp) == SslKeystorePasswordVal, "keystore password is not updated in ZK")
+    if (!isKRaftTest()) {
+      // fetch from ZK, values should be unresolved
+      val props = fetchBrokerConfigsFromZooKeeper(servers.head)
+      assertTrue(props.getProperty(TestMetricsReporter.PollingIntervalProp) == PollingIntervalVal, "polling interval is not updated in ZK")
+      assertTrue(props.getProperty(configPrefix + KafkaConfig.SslTruststoreTypeProp) == SslTruststoreTypeVal, "store type is not updated in ZK")
+      assertTrue(props.getProperty(configPrefix + KafkaConfig.SslKeystorePasswordProp) == SslKeystorePasswordVal, "keystore password is not updated in ZK")
+    }
 
     // verify the update
     // 1. verify update not occurring if the value of property is same.
@@ -332,10 +353,11 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     }
   }
 
-  @Test
-  def testKeyStoreAlter(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testKeyStoreAlter(quorum: String): Unit = {
     val topic2 = "testtopic2"
-    TestUtils.createTopic(zkClient, topic2, numPartitions, replicationFactor = numServers, servers)
+    TestUtils.createTopicWithAdmin(adminClients.head, topic2, servers, numPartitions, replicationFactor = numServers)
 
     // Start a producer and consumer that work with the current broker keystore.
     // This should continue working while changes are made
@@ -355,7 +377,9 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
 
     // Produce/consume should work with new truststore with new producer/consumer
     val producer = ProducerBuilder().trustStoreProps(sslProperties2).maxRetries(0).build()
-    val consumer = ConsumerBuilder("group1").trustStoreProps(sslProperties2).topic(topic2).build()
+    // Start the new consumer in a separate group than the continous consumer started at the beginning of the test so
+    // that it is not disrupted by rebalance.
+    val consumer = ConsumerBuilder("group2").trustStoreProps(sslProperties2).topic(topic2).build()
     verifyProduceConsume(producer, consumer, 10, topic2)
 
     // Broker keystore update for internal listener with incompatible keystore should fail without update
@@ -399,8 +423,9 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     stopAndVerifyProduceConsume(producerThread, consumerThread)
   }
 
-  @Test
-  def testTrustStoreAlter(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testTrustStoreAlter(quorum: String): Unit = {
     val producerBuilder = ProducerBuilder().listenerName(SecureInternal).securityProtocol(SecurityProtocol.SSL)
 
     // Producer with new keystore should fail to connect before truststore update
@@ -430,9 +455,29 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       verifyProduceConsume(producer, consumer, 10, topic)
     }
 
+    def verifyBrokerToControllerCall(controller: KafkaServer): Unit = {
+      val nonControllerBroker = servers.find(_.config.brokerId != controller.config.brokerId).get
+      val brokerToControllerManager = nonControllerBroker.clientToControllerChannelManager
+      val completionHandler = new TestControllerRequestCompletionHandler()
+      brokerToControllerManager.sendRequest(new MetadataRequest.Builder(new MetadataRequestData()), completionHandler)
+      TestUtils.waitUntilTrue(() => {
+        completionHandler.completed.get() || completionHandler.timedOut.get()
+      }, "Timed out while waiting for broker to controller API call")
+      // we do not expect a timeout from broker to controller request
+      assertFalse(completionHandler.timedOut.get(), "broker to controller request is timeout")
+      assertTrue(completionHandler.actualResponse.isDefined, "No response recorded even though request is completed")
+      val response = completionHandler.actualResponse.get
+      assertNull(response.authenticationException(), s"Request failed due to authentication error ${response.authenticationException}")
+      assertNull(response.versionMismatch(), s"Request failed due to unsupported version error ${response.versionMismatch}")
+      assertFalse(response.wasDisconnected(), "Request failed because broker is not available")
+    }
+
+    val group_id = new AtomicInteger(1)
+    def next_group_name(): String = s"alter-truststore-${group_id.getAndIncrement()}"
+
     // Produce/consume should work with old as well as new client keystore
-    verifySslProduceConsume(sslProperties1, "alter-truststore-1")
-    verifySslProduceConsume(sslProperties2, "alter-truststore-2")
+    verifySslProduceConsume(sslProperties1, next_group_name())
+    verifySslProduceConsume(sslProperties2, next_group_name())
 
     // Revert to old truststore with only one certificate and update. Clients should connect only with old keystore.
     val oldTruststoreProps = new Properties
@@ -441,7 +486,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     reconfigureServers(oldTruststoreProps, perBrokerConfig = true,
       (s"$prefix$SSL_TRUSTSTORE_LOCATION_CONFIG", sslProperties1.getProperty(SSL_TRUSTSTORE_LOCATION_CONFIG)))
     verifyAuthenticationFailure(producerBuilder.keyStoreProps(sslProperties2).build())
-    verifySslProduceConsume(sslProperties1, "alter-truststore-3")
+    verifySslProduceConsume(sslProperties1, next_group_name())
 
     // Update same truststore file to contain both certificates without changing any configs.
     // Clients should connect successfully with either keystore after admin client AlterConfigsRequest completes.
@@ -449,8 +494,14 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       Paths.get(sslProperties1.getProperty(SSL_TRUSTSTORE_LOCATION_CONFIG)),
       StandardCopyOption.REPLACE_EXISTING)
     TestUtils.incrementalAlterConfigs(servers, adminClients.head, oldTruststoreProps, perBrokerConfig = true).all.get()
-    verifySslProduceConsume(sslProperties1, "alter-truststore-4")
-    verifySslProduceConsume(sslProperties2, "alter-truststore-5")
+    TestUtils.retry(30000) {
+      try {
+        verifySslProduceConsume(sslProperties1, next_group_name())
+        verifySslProduceConsume(sslProperties2, next_group_name())
+      } catch {
+        case t: Throwable => throw new AssertionError(t)
+      }
+    }
 
     // Update internal keystore/truststore and validate new client connections from broker (e.g. controller).
     // Alter internal keystore from `sslProperties1` to `sslProperties2`, force disconnect of a controller connection
@@ -458,22 +509,28 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     val props2 = securityProps(sslProperties2, KEYSTORE_PROPS, prefix)
     props2 ++= securityProps(combinedStoreProps, TRUSTSTORE_PROPS, prefix)
     TestUtils.incrementalAlterConfigs(servers, adminClients.head, props2, perBrokerConfig = true).all.get(15, TimeUnit.SECONDS)
-    verifySslProduceConsume(sslProperties2, "alter-truststore-6")
+    verifySslProduceConsume(sslProperties2, next_group_name())
     props2 ++= securityProps(sslProperties2, TRUSTSTORE_PROPS, prefix)
     TestUtils.incrementalAlterConfigs(servers, adminClients.head, props2, perBrokerConfig = true).all.get(15, TimeUnit.SECONDS)
-    verifySslProduceConsume(sslProperties2, "alter-truststore-7")
+    verifySslProduceConsume(sslProperties2, next_group_name())
     waitForAuthenticationFailure(producerBuilder.keyStoreProps(sslProperties1))
 
-    val controller = servers.find(_.config.brokerId == TestUtils.waitUntilControllerElected(zkClient)).get
-    val controllerChannelManager = controller.kafkaController.controllerChannelManager
-    val brokerStateInfo: mutable.HashMap[Int, ControllerBrokerStateInfo] =
-      JTestUtils.fieldValue(controllerChannelManager, classOf[ControllerChannelManager], "brokerStateInfo")
-    brokerStateInfo(0).networkClient.disconnect("0")
-    TestUtils.createTopic(zkClient, "testtopic2", numPartitions, replicationFactor = numServers, servers)
+    if (!isKRaftTest()) {
+      val controller = servers.find(_.config.brokerId == TestUtils.waitUntilControllerElected(zkClient)).get.asInstanceOf[KafkaServer]
+      val controllerChannelManager = controller.kafkaController.controllerChannelManager
+      val brokerStateInfo: mutable.HashMap[Int, ControllerBrokerStateInfo] =
+        JTestUtils.fieldValue(controllerChannelManager, classOf[ControllerChannelManager], "brokerStateInfo")
+      brokerStateInfo(0).networkClient.disconnect("0")
+      TestUtils.createTopic(zkClient, "testtopic2", numPartitions, replicationFactor = numServers, servers)
+
+      // validate that the brokerToController request works fine
+      verifyBrokerToControllerCall(controller)
+    }
   }
 
-  @Test
-  def testLogCleanerConfig(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testLogCleanerConfig(quorum: String): Unit = {
     val (producerThread, consumerThread) = startProduceConsume(retries = 0)
 
     verifyThreads("kafka-log-cleaner-thread-", countPerBroker = 1)
@@ -517,13 +574,23 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     stopAndVerifyProduceConsume(producerThread, consumerThread)
   }
 
-  @Test
-  def testConsecutiveConfigChange(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConsecutiveConfigChange(quorum: String): Unit = {
     val topic2 = "testtopic2"
     val topicProps = new Properties
     topicProps.put(KafkaConfig.MinInSyncReplicasProp, "2")
-    TestUtils.createTopic(zkClient, topic2, 1, replicationFactor = numServers, servers, topicProps)
-    var log = servers.head.logManager.getLog(new TopicPartition(topic2, 0)).getOrElse(throw new IllegalStateException("Log not found"))
+    TestUtils.createTopicWithAdmin(adminClients.head, topic2, servers, numPartitions = 1, replicationFactor = numServers, topicConfig = topicProps)
+
+    def getLogOrThrow(tp: TopicPartition): UnifiedLog = {
+      var (logOpt, found) = TestUtils.computeUntilTrue {
+        servers.head.logManager.getLog(tp)
+      }(_.isDefined)
+      assertTrue(found, "Log not found")
+      logOpt.get
+    }
+
+    var log = getLogOrThrow(new TopicPartition(topic2, 0))
     assertTrue(log.config.overriddenConfigs.contains(KafkaConfig.MinInSyncReplicasProp))
     assertEquals("2", log.config.originals().get(KafkaConfig.MinInSyncReplicasProp).toString)
 
@@ -538,7 +605,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       }
     }
 
-    log = servers.head.logManager.getLog(new TopicPartition(topic2, 0)).getOrElse(throw new IllegalStateException("Log not found"))
+    log = getLogOrThrow(new TopicPartition(topic2, 0))
     assertTrue(log.config.overriddenConfigs.contains(KafkaConfig.MinInSyncReplicasProp))
     assertEquals("2", log.config.originals().get(KafkaConfig.MinInSyncReplicasProp).toString) // Verify topic-level config survives
 
@@ -546,12 +613,13 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     props.clear()
     props.put(KafkaConfig.LogRetentionTimeMillisProp, "604800000")
     reconfigureServers(props, perBrokerConfig = false, (KafkaConfig.LogRetentionTimeMillisProp, "604800000"))
-    log = servers.head.logManager.getLog(new TopicPartition(topic2, 0)).getOrElse(throw new IllegalStateException("Log not found"))
+    log = getLogOrThrow(new TopicPartition(topic2, 0))
     assertTrue(log.config.overriddenConfigs.contains(KafkaConfig.MinInSyncReplicasProp))
     assertEquals("2", log.config.originals().get(KafkaConfig.MinInSyncReplicasProp).toString) // Verify topic-level config still survives
   }
 
   @Test
+  @Disabled // TODO: To be re-enabled once we can make it less flaky: KAFKA-6527
   def testDefaultTopicConfig(): Unit = {
     val (producerThread, consumerThread) = startProduceConsume(retries = 0)
 
@@ -665,6 +733,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
   }
 
   @Test
+  @Disabled // TODO: To be re-enabled once we can make it less flaky: KAFKA-8280
   def testUncleanLeaderElectionEnable(): Unit = {
     val controller = servers.find(_.config.brokerId == TestUtils.waitUntilControllerElected(zkClient)).get
     val controllerId = controller.config.brokerId
@@ -864,6 +933,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
   }
 
   @Test
+  @Disabled // TODO: To be re-enabled once we can make it less flaky (KAFKA-7957)
   def testMetricsReporterUpdate(): Unit = {
     // Add a new metrics reporter
     val newProps = new Properties
@@ -951,6 +1021,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
   }
 
   @Test
+  // Modifying advertised listeners is not supported in KRaft
   def testAdvertisedListenerUpdate(): Unit = {
     val adminClient = adminClients.head
     val externalAdminClient = createAdminClient(SecurityProtocol.SASL_SSL, SecureExternal)
@@ -971,11 +1042,13 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     }
 
     // Verify that endpoints have been updated in ZK for all brokers
-    servers.foreach(validateEndpointsInZooKeeper(_, endpoints => endpoints.contains(invalidHost)))
+    servers.foreach { server =>
+      validateEndpointsInZooKeeper(server.asInstanceOf[KafkaServer], endpoints => endpoints.contains(invalidHost))
+    }
 
     // Trigger session expiry and ensure that controller registers new advertised listener after expiry
     val controllerEpoch = zkClient.getControllerEpoch
-    val controllerServer = servers(zkClient.getControllerId.getOrElse(throw new IllegalStateException("No controller")))
+    val controllerServer = servers(zkClient.getControllerId.getOrElse(throw new IllegalStateException("No controller"))).asInstanceOf[KafkaServer]
     val controllerZkClient = controllerServer.zkClient
     val sessionExpiringClient = createZooKeeperClientToTriggerSessionExpiry(controllerZkClient.currentZooKeeper)
     sessionExpiringClient.close()
@@ -999,7 +1072,9 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
       .getCause.isInstanceOf[org.apache.kafka.common.errors.TimeoutException])
 
     alterAdvertisedListener(adminClient, externalAdminClient, invalidHost, "localhost")
-    servers.foreach(validateEndpointsInZooKeeper(_, endpoints => !endpoints.contains(invalidHost)))
+    servers.foreach { server =>
+      validateEndpointsInZooKeeper(server.asInstanceOf[KafkaServer], endpoints => !endpoints.contains(invalidHost))
+    }
 
     // Verify that produce/consume work now
     val topic2 = "testtopic2"
@@ -1096,7 +1171,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     assertTrue(partitions.exists(_.leader == null), "Did not find partitions with no leader")
   }
 
-  private def addListener(servers: Seq[KafkaServer], listenerName: String, securityProtocol: SecurityProtocol,
+  private def addListener(servers: Seq[KafkaBroker], listenerName: String, securityProtocol: SecurityProtocol,
                           saslMechanisms: Seq[String]): Unit = {
     val config = servers.head.config
     val existingListenerCount = config.listeners.size
@@ -1241,11 +1316,11 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     verifyProduceConsume(producer, consumer, numRecords = 10, topic)
   }
 
-  private def hasListenerMetric(server: KafkaServer, listenerName: String): Boolean = {
+  private def hasListenerMetric(server: KafkaBroker, listenerName: String): Boolean = {
     server.socketServer.metrics.metrics.keySet.asScala.exists(_.tags.get("listener") == listenerName)
   }
 
-  private def fetchBrokerConfigsFromZooKeeper(server: KafkaServer): Properties = {
+  private def fetchBrokerConfigsFromZooKeeper(server: KafkaBroker): Properties = {
     val props = adminZkClient.fetchEntityConfig(ConfigType.Broker, server.config.brokerId.toString)
     server.config.dynamicConfig.fromPersistentProps(props, perBrokerConfig = true)
   }
@@ -1299,7 +1374,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     }, "Did not fail authentication with invalid config")
   }
 
-  private def describeConfig(adminClient: Admin, servers: Seq[KafkaServer] = this.servers): Config = {
+  private def describeConfig(adminClient: Admin, servers: Seq[KafkaBroker] = this.servers): Config = {
     val configResources = servers.map { server =>
       new ConfigResource(ConfigResource.Type.BROKER, server.config.brokerId.toString)
     }
@@ -1396,7 +1471,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
   }
 
   @nowarn("cat=deprecation")
-  private def alterConfigsOnServer(server: KafkaServer, props: Properties): Unit = {
+  private def alterConfigsOnServer(server: KafkaBroker, props: Properties): Unit = {
     val configEntries = props.asScala.map { case (k, v) => new ConfigEntry(k, v) }.toList.asJava
     val newConfig = new Config(configEntries)
     val configs = Map(new ConfigResource(ConfigResource.Type.BROKER, server.config.brokerId.toString) -> newConfig).asJava
@@ -1405,7 +1480,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
   }
 
   @nowarn("cat=deprecation")
-  private def alterConfigs(servers: Seq[KafkaServer], adminClient: Admin, props: Properties,
+  private def alterConfigs(servers: Seq[KafkaBroker], adminClient: Admin, props: Properties,
                    perBrokerConfig: Boolean): AlterConfigsResult = {
     val configEntries = props.asScala.map { case (k, v) => new ConfigEntry(k, v) }.toList.asJava
     val newConfig = new Config(configEntries)
@@ -1484,7 +1559,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
 
   private def createPasswordEncoder(config: KafkaConfig, secret: Option[Password]): PasswordEncoder = {
     val encoderSecret = secret.getOrElse(throw new IllegalStateException("Password encoder secret not configured"))
-    new PasswordEncoder(encoderSecret,
+    PasswordEncoder.encrypting(encoderSecret,
       config.passwordEncoderKeyFactoryAlgorithm,
       config.passwordEncoderCipherAlgorithm,
       config.passwordEncoderKeyLength,
@@ -1495,7 +1570,7 @@ class DynamicBrokerReconfigurationTest extends QuorumTestHarness with SaslSetup
     servers.foreach { server => waitForConfigOnServer(server, propName, propValue, maxWaitMs) }
   }
 
-  private def waitForConfigOnServer(server: KafkaServer, propName: String, propValue: String, maxWaitMs: Long = 10000): Unit = {
+  private def waitForConfigOnServer(server: KafkaBroker, propName: String, propValue: String, maxWaitMs: Long = 10000): Unit = {
     TestUtils.retry(maxWaitMs) {
       assertEquals(propValue, server.config.originals.get(propName))
     }
@@ -1810,6 +1885,7 @@ class TestMetricsReporter extends MetricsReporter with Reconfigurable with Close
   import TestMetricsReporter._
   val kafkaMetrics = ArrayBuffer[KafkaMetric]()
   @volatile var initializeCount = 0
+  @volatile var contextChangeCount = 0
   @volatile var configureCount = 0
   @volatile var reconfigureCount = 0
   @volatile var closeCount = 0
@@ -1817,7 +1893,12 @@ class TestMetricsReporter extends MetricsReporter with Reconfigurable with Close
   @volatile var pollingInterval: Int = -1
   testReporters.add(this)
 
+  override def contextChange(metricsContext: MetricsContext): Unit = {
+    contextChangeCount += 1
+  }
+
   override def init(metrics: util.List[KafkaMetric]): Unit = {
+    assertTrue(contextChangeCount > 0, "contextChange must be called before init")
     kafkaMetrics ++= metrics.asScala
     initializeCount += 1
   }
diff --git a/core/src/test/scala/integration/kafka/server/FetchRequestBetweenDifferentIbpTest.scala b/core/src/test/scala/integration/kafka/server/FetchRequestBetweenDifferentIbpTest.scala
index 405b0099ab81d..36d9c00bfd207 100644
--- a/core/src/test/scala/integration/kafka/server/FetchRequestBetweenDifferentIbpTest.scala
+++ b/core/src/test/scala/integration/kafka/server/FetchRequestBetweenDifferentIbpTest.scala
@@ -19,11 +19,13 @@ package integration.kafka.server
 
 import java.time.Duration
 import java.util.Arrays.asList
-import kafka.api.{ApiVersion, DefaultApiVersion, KAFKA_2_7_IV0, KAFKA_2_8_IV1, KAFKA_3_1_IV0}
+
 import kafka.server.{BaseRequestTest, KafkaConfig}
 import kafka.utils.TestUtils
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_7_IV0, IBP_2_8_IV1, IBP_3_1_IV0}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
@@ -35,26 +37,26 @@ class FetchRequestBetweenDifferentIbpTest extends BaseRequestTest {
   override def generateConfigs: Seq[KafkaConfig] = {
     // Brokers should be at most 2 different IBP versions, but for more test coverage, three are used here.
     Seq(
-      createConfig(0, KAFKA_2_7_IV0),
-      createConfig(1, KAFKA_2_8_IV1),
-      createConfig(2, KAFKA_3_1_IV0)
+      createConfig(0, IBP_2_7_IV0),
+      createConfig(1, IBP_2_8_IV1),
+      createConfig(2, IBP_3_1_IV0)
     )
   }
 
   @Test
   def testControllerOldIBP(): Unit = {
-    // Ensure controller version < KAFKA_2_8_IV1, and then create a topic where leader of partition 0 is not the controller,
+    // Ensure controller version < IBP_2_8_IV1, and then create a topic where leader of partition 0 is not the controller,
     // leader of partition 1 is.
-    testControllerWithGivenIBP(KAFKA_2_7_IV0, 0)
+    testControllerWithGivenIBP(IBP_2_7_IV0, 0)
   }
 
   @Test
   def testControllerNewIBP(): Unit = {
-    // Ensure controller version = KAFKA_3_1_IV0, and then create a topic where leader of partition 1 is the old version.
-    testControllerWithGivenIBP(KAFKA_3_1_IV0, 2)
+    // Ensure controller version = IBP_3_1_IV0, and then create a topic where leader of partition 1 is the old version.
+    testControllerWithGivenIBP(IBP_3_1_IV0, 2)
   }
 
-  def testControllerWithGivenIBP(version: DefaultApiVersion, controllerBroker: Int): Unit = {
+  def testControllerWithGivenIBP(version: MetadataVersion, controllerBroker: Int): Unit = {
     val topic = "topic"
     val producer = createProducer()
     val consumer = createConsumer()
@@ -79,16 +81,16 @@ class FetchRequestBetweenDifferentIbpTest extends BaseRequestTest {
 
   @Test
   def testControllerNewToOldIBP(): Unit = {
-    testControllerSwitchingIBP(KAFKA_3_1_IV0, 2, KAFKA_2_7_IV0, 0)
+    testControllerSwitchingIBP(IBP_3_1_IV0, 2, IBP_2_7_IV0, 0)
   }
 
   @Test
   def testControllerOldToNewIBP(): Unit = {
-    testControllerSwitchingIBP(KAFKA_2_7_IV0, 0, KAFKA_3_1_IV0, 2)
+    testControllerSwitchingIBP(IBP_2_7_IV0, 0, IBP_3_1_IV0, 2)
   }
 
 
-  def testControllerSwitchingIBP(version1: DefaultApiVersion, broker1: Int, version2: DefaultApiVersion, broker2: Int): Unit = {
+  def testControllerSwitchingIBP(version1: MetadataVersion, broker1: Int, version2: MetadataVersion, broker2: Int): Unit = {
     val topic = "topic"
     val topic2 = "topic2"
     val producer = createProducer()
@@ -132,7 +134,7 @@ class FetchRequestBetweenDifferentIbpTest extends BaseRequestTest {
     assertEquals(2, count2)
   }
 
-  private def ensureControllerWithIBP(version: DefaultApiVersion): Unit = {
+  private def ensureControllerWithIBP(version: MetadataVersion): Unit = {
     val nonControllerServers = servers.filter(_.config.interBrokerProtocolVersion != version)
     nonControllerServers.iterator.foreach(server => {
       server.shutdown()
@@ -143,7 +145,7 @@ class FetchRequestBetweenDifferentIbpTest extends BaseRequestTest {
     })
   }
 
-  private def createConfig(nodeId: Int, interBrokerVersion: ApiVersion): KafkaConfig = {
+  private def createConfig(nodeId: Int, interBrokerVersion: MetadataVersion): KafkaConfig = {
     val props = TestUtils.createBrokerConfig(nodeId, zkConnect)
     props.put(KafkaConfig.InterBrokerProtocolVersionProp, interBrokerVersion.version)
     KafkaConfig.fromProps(props)
diff --git a/core/src/test/scala/integration/kafka/server/FetchRequestTestDowngrade.scala b/core/src/test/scala/integration/kafka/server/FetchRequestTestDowngrade.scala
index 3c0bff81b2d1a..c714b8cc3689a 100644
--- a/core/src/test/scala/integration/kafka/server/FetchRequestTestDowngrade.scala
+++ b/core/src/test/scala/integration/kafka/server/FetchRequestTestDowngrade.scala
@@ -20,12 +20,13 @@ package integration.kafka.server
 import java.time.Duration
 import java.util.Arrays.asList
 
-import kafka.api.{ApiVersion, KAFKA_2_7_IV0, KAFKA_3_1_IV0}
 import kafka.server.{BaseRequestTest, KafkaConfig}
 import kafka.utils.TestUtils
 import kafka.zk.ZkVersion
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_7_IV0, IBP_3_1_IV0}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
@@ -37,8 +38,8 @@ class FetchRequestTestDowngrade extends BaseRequestTest {
     override def generateConfigs: Seq[KafkaConfig] = {
         // Controller should start with newer IBP and downgrade to the older one.
         Seq(
-            createConfig(0, KAFKA_3_1_IV0),
-            createConfig(1, KAFKA_2_7_IV0)
+            createConfig(0, IBP_3_1_IV0),
+            createConfig(1, IBP_2_7_IV0)
         )
     }
 
@@ -72,7 +73,7 @@ class FetchRequestTestDowngrade extends BaseRequestTest {
         }
     }
 
-    private def createConfig(nodeId: Int, interBrokerVersion: ApiVersion): KafkaConfig = {
+    private def createConfig(nodeId: Int, interBrokerVersion: MetadataVersion): KafkaConfig = {
         val props = TestUtils.createBrokerConfig(nodeId, zkConnect)
         props.put(KafkaConfig.InterBrokerProtocolVersionProp, interBrokerVersion.version)
         KafkaConfig.fromProps(props)
diff --git a/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala b/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala
index c62dbd5284374..509facf921189 100644
--- a/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala
+++ b/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala
@@ -30,18 +30,20 @@ import org.apache.kafka.common.requests.{ApiError, DescribeClusterRequest, Descr
 import org.apache.kafka.metadata.BrokerState
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{Tag, Test, Timeout}
+
 import java.util
-import java.util.concurrent.ExecutionException
 import java.util.{Arrays, Collections, Optional}
-
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.config.ConfigResource.Type
 import org.apache.kafka.common.protocol.Errors._
+import org.apache.kafka.image.ClusterImage
+import org.apache.kafka.server.common.MetadataVersion
 import org.slf4j.LoggerFactory
 
 import scala.annotation.nowarn
 import scala.collection.mutable
+import scala.concurrent.ExecutionException
 import scala.concurrent.duration.{FiniteDuration, MILLISECONDS, SECONDS}
 import scala.jdk.CollectionConverters._
 
@@ -69,8 +71,8 @@ class KRaftClusterTest {
   def testCreateClusterAndWaitForBrokerInRunningState(): Unit = {
     val cluster = new KafkaClusterTestKit.Builder(
       new TestKitNodes.Builder().
-        setNumBrokerNodes(3).
-        setNumControllerNodes(3).build()).build()
+        setNumBrokerNodes(1).
+        setNumControllerNodes(1).build()).build()
     try {
       cluster.format()
       cluster.startup()
@@ -292,6 +294,17 @@ class KRaftClusterTest {
     }
   }
 
+  @Test
+  def testCreateClusterInvalidMetadataVersion(): Unit = {
+    assertThrows(classOf[IllegalArgumentException], () => {
+      new KafkaClusterTestKit.Builder(
+        new TestKitNodes.Builder().
+          setBootstrapMetadataVersion(MetadataVersion.IBP_2_7_IV0).
+          setNumBrokerNodes(1).
+          setNumControllerNodes(1).build()).build()
+    })
+  }
+
   private def doOnStartedKafkaCluster(numControllerNodes: Int = 1,
                                       numBrokerNodes: Int,
                                       brokerPropertyOverrides: (TestKitNodes, BrokerNode) => Map[String, String])
@@ -713,4 +726,56 @@ class KRaftClusterTest {
       cluster.close()
     }
   }
+
+  private def clusterImage(
+    cluster: KafkaClusterTestKit,
+    brokerId: Int
+  ): ClusterImage = {
+    cluster.brokers().get(brokerId).metadataCache.currentImage().cluster()
+  }
+
+  private def brokerIsUnfenced(
+    image: ClusterImage,
+    brokerId: Int
+  ): Boolean = {
+    Option(image.brokers().get(brokerId)) match {
+      case None => false
+      case Some(registration) => !registration.fenced()
+    }
+  }
+
+  private def brokerIsAbsent(
+    image: ClusterImage,
+    brokerId: Int
+  ): Boolean = {
+    Option(image.brokers().get(brokerId)).isEmpty
+  }
+
+  @Test
+  def testUnregisterBroker(): Unit = {
+    val cluster = new KafkaClusterTestKit.Builder(
+      new TestKitNodes.Builder().
+        setNumBrokerNodes(4).
+        setNumControllerNodes(3).build()).build()
+    try {
+      cluster.format()
+      cluster.startup()
+      cluster.waitForReadyBrokers()
+      TestUtils.waitUntilTrue(() => brokerIsUnfenced(clusterImage(cluster, 1), 0),
+        "Timed out waiting for broker 0 to be unfenced.")
+      cluster.brokers().get(0).shutdown()
+      TestUtils.waitUntilTrue(() => !brokerIsUnfenced(clusterImage(cluster, 1), 0),
+        "Timed out waiting for broker 0 to be fenced.")
+      val admin = Admin.create(cluster.clientProperties())
+      try {
+        admin.unregisterBroker(0)
+      } finally {
+        admin.close()
+      }
+      TestUtils.waitUntilTrue(() => brokerIsAbsent(clusterImage(cluster, 1), 0),
+        "Timed out waiting for broker 0 to be fenced.")
+    } finally {
+      cluster.close()
+    }
+  }
 }
diff --git a/core/src/test/scala/integration/kafka/server/MetadataRequestBetweenDifferentIbpTest.scala b/core/src/test/scala/integration/kafka/server/MetadataRequestBetweenDifferentIbpTest.scala
index aad5ae7f9f9ef..fac859effb298 100644
--- a/core/src/test/scala/integration/kafka/server/MetadataRequestBetweenDifferentIbpTest.scala
+++ b/core/src/test/scala/integration/kafka/server/MetadataRequestBetweenDifferentIbpTest.scala
@@ -17,7 +17,6 @@
 
 package kafka.server
 
-import kafka.api.{ApiVersion, KAFKA_2_8_IV0}
 import kafka.network.SocketServer
 import kafka.utils.TestUtils
 import kafka.zk.ZkVersion
@@ -25,6 +24,8 @@ import org.apache.kafka.common.Uuid
 import org.apache.kafka.common.message.MetadataRequestData
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.{MetadataRequest, MetadataResponse}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_8_IV0
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
@@ -35,9 +36,9 @@ class MetadataRequestBetweenDifferentIbpTest extends BaseRequestTest {
   override def brokerCount: Int = 3
   override def generateConfigs: Seq[KafkaConfig] = {
     Seq(
-      createConfig(0, KAFKA_2_8_IV0),
-      createConfig(1, ApiVersion.latestVersion),
-      createConfig(2, ApiVersion.latestVersion)
+      createConfig(0, IBP_2_8_IV0),
+      createConfig(1, MetadataVersion.latest),
+      createConfig(2, MetadataVersion.latest)
     )
   }
 
@@ -58,7 +59,7 @@ class MetadataRequestBetweenDifferentIbpTest extends BaseRequestTest {
     assertEquals(topicId, topicMetadata.topicId())
     assertEquals(topic, topicMetadata.topic())
 
-    // Make the broker whose version=KAFKA_2_8_IV0 controller
+    // Make the broker whose version=IBP_2_8_IV0 controller
     ensureControllerIn(Seq(0))
 
     // Restart the broker whose ibp is higher, and the controller will send metadata request to it
@@ -77,7 +78,7 @@ class MetadataRequestBetweenDifferentIbpTest extends BaseRequestTest {
     }
   }
 
-  private def createConfig(nodeId: Int,interBrokerVersion: ApiVersion): KafkaConfig = {
+  private def createConfig(nodeId: Int, interBrokerVersion: MetadataVersion): KafkaConfig = {
     val props = TestUtils.createBrokerConfig(nodeId, zkConnect)
     props.put(KafkaConfig.InterBrokerProtocolVersionProp, interBrokerVersion.version)
     KafkaConfig.fromProps(props)
diff --git a/core/src/test/scala/integration/kafka/server/MetadataVersionIntegrationTest.scala b/core/src/test/scala/integration/kafka/server/MetadataVersionIntegrationTest.scala
new file mode 100644
index 0000000000000..c060e3a6daae4
--- /dev/null
+++ b/core/src/test/scala/integration/kafka/server/MetadataVersionIntegrationTest.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package integration.kafka.server
+
+import kafka.test.ClusterInstance
+import kafka.test.annotation.{ClusterTest, ClusterTests, Type}
+import kafka.test.junit.ClusterTestExtensions
+import kafka.utils.TestUtils
+import org.apache.kafka.clients.admin.FeatureUpdate.UpgradeType
+import org.apache.kafka.clients.admin.{FeatureUpdate, UpdateFeaturesOptions}
+import org.apache.kafka.server.common.MetadataVersion
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.extension.ExtendWith
+
+import scala.jdk.CollectionConverters._
+
+@ExtendWith(value = Array(classOf[ClusterTestExtensions]))
+class MetadataVersionIntegrationTest {
+  @ClusterTests(value = Array(
+      new ClusterTest(clusterType = Type.KRAFT, metadataVersion = MetadataVersion.IBP_3_0_IV1),
+      new ClusterTest(clusterType = Type.KRAFT, metadataVersion = MetadataVersion.IBP_3_1_IV0),
+      new ClusterTest(clusterType = Type.KRAFT, metadataVersion = MetadataVersion.IBP_3_2_IV0)
+  ))
+  def testBasicMetadataVersionUpgrade(clusterInstance: ClusterInstance): Unit = {
+    val admin = clusterInstance.createAdminClient()
+    val describeResult = admin.describeFeatures()
+    val ff = describeResult.featureMetadata().get().finalizedFeatures().get(MetadataVersion.FEATURE_NAME)
+    assertEquals(ff.minVersionLevel(), clusterInstance.config().metadataVersion().featureLevel())
+    assertEquals(ff.maxVersionLevel(), clusterInstance.config().metadataVersion().featureLevel())
+
+    // Update to new version
+    val updateVersion = MetadataVersion.IBP_3_3_IV0.featureLevel.shortValue
+    val updateResult = admin.updateFeatures(
+      Map("metadata.version" -> new FeatureUpdate(updateVersion, UpgradeType.UPGRADE)).asJava, new UpdateFeaturesOptions())
+    updateResult.all().get()
+
+    // Verify that new version is visible on broker
+    TestUtils.waitUntilTrue(() => {
+      val describeResult2 = admin.describeFeatures()
+      val ff2 = describeResult2.featureMetadata().get().finalizedFeatures().get(MetadataVersion.FEATURE_NAME)
+      ff2.minVersionLevel() == updateVersion && ff2.maxVersionLevel() == updateVersion
+    }, "Never saw metadata.version increase on broker")
+  }
+
+  @ClusterTest(clusterType = Type.KRAFT, metadataVersion = MetadataVersion.IBP_3_3_IV0)
+  def testUpgradeSameVersion(clusterInstance: ClusterInstance): Unit = {
+    val admin = clusterInstance.createAdminClient()
+    val updateVersion = MetadataVersion.IBP_3_3_IV0.featureLevel.shortValue
+    val updateResult = admin.updateFeatures(
+      Map("metadata.version" -> new FeatureUpdate(updateVersion, UpgradeType.UPGRADE)).asJava, new UpdateFeaturesOptions())
+    updateResult.all().get()
+  }
+
+  @ClusterTest(clusterType = Type.KRAFT)
+  def testDefaultIsLatestVersion(clusterInstance: ClusterInstance): Unit = {
+    val admin = clusterInstance.createAdminClient()
+    val describeResult = admin.describeFeatures()
+    val ff = describeResult.featureMetadata().get().finalizedFeatures().get(MetadataVersion.FEATURE_NAME)
+    assertEquals(ff.minVersionLevel(), MetadataVersion.latest().featureLevel(),
+      "If this test fails, check the default MetadataVersion in the @ClusterTest annotation")
+    assertEquals(ff.maxVersionLevel(), MetadataVersion.latest().featureLevel())
+  }
+}
diff --git a/core/src/test/scala/integration/kafka/server/QuorumTestHarness.scala b/core/src/test/scala/integration/kafka/server/QuorumTestHarness.scala
index 5bfa651ee4c74..c4ca966f9abcb 100755
--- a/core/src/test/scala/integration/kafka/server/QuorumTestHarness.scala
+++ b/core/src/test/scala/integration/kafka/server/QuorumTestHarness.scala
@@ -22,9 +22,9 @@ import java.net.InetSocketAddress
 import java.util
 import java.util.{Collections, Properties}
 import java.util.concurrent.CompletableFuture
-
 import javax.security.auth.login.Configuration
 import kafka.raft.KafkaRaftManager
+import kafka.server.metadata.BrokerServerMetrics
 import kafka.tools.StorageTool
 import kafka.utils.{CoreUtils, Logging, TestInfoUtils, TestUtils}
 import kafka.zk.{AdminZkClient, EmbeddedZookeeper, KafkaZkClient}
@@ -33,32 +33,47 @@ import org.apache.kafka.common.{TopicPartition, Uuid}
 import org.apache.kafka.common.security.JaasUtils
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.utils.{Exit, Time}
+import org.apache.kafka.controller.{BootstrapMetadata, QuorumControllerMetrics}
 import org.apache.kafka.metadata.MetadataRecordSerde
 import org.apache.kafka.raft.RaftConfig.{AddressSpec, InetAddressSpec}
-import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.server.common.{ApiMessageAndVersion, MetadataVersion}
+import org.apache.kafka.server.fault.{FaultHandler, MockFaultHandler}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.zookeeper.client.ZKClientConfig
 import org.apache.zookeeper.{WatchedEvent, Watcher, ZooKeeper}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterAll, AfterEach, BeforeAll, BeforeEach, Tag, TestInfo}
 
+import scala.collection.mutable.ListBuffer
 import scala.collection.{Seq, immutable}
+import scala.compat.java8.OptionConverters._
+import scala.jdk.CollectionConverters._
 
 trait QuorumImplementation {
-  def createBroker(config: KafkaConfig,
-                   time: Time,
-                   startup: Boolean): KafkaBroker
+  def createBroker(
+    config: KafkaConfig,
+    time: Time = Time.SYSTEM,
+    startup: Boolean = true,
+    threadNamePrefix: Option[String] = None,
+  ): KafkaBroker
 
   def shutdown(): Unit
 }
 
-class ZooKeeperQuorumImplementation(val zookeeper: EmbeddedZookeeper,
-                                    val zkClient: KafkaZkClient,
-                                    val adminZkClient: AdminZkClient,
-                                    val log: Logging) extends QuorumImplementation {
-  override def createBroker(config: KafkaConfig,
-                            time: Time,
-                            startup: Boolean): KafkaBroker = {
-    val server = new KafkaServer(config, time, None, false)
+class ZooKeeperQuorumImplementation(
+  val zookeeper: EmbeddedZookeeper,
+  val zkConnect: String,
+  val zkClient: KafkaZkClient,
+  val adminZkClient: AdminZkClient,
+  val log: Logging
+) extends QuorumImplementation {
+  override def createBroker(
+    config: KafkaConfig,
+    time: Time,
+    startup: Boolean,
+    threadNamePrefix: Option[String],
+  ): KafkaBroker = {
+    val server = new KafkaServer(config, time, threadNamePrefix, false)
     if (startup) server.startup()
     server
   }
@@ -69,24 +84,34 @@ class ZooKeeperQuorumImplementation(val zookeeper: EmbeddedZookeeper,
   }
 }
 
-class KRaftQuorumImplementation(val raftManager: KafkaRaftManager[ApiMessageAndVersion],
-                                val controllerServer: ControllerServer,
-                                val metadataDir: File,
-                                val controllerQuorumVotersFuture: CompletableFuture[util.Map[Integer, AddressSpec]],
-                                val clusterId: String,
-                                val log: Logging) extends QuorumImplementation {
-  override def createBroker(config: KafkaConfig,
-                            time: Time,
-                            startup: Boolean): KafkaBroker = {
+class KRaftQuorumImplementation(
+  val raftManager: KafkaRaftManager[ApiMessageAndVersion],
+  val controllerServer: ControllerServer,
+  val metadataDir: File,
+  val controllerQuorumVotersFuture: CompletableFuture[util.Map[Integer, AddressSpec]],
+  val clusterId: String,
+  val log: Logging,
+  val faultHandler: FaultHandler
+) extends QuorumImplementation {
+  override def createBroker(
+    config: KafkaConfig,
+    time: Time,
+    startup: Boolean,
+    threadNamePrefix: Option[String],
+  ): KafkaBroker = {
+    val metrics = new Metrics()
     val broker = new BrokerServer(config = config,
       metaProps = new MetaProperties(clusterId, config.nodeId),
       raftManager = raftManager,
       time = time,
-      metrics = new Metrics(),
+      metrics = metrics,
+      brokerMetrics = BrokerServerMetrics(metrics),
       threadNamePrefix = Some("Broker%02d_".format(config.nodeId)),
       initialOfflineDirs = Seq(),
       controllerQuorumVotersFuture = controllerQuorumVotersFuture,
-      supportedFeatures = Collections.emptyMap())
+      fatalFaultHandler = faultHandler,
+      metadataLoadingFaultHandler = faultHandler,
+      metadataPublishingFaultHandler = faultHandler)
     if (startup) broker.startup()
     broker
   }
@@ -115,9 +140,16 @@ abstract class QuorumTestHarness extends Logging {
     Seq(new Properties())
   }
 
+  protected def metadataVersion: MetadataVersion = MetadataVersion.latest()
+
+  val bootstrapRecords: ListBuffer[ApiMessageAndVersion] = ListBuffer()
+
+  private var testInfo: TestInfo = null
   private var implementation: QuorumImplementation = null
 
-  def isKRaftTest(): Boolean = implementation.isInstanceOf[KRaftQuorumImplementation]
+  def isKRaftTest(): Boolean = {
+    TestInfoUtils.isKRaft(testInfo)
+  }
 
   def checkIsZKTest(): Unit = {
     if (isKRaftTest()) {
@@ -167,6 +199,8 @@ abstract class QuorumTestHarness extends Logging {
     }
   }
 
+  val faultHandler = new MockFaultHandler("quorumTestHarnessFaultHandler")
+
   // Note: according to the junit documentation: "JUnit Jupiter does not guarantee the execution
   // order of multiple @BeforeEach methods that are declared within a single test class or test
   // interface." Therefore, if you have things you would like to do before each test case runs, it
@@ -174,6 +208,7 @@ abstract class QuorumTestHarness extends Logging {
   // That way you control the initialization order.
   @BeforeEach
   def setUp(testInfo: TestInfo): Unit = {
+    this.testInfo = testInfo
     Exit.setExitProcedure((code, message) => {
       try {
         throw new RuntimeException(s"exit(${code}, ${message}) called!")
@@ -194,24 +229,25 @@ abstract class QuorumTestHarness extends Logging {
         tearDown()
       }
     })
-    val name = if (testInfo.getTestMethod().isPresent()) {
-      testInfo.getTestMethod().get().toString()
-    } else {
-      "[unspecified]"
-    }
+    val name = testInfo.getTestMethod.asScala
+      .map(_.toString)
+      .getOrElse("[unspecified]")
     if (TestInfoUtils.isKRaft(testInfo)) {
-      info(s"Running KRAFT test ${name}")
+      info(s"Running KRAFT test $name")
       implementation = newKRaftQuorum(testInfo)
     } else {
-      info(s"Running ZK test ${name}")
+      info(s"Running ZK test $name")
       implementation = newZooKeeperQuorum()
     }
   }
 
-  def createBroker(config: KafkaConfig,
-                   time: Time = Time.SYSTEM,
-                   startup: Boolean = true): KafkaBroker = {
-    implementation.createBroker(config, time, startup)
+  def createBroker(
+    config: KafkaConfig,
+    time: Time = Time.SYSTEM,
+    startup: Boolean = true,
+    threadNamePrefix: Option[String] = None
+  ): KafkaBroker = {
+    implementation.createBroker(config, time, startup, threadNamePrefix)
   }
 
   def shutdownZooKeeper(): Unit = asZk().shutdown()
@@ -228,7 +264,7 @@ abstract class QuorumTestHarness extends Logging {
     var out: PrintStream = null
     try {
       out = new PrintStream(stream)
-      if (StorageTool.formatCommand(out, directories, metaProperties, false) != 0) {
+      if (StorageTool.formatCommand(out, directories, metaProperties, metadataVersion, ignoreFormatted = false) != 0) {
         throw new RuntimeException(stream.toString())
       }
       debug(s"Formatted storage directory(ies) ${directories}")
@@ -239,24 +275,26 @@ abstract class QuorumTestHarness extends Logging {
   }
 
   private def newKRaftQuorum(testInfo: TestInfo): KRaftQuorumImplementation = {
-    val clusterId = Uuid.randomUuid().toString
-    val metadataDir = TestUtils.tempDir()
-    val metaProperties = new MetaProperties(clusterId, 0)
-    formatDirectories(immutable.Seq(metadataDir.getAbsolutePath()), metaProperties)
-    val controllerMetrics = new Metrics()
     val propsList = kraftControllerConfigs()
     if (propsList.size != 1) {
       throw new RuntimeException("Only one KRaft controller is supported for now.")
     }
     val props = propsList(0)
     props.setProperty(KafkaConfig.ProcessRolesProp, "controller")
-    props.setProperty(KafkaConfig.NodeIdProp, "1000")
+    if (props.getProperty(KafkaConfig.NodeIdProp) == null) {
+      props.setProperty(KafkaConfig.NodeIdProp, "1000")
+    }
+    val nodeId = Integer.parseInt(props.getProperty(KafkaConfig.NodeIdProp))
+    val metadataDir = TestUtils.tempDir()
+    val metaProperties = new MetaProperties(Uuid.randomUuid().toString, nodeId)
+    formatDirectories(immutable.Seq(metadataDir.getAbsolutePath()), metaProperties)
+    val controllerMetrics = new Metrics()
     props.setProperty(KafkaConfig.MetadataLogDirProp, metadataDir.getAbsolutePath())
     val proto = controllerListenerSecurityProtocol.toString()
     props.setProperty(KafkaConfig.ListenerSecurityProtocolMapProp, s"CONTROLLER:${proto}")
     props.setProperty(KafkaConfig.ListenersProp, s"CONTROLLER://localhost:0")
     props.setProperty(KafkaConfig.ControllerListenerNamesProp, "CONTROLLER")
-    props.setProperty(KafkaConfig.QuorumVotersProp, "1000@localhost:0")
+    props.setProperty(KafkaConfig.QuorumVotersProp, s"${nodeId}@localhost:0")
     val config = new KafkaConfig(props)
     val threadNamePrefix = "Controller_" + testInfo.getDisplayName
     val controllerQuorumVotersFuture = new CompletableFuture[util.Map[Integer, AddressSpec]]
@@ -278,16 +316,21 @@ abstract class QuorumTestHarness extends Logging {
         raftManager = raftManager,
         time = Time.SYSTEM,
         metrics = controllerMetrics,
+        controllerMetrics = new QuorumControllerMetrics(KafkaYammerMetrics.defaultRegistry(), Time.SYSTEM),
         threadNamePrefix = Option(threadNamePrefix),
         controllerQuorumVotersFuture = controllerQuorumVotersFuture,
         configSchema = KafkaRaftServer.configSchema,
+        raftApiVersions = raftManager.apiVersions,
+        bootstrapMetadata = BootstrapMetadata.create(metadataVersion, bootstrapRecords.asJava),
+        metadataFaultHandler = faultHandler,
+        fatalFaultHandler = faultHandler,
       )
       controllerServer.socketServerFirstBoundPortFuture.whenComplete((port, e) => {
         if (e != null) {
           error("Error completing controller socket server future", e)
           controllerQuorumVotersFuture.completeExceptionally(e)
         } else {
-          controllerQuorumVotersFuture.complete(Collections.singletonMap(1000,
+          controllerQuorumVotersFuture.complete(Collections.singletonMap(nodeId,
             new InetAddressSpec(new InetSocketAddress("localhost", port))))
         }
       })
@@ -303,16 +346,19 @@ abstract class QuorumTestHarness extends Logging {
       controllerServer,
       metadataDir,
       controllerQuorumVotersFuture,
-      clusterId,
-      this)
+      metaProperties.clusterId,
+      this,
+      faultHandler)
   }
 
   private def newZooKeeperQuorum(): ZooKeeperQuorumImplementation = {
     val zookeeper = new EmbeddedZookeeper()
     var zkClient: KafkaZkClient = null
     var adminZkClient: AdminZkClient = null
+    val zkConnect = s"127.0.0.1:${zookeeper.port}"
     try {
-      zkClient = KafkaZkClient(s"127.0.0.1:${zookeeper.port}",
+      zkClient = KafkaZkClient(
+        zkConnect,
         zkAclsEnabled.getOrElse(JaasUtils.isZkSaslEnabled),
         zkSessionTimeout,
         zkConnectionTimeout,
@@ -327,10 +373,13 @@ abstract class QuorumTestHarness extends Logging {
         if (zkClient != null) CoreUtils.swallow(zkClient.close(), this)
         throw t
     }
-    new ZooKeeperQuorumImplementation(zookeeper,
+    new ZooKeeperQuorumImplementation(
+      zookeeper,
+      zkConnect,
       zkClient,
       adminZkClient,
-      this)
+      this
+    )
   }
 
   @AfterEach
@@ -342,6 +391,7 @@ abstract class QuorumTestHarness extends Logging {
     }
     System.clearProperty(JaasUtils.JAVA_LOGIN_CONFIG_PARAM)
     Configuration.setConfiguration(null)
+    faultHandler.maybeRethrowFirstException()
   }
 
   // Trigger session expiry by reusing the session id in another client
diff --git a/core/src/test/scala/integration/kafka/server/RaftClusterSnapshotTest.scala b/core/src/test/scala/integration/kafka/server/RaftClusterSnapshotTest.scala
index e34a5ed6edbbe..f8dccd17d0d8a 100644
--- a/core/src/test/scala/integration/kafka/server/RaftClusterSnapshotTest.scala
+++ b/core/src/test/scala/integration/kafka/server/RaftClusterSnapshotTest.scala
@@ -21,6 +21,7 @@ import java.util.Collections
 import kafka.testkit.KafkaClusterTestKit
 import kafka.testkit.TestKitNodes
 import kafka.utils.TestUtils
+import kafka.server.KafkaConfig.{MetadataMaxIdleIntervalMsProp, MetadataSnapshotMaxNewRecordBytesProp}
 import org.apache.kafka.common.utils.BufferSupplier
 import org.apache.kafka.metadata.MetadataRecordSerde
 import org.apache.kafka.snapshot.RecordsSnapshotReader
@@ -38,7 +39,6 @@ class RaftClusterSnapshotTest {
   def testSnapshotsGenerated(): Unit = {
     val numberOfBrokers = 3
     val numberOfControllers = 3
-    val metadataSnapshotMaxNewRecordBytes = 100
 
     TestUtils.resource(
       new KafkaClusterTestKit
@@ -48,10 +48,8 @@ class RaftClusterSnapshotTest {
             .setNumControllerNodes(numberOfControllers)
             .build()
         )
-        .setConfigProp(
-          KafkaConfig.MetadataSnapshotMaxNewRecordBytesProp,
-          metadataSnapshotMaxNewRecordBytes.toString
-        )
+        .setConfigProp(MetadataSnapshotMaxNewRecordBytesProp, "10")
+        .setConfigProp(MetadataMaxIdleIntervalMsProp, "0")
         .build()
     ) { cluster =>
       cluster.format()
@@ -80,7 +78,8 @@ class RaftClusterSnapshotTest {
             raftManager.replicatedLog.latestSnapshot.get(),
             new MetadataRecordSerde(),
             BufferSupplier.create(),
-            1
+            1,
+            true
           )
         ) { snapshot =>
           // Check that the snapshot is non-empty
diff --git a/core/src/test/scala/integration/kafka/tools/MirrorMakerIntegrationTest.scala b/core/src/test/scala/integration/kafka/tools/MirrorMakerIntegrationTest.scala
index 4f673cdd60ada..c64d25fe4e6a6 100644
--- a/core/src/test/scala/integration/kafka/tools/MirrorMakerIntegrationTest.scala
+++ b/core/src/test/scala/integration/kafka/tools/MirrorMakerIntegrationTest.scala
@@ -18,26 +18,27 @@ package kafka.tools
 
 import java.util.Properties
 import java.util.concurrent.atomic.AtomicBoolean
-
 import scala.collection.Seq
 import kafka.integration.KafkaServerTestHarness
 import kafka.server.KafkaConfig
 import kafka.tools.MirrorMaker.{ConsumerWrapper, MirrorMakerProducer, NoRecordsException}
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
 import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.errors.TimeoutException
 import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer}
 import org.apache.kafka.common.utils.Exit
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
 import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 @deprecated(message = "Use the Connect-based MirrorMaker instead (aka MM2).", since = "3.0")
 class MirrorMakerIntegrationTest extends KafkaServerTestHarness {
 
   override def generateConfigs: Seq[KafkaConfig] =
-    TestUtils.createBrokerConfigs(1, zkConnect).map(KafkaConfig.fromProps(_, new Properties()))
+    TestUtils.createBrokerConfigs(1, zkConnectOrNull).map(KafkaConfig.fromProps(_, new Properties()))
 
   val exited = new AtomicBoolean(false)
 
@@ -57,8 +58,9 @@ class MirrorMakerIntegrationTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testCommitOffsetsThrowTimeoutException(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCommitOffsetsThrowTimeoutException(quorum: String): Unit = {
     val consumerProps = new Properties
     consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "test-group")
     consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
@@ -70,8 +72,9 @@ class MirrorMakerIntegrationTest extends KafkaServerTestHarness {
     assertThrows(classOf[TimeoutException], () => mirrorMakerConsumer.commit())
   }
 
-  @Test
-  def testCommitOffsetsRemoveNonExistentTopics(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCommitOffsetsRemoveNonExistentTopics(quorum: String): Unit = {
     val consumerProps = new Properties
     consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, "test-group")
     consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")
@@ -85,8 +88,9 @@ class MirrorMakerIntegrationTest extends KafkaServerTestHarness {
     assertTrue(mirrorMakerConsumer.offsets.isEmpty, "Offsets for non-existent topics should be removed")
   }
 
-  @Test
-  def testCommaSeparatedRegex(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testCommaSeparatedRegex(quorum: String): Unit = {
     val topic = "new-topic"
     val msg = "a test message"
 
diff --git a/core/src/main/scala/kafka/common/KafkaException.scala b/core/src/test/scala/kafka/api/LeaderAndIsrTest.scala
similarity index 58%
rename from core/src/main/scala/kafka/common/KafkaException.scala
rename to core/src/test/scala/kafka/api/LeaderAndIsrTest.scala
index 9c34dd9bd78b4..bcb48f748ed19 100644
--- a/core/src/main/scala/kafka/common/KafkaException.scala
+++ b/core/src/test/scala/kafka/api/LeaderAndIsrTest.scala
@@ -13,15 +13,22 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
-*/
-package kafka.common
-
-/**
- * Usage of this class is discouraged. Use org.apache.kafka.common.KafkaException instead.
- *
- * This class will be removed once kafka.security.auth classes are removed.
  */
-class KafkaException(message: String, t: Throwable) extends RuntimeException(message, t) {
-  def this(message: String) = this(message, null)
-  def this(t: Throwable) = this("", t)
+
+package kafka.api
+
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+
+final class LeaderAndIsrTest {
+  @Test
+  def testRecoveringLeaderAndIsr(): Unit = {
+    val leaderAndIsr = LeaderAndIsr(1, List(1, 2))
+    val recoveringLeaderAndIsr = leaderAndIsr.newRecoveringLeaderAndIsr(3, List(3))
+
+    assertEquals(3, recoveringLeaderAndIsr.leader)
+    assertEquals(List(3), recoveringLeaderAndIsr.isr)
+    assertEquals(LeaderRecoveryState.RECOVERING, recoveringLeaderAndIsr.leaderRecoveryState)
+  }
 }
diff --git a/core/src/test/scala/kafka/server/BrokerToControllerRequestThreadTest.scala b/core/src/test/scala/kafka/server/BrokerToControllerRequestThreadTest.scala
index 3297ec01ecef6..bee1aefaca28f 100644
--- a/core/src/test/scala/kafka/server/BrokerToControllerRequestThreadTest.scala
+++ b/core/src/test/scala/kafka/server/BrokerToControllerRequestThreadTest.scala
@@ -19,13 +19,14 @@ package kafka.server
 
 import java.nio.ByteBuffer
 import java.util.Collections
-import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference}
+import java.util.concurrent.atomic.AtomicReference
 import kafka.utils.TestUtils
+import kafka.utils.TestUtils.TestControllerRequestCompletionHandler
 import org.apache.kafka.clients.{ClientResponse, ManualMetadataUpdater, Metadata, MockClient, NodeApiVersions}
 import org.apache.kafka.common.Node
 import org.apache.kafka.common.message.{EnvelopeResponseData, MetadataRequestData}
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
-import org.apache.kafka.common.requests.{AbstractRequest, EnvelopeRequest, EnvelopeResponse, MetadataRequest, MetadataResponse, RequestTestUtils}
+import org.apache.kafka.common.requests.{AbstractRequest, EnvelopeRequest, EnvelopeResponse, MetadataRequest, RequestTestUtils}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
 import org.apache.kafka.common.security.authenticator.DefaultKafkaPrincipalBuilder
 import org.apache.kafka.common.utils.MockTime
@@ -51,7 +52,7 @@ class BrokerToControllerRequestThreadTest {
       config, time, "", retryTimeoutMs)
     testRequestThread.started = true
 
-    val completionHandler = new TestRequestCompletionHandler(None)
+    val completionHandler = new TestControllerRequestCompletionHandler(None)
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()),
@@ -89,7 +90,7 @@ class BrokerToControllerRequestThreadTest {
     testRequestThread.started = true
     mockClient.prepareResponse(expectedResponse)
 
-    val completionHandler = new TestRequestCompletionHandler(Some(expectedResponse))
+    val completionHandler = new TestControllerRequestCompletionHandler(Some(expectedResponse))
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()),
@@ -130,7 +131,7 @@ class BrokerToControllerRequestThreadTest {
       controllerNodeProvider, config, time, "", retryTimeoutMs = Long.MaxValue)
     testRequestThread.started = true
 
-    val completionHandler = new TestRequestCompletionHandler(Some(expectedResponse))
+    val completionHandler = new TestControllerRequestCompletionHandler(Some(expectedResponse))
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()),
@@ -180,7 +181,7 @@ class BrokerToControllerRequestThreadTest {
       config, time, "", retryTimeoutMs = Long.MaxValue)
     testRequestThread.started = true
 
-    val completionHandler = new TestRequestCompletionHandler(Some(expectedResponse))
+    val completionHandler = new TestControllerRequestCompletionHandler(Some(expectedResponse))
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()
@@ -243,7 +244,7 @@ class BrokerToControllerRequestThreadTest {
       config, time, "", retryTimeoutMs = Long.MaxValue)
     testRequestThread.started = true
 
-    val completionHandler = new TestRequestCompletionHandler(Some(expectedResponse))
+    val completionHandler = new TestControllerRequestCompletionHandler(Some(expectedResponse))
     val kafkaPrincipal = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, "principal", true)
     val kafkaPrincipalBuilder = new DefaultKafkaPrincipalBuilder(null, null)
 
@@ -305,7 +306,7 @@ class BrokerToControllerRequestThreadTest {
       config, time, "", retryTimeoutMs)
     testRequestThread.started = true
 
-    val completionHandler = new TestRequestCompletionHandler()
+    val completionHandler = new TestControllerRequestCompletionHandler()
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()
@@ -419,7 +420,7 @@ class BrokerToControllerRequestThreadTest {
     val testRequestThread = new BrokerToControllerRequestThread(mockClient, new ManualMetadataUpdater(), controllerNodeProvider,
       config, time, "", retryTimeoutMs = Long.MaxValue)
 
-    val completionHandler = new TestRequestCompletionHandler(None)
+    val completionHandler = new TestControllerRequestCompletionHandler(None)
     val queueItem = BrokerToControllerQueueItem(
       time.milliseconds(),
       new MetadataRequest.Builder(new MetadataRequestData()),
@@ -445,22 +446,4 @@ class BrokerToControllerRequestThreadTest {
       fail(s"Condition failed to be met after polling $tries times")
     }
   }
-
-  class TestRequestCompletionHandler(
-    expectedResponse: Option[MetadataResponse] = None
-  ) extends ControllerRequestCompletionHandler {
-    val completed: AtomicBoolean = new AtomicBoolean(false)
-    val timedOut: AtomicBoolean = new AtomicBoolean(false)
-
-    override def onComplete(response: ClientResponse): Unit = {
-      expectedResponse.foreach { expected =>
-        assertEquals(expected, response.responseBody())
-      }
-      completed.set(true)
-    }
-
-    override def onTimeout(): Unit = {
-      timedOut.set(true)
-    }
-  }
 }
diff --git a/core/src/test/scala/kafka/server/metadata/BrokerServerMetricsTest.scala b/core/src/test/scala/kafka/server/metadata/BrokerServerMetricsTest.scala
new file mode 100644
index 0000000000000..ea2b439c166ae
--- /dev/null
+++ b/core/src/test/scala/kafka/server/metadata/BrokerServerMetricsTest.scala
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server.metadata
+
+import java.util.Collections
+import kafka.utils.TestUtils
+import org.apache.kafka.common.MetricName
+import org.apache.kafka.common.metrics.Metrics
+import org.apache.kafka.common.utils.MockTime
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+import scala.jdk.CollectionConverters._
+
+final class BrokerServerMetricsTest {
+  @Test
+  def testMetricsExported(): Unit = {
+    val metrics = new Metrics()
+    val expectedGroup = "broker-metadata-metrics"
+
+    // Metric description is not use for metric name equality
+    val expectedMetrics = Set(
+      new MetricName("last-applied-record-offset", expectedGroup, "", Collections.emptyMap()),
+      new MetricName("last-applied-record-timestamp", expectedGroup, "", Collections.emptyMap()),
+      new MetricName("last-applied-record-lag-ms", expectedGroup, "", Collections.emptyMap()),
+      new MetricName("metadata-load-error-count", expectedGroup, "", Collections.emptyMap()),
+      new MetricName("metadata-apply-error-count", expectedGroup, "", Collections.emptyMap())
+    )
+     
+    TestUtils.resource(BrokerServerMetrics(metrics)) { brokerMetrics =>
+      val metricsMap = metrics.metrics().asScala.filter{ case (name, _) => name.group == expectedGroup }
+      assertEquals(expectedMetrics.size, metricsMap.size)
+      metricsMap.foreach { case (name, metric) =>
+        assertTrue(expectedMetrics.contains(name))
+      }
+    }
+
+    val metricsMap = metrics.metrics().asScala.filter{ case (name, _) => name.group == expectedGroup }
+    assertEquals(0, metricsMap.size)
+  }
+
+  @Test
+  def testLastAppliedRecordOffset(): Unit = {
+    val metrics = new Metrics()
+    TestUtils.resource(BrokerServerMetrics(metrics)) { brokerMetrics =>
+      val offsetMetric = metrics.metrics().get(brokerMetrics.lastAppliedRecordOffsetName)
+      assertEquals(0, offsetMetric.metricValue.asInstanceOf[Long])
+
+      // Update metric value and check
+      val expectedValue = 1000
+      brokerMetrics.lastAppliedRecordOffset.set(expectedValue)
+      assertEquals(expectedValue, offsetMetric.metricValue.asInstanceOf[Long])
+    }
+  }
+
+  @Test
+  def testLastAppliedRecordTimestamp(): Unit = {
+    val time = new MockTime()
+    val metrics = new Metrics(time)
+    TestUtils.resource(BrokerServerMetrics(metrics)) { brokerMetrics =>
+      time.sleep(1000)
+      val timestampMetric = metrics.metrics().get(brokerMetrics.lastAppliedRecordTimestampName)
+      val lagMetric = metrics.metrics().get(brokerMetrics.lastAppliedRecordLagMsName)
+
+      assertEquals(0, timestampMetric.metricValue.asInstanceOf[Long])
+      assertEquals(time.milliseconds, lagMetric.metricValue.asInstanceOf[Long])
+
+      // Update metric value and check
+      val timestamp = 500
+      brokerMetrics.lastAppliedRecordTimestamp.set(timestamp)
+      assertEquals(timestamp, timestampMetric.metricValue.asInstanceOf[Long])
+      assertEquals(time.milliseconds - timestamp, lagMetric.metricValue.asInstanceOf[Long])
+    }
+  }
+
+  @Test
+  def testMetadataLoadErrorCount(): Unit = {
+    val time = new MockTime()
+    val metrics = new Metrics(time)
+    TestUtils.resource(BrokerServerMetrics(metrics)) { brokerMetrics =>
+      val metadataLoadErrorCountMetric = metrics.metrics().get(brokerMetrics.metadataLoadErrorCountName)
+
+      assertEquals(0L, metadataLoadErrorCountMetric.metricValue.asInstanceOf[Long])
+
+      // Update metric value and check
+      val errorCount = 100
+      brokerMetrics.metadataLoadErrorCount.set(errorCount)
+      assertEquals(errorCount, metadataLoadErrorCountMetric.metricValue.asInstanceOf[Long])
+    }
+  }
+
+  @Test
+  def testMetadataApplyErrorCount(): Unit = {
+    val time = new MockTime()
+    val metrics = new Metrics(time)
+    TestUtils.resource(BrokerServerMetrics(metrics)) { brokerMetrics =>
+      val metadataApplyErrorCountMetric = metrics.metrics().get(brokerMetrics.metadataApplyErrorCountName)
+
+      assertEquals(0L, metadataApplyErrorCountMetric.metricValue.asInstanceOf[Long])
+
+      // Update metric value and check
+      val errorCount = 100
+      brokerMetrics.metadataApplyErrorCount.set(errorCount)
+      assertEquals(errorCount, metadataApplyErrorCountMetric.metricValue.asInstanceOf[Long])
+    }
+  }
+}
diff --git a/core/src/test/scala/kafka/tools/GetOffsetShellParsingTest.scala b/core/src/test/scala/kafka/tools/GetOffsetShellParsingTest.scala
index edfadea401eca..889631be19332 100644
--- a/core/src/test/scala/kafka/tools/GetOffsetShellParsingTest.scala
+++ b/core/src/test/scala/kafka/tools/GetOffsetShellParsingTest.scala
@@ -17,191 +17,248 @@
 
 package kafka.tools
 
-import org.apache.kafka.common.PartitionInfo
-import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertThrows, assertTrue}
+import org.apache.kafka.common.TopicPartition
+import org.junit.jupiter.api.Assertions.{assertFalse, assertThrows, assertTrue}
 import org.junit.jupiter.api.Test
-import org.junit.jupiter.params.ParameterizedTest
-import org.junit.jupiter.params.provider.ValueSource
 
 class GetOffsetShellParsingTest {
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForTopicName(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertFalse(filter.apply(partitionInfo("test1", 0)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 0)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForInternalTopicName(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList("__consumer_offsets", excludeInternal)
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 0)))
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 1)))
-    assertFalse(filter.apply(partitionInfo("test1", 0)))
-    assertFalse(filter.apply(partitionInfo("test2", 0)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForTopicNameList(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test,test1,__consumer_offsets", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test1", 1)))
-    assertFalse(filter.apply(partitionInfo("test2", 0)))
-
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 0)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForRegex(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test.*", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test1", 1)))
-    assertTrue(filter.apply(partitionInfo("test2", 0)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 0)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForPartitionIndexSpec(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":0", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test1", 0)))
-    assertFalse(filter.apply(partitionInfo("test2", 1)))
-
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 0)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 1)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForPartitionRangeSpec(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-3", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertTrue(filter.apply(partitionInfo("test1", 2)))
-    assertFalse(filter.apply(partitionInfo("test2", 0)))
-    assertFalse(filter.apply(partitionInfo("test2", 3)))
-
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 2)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 3)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForPartitionLowerBoundSpec(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertTrue(filter.apply(partitionInfo("test1", 2)))
-    assertFalse(filter.apply(partitionInfo("test2", 0)))
-
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 2)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 0)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterForPartitionUpperBoundSpec(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":-3", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test1", 1)))
-    assertTrue(filter.apply(partitionInfo("test2", 2)))
-    assertFalse(filter.apply(partitionInfo("test3", 3)))
-
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 2)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 3)))
-  }
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testTopicPartitionFilterComplex(excludeInternal: Boolean): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test.*:0,__consumer_offsets:1-2,.*:3", excludeInternal)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test", 3)))
-    assertFalse(filter.apply(partitionInfo("test", 1)))
-
-    assertTrue(filter.apply(partitionInfo("test1", 0)))
-    assertTrue(filter.apply(partitionInfo("test1", 3)))
-    assertFalse(filter.apply(partitionInfo("test1", 1)))
 
-    assertTrue(filter.apply(partitionInfo("custom", 3)))
-    assertFalse(filter.apply(partitionInfo("custom", 0)))
+  @Test
+  def testTopicPartitionFilterForTopicName(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("test1"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForInternalTopicName(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList("__consumer_offsets")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("test1"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("test2"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 1)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 0)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForTopicNameList(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test,test1,__consumer_offsets")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("test2"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 0)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForRegex(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test.*")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test2"))
+    assertFalse(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForPartitionIndexSpec(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":0")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test2"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 1)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForPartitionRangeSpec(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-3")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test2"))
 
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 1)))
-    assertEquals(!excludeInternal, filter.apply(partitionInfo("__consumer_offsets", 3)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 0)))
-    assertFalse(filter.apply(partitionInfo("__consumer_offsets", 2)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 2)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 2)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 3)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForPartitionLowerBoundSpec(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test2"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 2)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 2)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+  }
+
+  @Test
+  def testTopicPartitionFilterForPartitionUpperBoundSpec(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":-3")
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test2"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test3"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test2", 2)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 2)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test3", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 3)))
+  }
+
+  @Test
+  def testTopicPartitionFilterComplex(): Unit = {
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList("test.*:0,__consumer_offsets:1-2,.*:3")
+
+    assertTrue(topicPartitionFilter.isTopicAllowed("test"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("test1"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("custom"))
+    assertTrue(topicPartitionFilter.isTopicAllowed("__consumer_offsets"))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test1", 1)))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("custom", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("custom", 0)))
+
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("__consumer_offsets", 2)))
   }
 
   @Test
   def testPartitionFilterForSingleIndex(): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1", excludeInternalTopics = false)
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertFalse(filter.apply(partitionInfo("test", 0)))
-    assertFalse(filter.apply(partitionInfo("test", 2)))
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1")
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 2)))
   }
 
   @Test
   def testPartitionFilterForRange(): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-3", excludeInternalTopics = false)
-    assertFalse(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertTrue(filter.apply(partitionInfo("test", 2)))
-    assertFalse(filter.apply(partitionInfo("test", 3)))
-    assertFalse(filter.apply(partitionInfo("test", 4)))
-    assertFalse(filter.apply(partitionInfo("test", 5)))
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":1-3")
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 2)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 4)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 5)))
   }
 
   @Test
   def testPartitionFilterForLowerBound(): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":3-", excludeInternalTopics = false)
-    assertFalse(filter.apply(partitionInfo("test", 0)))
-    assertFalse(filter.apply(partitionInfo("test", 1)))
-    assertFalse(filter.apply(partitionInfo("test", 2)))
-    assertTrue(filter.apply(partitionInfo("test", 3)))
-    assertTrue(filter.apply(partitionInfo("test", 4)))
-    assertTrue(filter.apply(partitionInfo("test", 5)))
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":3-")
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 2)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 3)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 4)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 5)))
   }
 
   @Test
   def testPartitionFilterForUpperBound(): Unit = {
-    val filter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":-3", excludeInternalTopics = false)
-    assertTrue(filter.apply(partitionInfo("test", 0)))
-    assertTrue(filter.apply(partitionInfo("test", 1)))
-    assertTrue(filter.apply(partitionInfo("test", 2)))
-    assertFalse(filter.apply(partitionInfo("test", 3)))
-    assertFalse(filter.apply(partitionInfo("test", 4)))
-    assertFalse(filter.apply(partitionInfo("test", 5)))
+    val topicPartitionFilter = GetOffsetShell.createTopicPartitionFilterWithPatternList(":-3")
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 0)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 1)))
+    assertTrue(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 2)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 3)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 4)))
+    assertFalse(topicPartitionFilter.isTopicPartitionAllowed(topicPartition("test", 5)))
+  }
+
+  @Test
+  def testPartitionsSetFilter(): Unit = {
+    val partitionsSetFilter = GetOffsetShell.createTopicPartitionFilterWithTopicAndPartitionPattern(Some("topic"), "1,3,5")
+
+    assertFalse(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 0)))
+    assertFalse(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 2)))
+    assertFalse(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 4)))
+
+    assertFalse(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic1", 1)))
+    assertFalse(partitionsSetFilter.isTopicAllowed("topic1"))
+
+    assertTrue(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 1)))
+    assertTrue(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 3)))
+    assertTrue(partitionsSetFilter.isTopicPartitionAllowed(topicPartition("topic", 5)))
+    assertTrue(partitionsSetFilter.isTopicAllowed("topic"))
   }
 
   @Test
   def testPartitionFilterForInvalidSingleIndex(): Unit = {
     assertThrows(classOf[IllegalArgumentException],
-      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a", excludeInternalTopics = false))
+      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a"))
   }
 
   @Test
   def testPartitionFilterForInvalidRange(): Unit = {
     assertThrows(classOf[IllegalArgumentException],
-      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a-b", excludeInternalTopics = false))
+      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a-b"))
   }
 
   @Test
   def testPartitionFilterForInvalidLowerBound(): Unit = {
     assertThrows(classOf[IllegalArgumentException],
-      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a-", excludeInternalTopics = false))
+      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":a-"))
   }
 
   @Test
   def testPartitionFilterForInvalidUpperBound(): Unit = {
     assertThrows(classOf[IllegalArgumentException],
-      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":-b", excludeInternalTopics = false))
+      () => GetOffsetShell.createTopicPartitionFilterWithPatternList(":-b"))
+  }
+
+  @Test
+  def testInvalidTimeValue(): Unit = {
+    assertThrows(classOf[IllegalArgumentException],
+      () => GetOffsetShell.fetchOffsets(Array("--bootstrap-server", "localhost:9092", "--time", "invalid")))
   }
 
-  private def partitionInfo(topic: String, partition: Int): PartitionInfo = {
-    new PartitionInfo(topic, partition, null, null, null)
+  private def topicPartition(topic: String, partition: Int): TopicPartition = {
+    new TopicPartition(topic, partition)
   }
 }
diff --git a/core/src/test/scala/kafka/tools/GetOffsetShellTest.scala b/core/src/test/scala/kafka/tools/GetOffsetShellTest.scala
index 8e646d633e5e8..cbce573192748 100644
--- a/core/src/test/scala/kafka/tools/GetOffsetShellTest.scala
+++ b/core/src/test/scala/kafka/tools/GetOffsetShellTest.scala
@@ -24,8 +24,10 @@ import kafka.utils.{Exit, Logging, TestUtils}
 import org.apache.kafka.clients.CommonClientConfigs
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
 import org.apache.kafka.common.serialization.StringSerializer
-import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 class GetOffsetShellTest extends KafkaServerTestHarness with Logging {
   private val topicCount = 4
@@ -109,6 +111,68 @@ class GetOffsetShellTest extends KafkaServerTestHarness with Logging {
     )
   }
 
+  @ParameterizedTest
+  @ValueSource(strings = Array("-1", "latest"))
+  def testGetLatestOffsets(time: String): Unit = {
+    val offsets = executeAndParse(Array("--topic-partitions", "topic.*:0", "--time", time))
+    assertEquals(
+      List(
+        ("topic1", 0, Some(1)),
+        ("topic2", 0, Some(2)),
+        ("topic3", 0, Some(3)),
+        ("topic4", 0, Some(4))
+      ),
+      offsets
+    )
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = Array("-2", "earliest"))
+  def testGetEarliestOffsets(time: String): Unit = {
+    val offsets = executeAndParse(Array("--topic-partitions", "topic.*:0", "--time", time))
+    assertEquals(
+      List(
+        ("topic1", 0, Some(0)),
+        ("topic2", 0, Some(0)),
+        ("topic3", 0, Some(0)),
+        ("topic4", 0, Some(0))
+      ),
+      offsets
+    )
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = Array("-3", "max-timestamp"))
+  def testGetOffsetsByMaxTimestamp(time: String): Unit = {
+    val offsets = executeAndParse(Array("--topic-partitions", "topic.*", "--time", time))
+    offsets.foreach { case (topic, _, timestampOpt) =>
+      // We can't know the exact offsets with max timestamp
+      assertTrue(timestampOpt.get >= 0 && timestampOpt.get <= topic.replace("topic", "").toInt)
+    }
+  }
+
+  @Test
+  def testGetOffsetsByTimestamp(): Unit = {
+    val time = (System.currentTimeMillis() / 2).toString
+    val offsets = executeAndParse(Array("--topic-partitions", "topic.*:0", "--time", time))
+    assertEquals(
+      List(
+        ("topic1", 0, Some(0)),
+        ("topic2", 0, Some(0)),
+        ("topic3", 0, Some(0)),
+        ("topic4", 0, Some(0))
+      ),
+      offsets
+    )
+  }
+
+  @Test
+  def testNoOffsetIfTimestampGreaterThanLatestRecord(): Unit = {
+    val time = (System.currentTimeMillis() * 2).toString
+    val offsets = executeAndParse(Array("--topic-partitions", "topic.*", "--time", time))
+    assertEquals(List.empty, offsets)
+  }
+
   @Test
   def testTopicPartitionsArgWithInternalExcluded(): Unit = {
     val offsets = executeAndParse(Array("--topic-partitions",
@@ -124,6 +188,12 @@ class GetOffsetShellTest extends KafkaServerTestHarness with Logging {
     )
   }
 
+  @Test
+  def testTopicPartitionsArgWithInternalIncluded(): Unit = {
+    val offsets = executeAndParse(Array("--topic-partitions", "__.*:0"))
+    assertEquals(List(("__consumer_offsets", 0, Some(0))), offsets)
+  }
+
   @Test
   def testTopicPartitionsNotFoundForNonExistentTopic(): Unit = {
     assertExitCodeIsOne(Array("--topic", "some_nonexistent_topic"))
diff --git a/core/src/test/scala/kafka/utils/TestInfoUtils.scala b/core/src/test/scala/kafka/utils/TestInfoUtils.scala
index ecd656e0fb65e..fa48024f313d6 100644
--- a/core/src/test/scala/kafka/utils/TestInfoUtils.scala
+++ b/core/src/test/scala/kafka/utils/TestInfoUtils.scala
@@ -43,4 +43,5 @@ object TestInfoUtils {
       false
     }
   }
+  final val TestWithParameterizedQuorumName = "{displayName}.quorum={0}"
 }
diff --git a/core/src/test/scala/kafka/zk/FeatureZNodeTest.scala b/core/src/test/scala/kafka/zk/FeatureZNodeTest.scala
index 9344724ff158c..b7778c1d5ff83 100644
--- a/core/src/test/scala/kafka/zk/FeatureZNodeTest.scala
+++ b/core/src/test/scala/kafka/zk/FeatureZNodeTest.scala
@@ -17,31 +17,40 @@
 
 package kafka.zk
 
-import java.nio.charset.StandardCharsets
 
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange}
-import org.apache.kafka.common.feature.Features._
-import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows}
+import org.apache.kafka.server.common.MetadataVersion.{IBP_3_2_IV0, IBP_3_3_IV0}
+import org.junit.jupiter.api.Assertions.{assertDoesNotThrow, assertEquals, assertThrows}
 import org.junit.jupiter.api.Test
 
-import scala.jdk.CollectionConverters._
+import java.nio.charset.StandardCharsets
 
 class FeatureZNodeTest {
 
   @Test
   def testEncodeDecode(): Unit = {
-    val featureZNode = FeatureZNode(
+    val featureZNodeV1 = FeatureZNode(
+      IBP_3_2_IV0,
+      FeatureZNodeStatus.Enabled,
+
+      Map[String, Short](
+        "feature1" -> 2,
+        "feature2" -> 4))
+    val decodedV1 = FeatureZNode.decode(FeatureZNode.encode(featureZNodeV1))
+    assertEquals(featureZNodeV1, decodedV1)
+
+    val featureZNodeV2 = FeatureZNode(
+      IBP_3_3_IV0,
       FeatureZNodeStatus.Enabled,
-      Features.finalizedFeatures(
-        Map[String, FinalizedVersionRange](
-          "feature1" -> new FinalizedVersionRange(1, 2),
-          "feature2" -> new FinalizedVersionRange(2, 4)).asJava))
-    val decoded = FeatureZNode.decode(FeatureZNode.encode(featureZNode))
-    assertEquals(featureZNode, decoded)
+
+      Map[String, Short](
+        "feature1" -> 2,
+        "feature2" -> 4))
+    val decodedV2 = FeatureZNode.decode(FeatureZNode.encode(featureZNodeV2))
+    assertEquals(featureZNodeV2, decodedV2)
   }
 
   @Test
-  def testDecodeSuccess(): Unit = {
+  def testDecodeSuccessV1(): Unit = {
     val featureZNodeStrTemplate = """{
       "version":1,
       "status":1,
@@ -52,15 +61,36 @@ class FeatureZNodeTest {
     val node1 = FeatureZNode.decode(featureZNodeStrTemplate.format(validFeatures).getBytes(StandardCharsets.UTF_8))
     assertEquals(FeatureZNodeStatus.Enabled, node1.status)
     assertEquals(
-      Features.finalizedFeatures(
-        Map[String, FinalizedVersionRange](
-          "feature1" -> new FinalizedVersionRange(1, 2),
-          "feature2" -> new FinalizedVersionRange(2, 4)).asJava), node1.features)
+      Map[String, Short](
+        "feature1" -> 2,
+        "feature2" -> 4), node1.features)
+
+    val emptyFeatures = "{}"
+    val node2 = FeatureZNode.decode(featureZNodeStrTemplate.format(emptyFeatures).getBytes(StandardCharsets.UTF_8))
+    assertEquals(FeatureZNodeStatus.Enabled, node2.status)
+    assertEquals(Map.empty, node2.features)
+  }
+
+  @Test
+  def testDecodeSuccessV2(): Unit = {
+    val featureZNodeStrTemplate = """{
+      "version":2,
+      "status":1,
+      "features":%s
+    }"""
+
+    val validFeatures = """{"feature1": {"max_version_level": 2}, "feature2": {"max_version_level": 4}}"""
+    val node1 = FeatureZNode.decode(featureZNodeStrTemplate.format(validFeatures).getBytes(StandardCharsets.UTF_8))
+    assertEquals(FeatureZNodeStatus.Enabled, node1.status)
+    assertEquals(
+      Map[String, Short](
+        "feature1" -> 2,
+        "feature2" -> 4), node1.features)
 
     val emptyFeatures = "{}"
     val node2 = FeatureZNode.decode(featureZNodeStrTemplate.format(emptyFeatures).getBytes(StandardCharsets.UTF_8))
     assertEquals(FeatureZNodeStatus.Enabled, node2.status)
-    assertEquals(emptyFinalizedFeatures, node2.features)
+    assertEquals(Map.empty, node2.features)
   }
 
   @Test
@@ -73,11 +103,11 @@ class FeatureZNodeTest {
     }"""
     assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(FeatureZNode.V1 - 1, 1).getBytes(StandardCharsets.UTF_8)))
     val invalidStatus = FeatureZNodeStatus.Enabled.id + 1
-    assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(FeatureZNode.CurrentVersion, invalidStatus).getBytes(StandardCharsets.UTF_8)))
+    assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(FeatureZNode.V2, invalidStatus).getBytes(StandardCharsets.UTF_8)))
   }
 
   @Test
-  def testDecodeFailOnInvalidFeatures(): Unit = {
+  def testDecodeFailOnInvalidFeaturesV1(): Unit = {
     val featureZNodeStrTemplate =
       """{
       "version":1,
@@ -99,4 +129,29 @@ class FeatureZNodeTest {
     val invalidFeaturesMissingMinVersionLevel = ""","features":{"feature1": {"max_version_level": 1}}"""
     assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(invalidFeaturesMissingMinVersionLevel).getBytes(StandardCharsets.UTF_8)))
   }
+
+  @Test
+  def testDecodeFailOnInvalidFeaturesV2(): Unit = {
+    val featureZNodeStrTemplate =
+      """{
+      "version":2,
+      "status":1%s
+    }"""
+
+    val missingFeatures = ""
+    assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(missingFeatures).getBytes(StandardCharsets.UTF_8)))
+
+    val malformedFeatures = ""","features":{"feature1": {"min_version_level": 1, "max_version_level": 2}, "partial"}"""
+    assertThrows(classOf[IllegalArgumentException], () => FeatureZNode.decode(featureZNodeStrTemplate.format(malformedFeatures).getBytes(StandardCharsets.UTF_8)))
+
+    // We only inspect these configs in v1
+    val invalidFeaturesMinVersionLevel = ""","features":{"feature1": {"min_version_level": 0, "max_version_level": 2}}"""
+    assertDoesNotThrow(() => FeatureZNode.decode(featureZNodeStrTemplate.format(invalidFeaturesMinVersionLevel).getBytes(StandardCharsets.UTF_8)))
+
+    val invalidFeaturesMaxVersionLevel = ""","features":{"feature1": {"min_version_level": 2, "max_version_level": 1}}"""
+    assertDoesNotThrow(() => FeatureZNode.decode(featureZNodeStrTemplate.format(invalidFeaturesMaxVersionLevel).getBytes(StandardCharsets.UTF_8)))
+
+    val invalidFeaturesMissingMinVersionLevel = ""","features":{"feature1": {"max_version_level": 1}}"""
+    assertDoesNotThrow(() => FeatureZNode.decode(featureZNodeStrTemplate.format(invalidFeaturesMissingMinVersionLevel).getBytes(StandardCharsets.UTF_8)))
+  }
 }
diff --git a/core/src/test/scala/kafka/zk/TopicPartitionStateZNodeTest.scala b/core/src/test/scala/kafka/zk/TopicPartitionStateZNodeTest.scala
new file mode 100644
index 0000000000000..90d770f58dcfb
--- /dev/null
+++ b/core/src/test/scala/kafka/zk/TopicPartitionStateZNodeTest.scala
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+package kafka.zk
+
+import TopicPartitionStateZNode.decode
+import TopicPartitionStateZNode.encode
+import kafka.api.LeaderAndIsr
+import kafka.controller.LeaderIsrAndControllerEpoch
+import kafka.utils.Json
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.zookeeper.data.Stat
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+import org.mockito.Mockito.mock
+import org.mockito.Mockito.when
+import scala.jdk.CollectionConverters._
+
+final class TopicPartitionStateZNodeTest {
+
+  @Test
+  def testEncodeDecodeRecovering(): Unit = {
+    val zkVersion = 5
+    val stat = mock(classOf[Stat])
+    when(stat.getVersion).thenReturn(zkVersion)
+
+    val expected = LeaderIsrAndControllerEpoch(LeaderAndIsr(1, 6, List(1), LeaderRecoveryState.RECOVERING, zkVersion), 10)
+
+    assertEquals(Some(expected), decode(encode(expected), stat))
+  }
+
+  @Test
+  def testEncodeDecodeRecovered(): Unit = {
+    val zkVersion = 5
+    val stat = mock(classOf[Stat])
+    when(stat.getVersion).thenReturn(zkVersion)
+
+    val expected = LeaderIsrAndControllerEpoch(LeaderAndIsr(1, 6, List(1), LeaderRecoveryState.RECOVERED, zkVersion), 10)
+
+    assertEquals(Some(expected), decode(encode(expected), stat))
+  }
+
+  @Test
+  def testDecodeOldValue(): Unit = {
+    val zkVersion = 5
+    val stat = mock(classOf[Stat])
+    when(stat.getVersion).thenReturn(zkVersion)
+
+    val expected = LeaderIsrAndControllerEpoch(LeaderAndIsr(1, 6, List(1), LeaderRecoveryState.RECOVERED, zkVersion), 10)
+
+    val partitionState = Map(
+      "version" -> 1,
+      "leader" -> expected.leaderAndIsr.leader,
+      "leader_epoch" -> expected.leaderAndIsr.leaderEpoch,
+      "controller_epoch" -> expected.controllerEpoch,
+      "isr" -> expected.leaderAndIsr.isr.asJava
+    )
+
+    assertEquals(Some(expected), decode(Json.encodeAsBytes(partitionState.asJava), stat))
+  }
+}
diff --git a/core/src/test/scala/unit/kafka/admin/AclCommandTest.scala b/core/src/test/scala/unit/kafka/admin/AclCommandTest.scala
index 7cd5a18c22704..b5d9692040ed5 100644
--- a/core/src/test/scala/unit/kafka/admin/AclCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/AclCommandTest.scala
@@ -54,13 +54,15 @@ class AclCommandTest extends QuorumTestHarness with Logging {
   private val GroupResources = Set(new ResourcePattern(GROUP, "testGroup-1", LITERAL), new ResourcePattern(GROUP, "testGroup-2", LITERAL))
   private val TransactionalIdResources = Set(new ResourcePattern(TRANSACTIONAL_ID, "t0", LITERAL), new ResourcePattern(TRANSACTIONAL_ID, "t1", LITERAL))
   private val TokenResources = Set(new ResourcePattern(DELEGATION_TOKEN, "token1", LITERAL), new ResourcePattern(DELEGATION_TOKEN, "token2", LITERAL))
+  private val UserResources = Set(new ResourcePattern(USER, "User:test-user1", LITERAL), new ResourcePattern(USER, "User:test-user2", LITERAL))
 
   private val ResourceToCommand = Map[Set[ResourcePattern], Array[String]](
     TopicResources -> Array("--topic", "test-1", "--topic", "test-2"),
     Set(ClusterResource) -> Array("--cluster"),
     GroupResources -> Array("--group", "testGroup-1", "--group", "testGroup-2"),
     TransactionalIdResources -> Array("--transactional-id", "t0", "--transactional-id", "t1"),
-    TokenResources -> Array("--delegation-token", "token1", "--delegation-token", "token2")
+    TokenResources -> Array("--delegation-token", "token1", "--delegation-token", "token2"),
+    UserResources -> Array("--user-principal", "User:test-user1", "--user-principal", "User:test-user2")
   )
 
   private val ResourceToOperations = Map[Set[ResourcePattern], (Set[AclOperation], Array[String])](
@@ -72,7 +74,8 @@ class AclCommandTest extends QuorumTestHarness with Logging {
         "--operation", "AlterConfigs", "--operation", "IdempotentWrite", "--operation", "Alter", "--operation", "Describe")),
     GroupResources -> (Set(READ, DESCRIBE, DELETE), Array("--operation", "Read", "--operation", "Describe", "--operation", "Delete")),
     TransactionalIdResources -> (Set(DESCRIBE, WRITE), Array("--operation", "Describe", "--operation", "Write")),
-    TokenResources -> (Set(DESCRIBE), Array("--operation", "Describe"))
+    TokenResources -> (Set(DESCRIBE), Array("--operation", "Describe")),
+    UserResources -> (Set(CREATE_TOKENS, DESCRIBE_TOKENS), Array("--operation", "CreateTokens", "--operation", "DescribeTokens"))
   )
 
   private def ProducerResourceToAcls(enableIdempotence: Boolean = false) = Map[Set[ResourcePattern], Set[AccessControlEntry]](
diff --git a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala
index ea4215d9c39f5..4e2bfee60ee54 100755
--- a/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/AddPartitionsTest.scala
@@ -17,18 +17,24 @@
 
 package kafka.admin
 
-import java.util.Optional
+import java.util.{Collections, Optional}
 import kafka.controller.ReplicaAssignment
-import kafka.server.BaseRequestTest
-import kafka.utils.TestUtils
+import kafka.server.{BaseRequestTest, BrokerServer}
+import kafka.utils.{TestInfoUtils, TestUtils}
 import kafka.utils.TestUtils._
-import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.clients.admin.{Admin, NewPartitions, NewTopic}
 import org.apache.kafka.common.errors.InvalidReplicaAssignmentException
 import org.apache.kafka.common.requests.MetadataResponse.TopicMetadata
 import org.apache.kafka.common.requests.{MetadataRequest, MetadataResponse}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
-
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
+
+import java.util
+import java.util.Arrays.asList
+import java.util.Collections.singletonList
+import java.util.concurrent.ExecutionException
 import scala.jdk.CollectionConverters._
 
 class AddPartitionsTest extends BaseRequestTest {
@@ -47,44 +53,97 @@ class AddPartitionsTest extends BaseRequestTest {
   val topic4Assignment = Map(0 -> ReplicaAssignment(Seq(0,3), List(), List()))
   val topic5 = "new-topic5"
   val topic5Assignment = Map(1 -> ReplicaAssignment(Seq(0,1), List(), List()))
+  var admin: Admin = null
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
 
+    if (isKRaftTest()) {
+      brokers.foreach(broker => broker.asInstanceOf[BrokerServer].lifecycleManager.initialUnfenceFuture.get())
+    }
     createTopicWithAssignment(topic1, partitionReplicaAssignment = topic1Assignment.map { case (k, v) => k -> v.replicas })
     createTopicWithAssignment(topic2, partitionReplicaAssignment = topic2Assignment.map { case (k, v) => k -> v.replicas })
     createTopicWithAssignment(topic3, partitionReplicaAssignment = topic3Assignment.map { case (k, v) => k -> v.replicas })
     createTopicWithAssignment(topic4, partitionReplicaAssignment = topic4Assignment.map { case (k, v) => k -> v.replicas })
+    admin = createAdminClient()
   }
 
-  @Test
-  def testWrongReplicaCount(): Unit = {
-    assertThrows(classOf[InvalidReplicaAssignmentException], () => adminZkClient.addPartitions(topic1, topic1Assignment, adminZkClient.getBrokerMetadatas(), 2,
-      Some(Map(0 -> Seq(0, 1), 1 -> Seq(0, 1, 2)))))
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testWrongReplicaCount(quorum: String): Unit = {
+    assertEquals(classOf[InvalidReplicaAssignmentException], assertThrows(classOf[ExecutionException], () => {
+        admin.createPartitions(Collections.singletonMap(topic1,
+          NewPartitions.increaseTo(2, singletonList(asList(0, 1, 2))))).all().get()
+      }).getCause.getClass)
   }
 
-  @Test
-  def testMissingPartition0(): Unit = {
-    val e = assertThrows(classOf[AdminOperationException], () => adminZkClient.addPartitions(topic5, topic5Assignment, adminZkClient.getBrokerMetadatas(), 2,
-      Some(Map(1 -> Seq(0, 1), 2 -> Seq(0, 1, 2)))))
-    assertTrue(e.getMessage.contains("Unexpected existing replica assignment for topic 'new-topic5', partition id 0 is missing"))
+  /**
+   * Test that when we supply a manual partition assignment to createTopics, it must be 0-based
+   * and consecutive.
+   */
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMissingPartitionsInCreateTopics(quorum: String): Unit = {
+    val topic6Placements = new util.HashMap[Integer, util.List[Integer]]
+    topic6Placements.put(1, asList(0, 1))
+    topic6Placements.put(2, asList(1, 0))
+    val topic7Placements = new util.HashMap[Integer, util.List[Integer]]
+    topic7Placements.put(2, asList(0, 1))
+    topic7Placements.put(3, asList(1, 0))
+    val futures = admin.createTopics(asList(
+      new NewTopic("new-topic6", topic6Placements),
+      new NewTopic("new-topic7", topic7Placements))).values()
+    val topic6Cause = assertThrows(classOf[ExecutionException], () => futures.get("new-topic6").get()).getCause
+    assertEquals(classOf[InvalidReplicaAssignmentException], topic6Cause.getClass)
+    assertTrue(topic6Cause.getMessage.contains("partitions should be a consecutive 0-based integer sequence"),
+      "Unexpected error message: " + topic6Cause.getMessage)
+    val topic7Cause = assertThrows(classOf[ExecutionException], () => futures.get("new-topic7").get()).getCause
+    assertEquals(classOf[InvalidReplicaAssignmentException], topic7Cause.getClass)
+    assertTrue(topic7Cause.getMessage.contains("partitions should be a consecutive 0-based integer sequence"),
+      "Unexpected error message: " + topic7Cause.getMessage)
   }
 
-  @Test
-  def testIncrementPartitions(): Unit = {
-    adminZkClient.addPartitions(topic1, topic1Assignment, adminZkClient.getBrokerMetadatas(), 3)
+  /**
+   * Test that when we supply a manual partition assignment to createPartitions, it must contain
+   * enough partitions.
+   */
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMissingPartitionsInCreatePartitions(quorum: String): Unit = {
+    val cause = assertThrows(classOf[ExecutionException], () =>
+      admin.createPartitions(Collections.singletonMap(topic1,
+        NewPartitions.increaseTo(3, singletonList(asList(0, 1, 2))))).all().get()).getCause
+    assertEquals(classOf[InvalidReplicaAssignmentException], cause.getClass)
+    if (isKRaftTest()) {
+      assertTrue(cause.getMessage.contains("Attempted to add 2 additional partition(s), but only 1 assignment(s) " +
+        "were specified."), "Unexpected error message: " + cause.getMessage)
+    } else {
+      assertTrue(cause.getMessage.contains("Increasing the number of partitions by 2 but 1 assignments provided."),
+        "Unexpected error message: " + cause.getMessage)
+    }
+    if (!isKRaftTest()) {
+      // In ZK mode, test the raw AdminZkClient method as well.
+      val e = assertThrows(classOf[AdminOperationException], () => adminZkClient.addPartitions(
+        topic5, topic5Assignment, adminZkClient.getBrokerMetadatas(), 2,
+        Some(Map(1 -> Seq(0, 1), 2 -> Seq(0, 1, 2)))))
+      assertTrue(e.getMessage.contains("Unexpected existing replica assignment for topic 'new-topic5', partition " +
+        "id 0 is missing"))
+    }
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testIncrementPartitions(quorum: String): Unit = {
+    admin.createPartitions(Collections.singletonMap(topic1, NewPartitions.increaseTo(3))).all().get()
+
     // wait until leader is elected
-    val leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 1)
-    val leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic1, 2)
-    val leader1FromZk = zkClient.getLeaderForPartition(new TopicPartition(topic1, 1)).get
-    val leader2FromZk = zkClient.getLeaderForPartition(new TopicPartition(topic1, 2)).get
-    assertEquals(leader1, leader1FromZk)
-    assertEquals(leader2, leader2FromZk)
+    waitUntilLeaderIsElectedOrChangedWithAdmin(admin, topic1, 1)
+    waitUntilLeaderIsElectedOrChangedWithAdmin(admin, topic1, 2)
 
     // read metadata from a broker and verify the new topic partitions exist
-    TestUtils.waitForPartitionMetadata(servers, topic1, 1)
-    TestUtils.waitForPartitionMetadata(servers, topic1, 2)
+    TestUtils.waitForPartitionMetadata(brokers, topic1, 1)
+    TestUtils.waitForPartitionMetadata(brokers, topic1, 2)
     val response = connectAndReceive[MetadataResponse](
       new MetadataRequest.Builder(Seq(topic1).asJava, false).build)
     assertEquals(1, response.topicMetadata.size)
@@ -102,22 +161,21 @@ class AddPartitionsTest extends BaseRequestTest {
     }
   }
 
-  @Test
-  def testManualAssignmentOfReplicas(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testManualAssignmentOfReplicas(quorum: String): Unit = {
     // Add 2 partitions
-    adminZkClient.addPartitions(topic2, topic2Assignment, adminZkClient.getBrokerMetadatas(), 3,
-      Some(Map(0 -> Seq(1, 2), 1 -> Seq(0, 1), 2 -> Seq(2, 3))))
+    admin.createPartitions(Collections.singletonMap(topic2, NewPartitions.increaseTo(3,
+      asList(asList(0, 1), asList(2, 3))))).all().get()
     // wait until leader is elected
-    val leader1 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 1)
-    val leader2 = waitUntilLeaderIsElectedOrChanged(zkClient, topic2, 2)
-    val leader1FromZk = zkClient.getLeaderForPartition(new TopicPartition(topic2, 1)).get
-    val leader2FromZk = zkClient.getLeaderForPartition(new TopicPartition(topic2, 2)).get
-    assertEquals(leader1, leader1FromZk)
-    assertEquals(leader2, leader2FromZk)
+    val leader1 = waitUntilLeaderIsElectedOrChangedWithAdmin(admin, topic2, 1)
+    val leader2 = waitUntilLeaderIsElectedOrChangedWithAdmin(admin, topic2, 2)
 
     // read metadata from a broker and verify the new topic partitions exist
-    TestUtils.waitForPartitionMetadata(servers, topic2, 1)
-    TestUtils.waitForPartitionMetadata(servers, topic2, 2)
+    val partition1Metadata = TestUtils.waitForPartitionMetadata(brokers, topic2, 1)
+    assertEquals(leader1, partition1Metadata.leader())
+    val partition2Metadata = TestUtils.waitForPartitionMetadata(brokers, topic2, 2)
+    assertEquals(leader2, partition2Metadata.leader())
     val response = connectAndReceive[MetadataResponse](
       new MetadataRequest.Builder(Seq(topic2).asJava, false).build)
     assertEquals(1, response.topicMetadata.size)
@@ -132,17 +190,18 @@ class AddPartitionsTest extends BaseRequestTest {
     assertEquals(Set(0, 1), replicas.asScala.toSet)
   }
 
-  @Test
-  def testReplicaPlacementAllServers(): Unit = {
-    adminZkClient.addPartitions(topic3, topic3Assignment, adminZkClient.getBrokerMetadatas(), 7)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // TODO: add kraft support
+  def testReplicaPlacementAllServers(quorum: String): Unit = {
+    admin.createPartitions(Collections.singletonMap(topic3, NewPartitions.increaseTo(7))).all().get()
 
     // read metadata from a broker and verify the new topic partitions exist
-    TestUtils.waitForPartitionMetadata(servers, topic3, 1)
-    TestUtils.waitForPartitionMetadata(servers, topic3, 2)
-    TestUtils.waitForPartitionMetadata(servers, topic3, 3)
-    TestUtils.waitForPartitionMetadata(servers, topic3, 4)
-    TestUtils.waitForPartitionMetadata(servers, topic3, 5)
-    TestUtils.waitForPartitionMetadata(servers, topic3, 6)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 1)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 2)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 3)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 4)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 5)
+    TestUtils.waitForPartitionMetadata(brokers, topic3, 6)
 
     val response = connectAndReceive[MetadataResponse](
       new MetadataRequest.Builder(Seq(topic3).asJava, false).build)
@@ -157,13 +216,14 @@ class AddPartitionsTest extends BaseRequestTest {
     validateLeaderAndReplicas(topicMetadata, 6, 0, Set(0, 1, 2, 3))
   }
 
-  @Test
-  def testReplicaPlacementPartialServers(): Unit = {
-    adminZkClient.addPartitions(topic2, topic2Assignment, adminZkClient.getBrokerMetadatas(), 3)
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk")) // TODO: add kraft support
+  def testReplicaPlacementPartialServers(quorum: String): Unit = {
+    admin.createPartitions(Collections.singletonMap(topic2, NewPartitions.increaseTo(3))).all().get()
 
     // read metadata from a broker and verify the new topic partitions exist
-    TestUtils.waitForPartitionMetadata(servers, topic2, 1)
-    TestUtils.waitForPartitionMetadata(servers, topic2, 2)
+    TestUtils.waitForPartitionMetadata(brokers, topic2, 1)
+    TestUtils.waitForPartitionMetadata(brokers, topic2, 2)
 
     val response = connectAndReceive[MetadataResponse](
       new MetadataRequest.Builder(Seq(topic2).asJava, false).build)
diff --git a/core/src/test/scala/unit/kafka/admin/ConfigCommandTest.scala b/core/src/test/scala/unit/kafka/admin/ConfigCommandTest.scala
index c7ccf822b99a2..8516ba3a2451e 100644
--- a/core/src/test/scala/unit/kafka/admin/ConfigCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/ConfigCommandTest.scala
@@ -19,46 +19,37 @@ package kafka.admin
 import java.util
 import java.util.Properties
 import kafka.admin.ConfigCommand.ConfigCommandOptions
-import kafka.api.ApiVersion
-import kafka.cluster.{Broker, EndPoint}
-import kafka.server.{ConfigEntityName, ConfigType, KafkaConfig, QuorumTestHarness}
+import kafka.cluster.Broker
+import kafka.server.{ConfigEntityName, ConfigType}
 import kafka.utils.{Exit, Logging}
-import kafka.zk.{AdminZkClient, BrokerInfo, KafkaZkClient}
+import kafka.zk.{AdminZkClient, KafkaZkClient}
 import org.apache.kafka.clients.admin._
 import org.apache.kafka.common.Node
-import org.apache.kafka.common.config.{ConfigException, ConfigResource}
+import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.errors.InvalidConfigurationException
 import org.apache.kafka.common.internals.KafkaFutureImpl
-import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.quota.{ClientQuotaAlteration, ClientQuotaEntity, ClientQuotaFilter, ClientQuotaFilterComponent}
-import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.security.scram.internals.ScramCredentialUtils
 import org.apache.kafka.common.utils.Sanitizer
 import org.apache.kafka.test.TestUtils
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
+import org.mockito.ArgumentMatchers.anyString
 import org.mockito.Mockito.{mock, times, verify, when}
 
 import scala.collection.{Seq, mutable}
 import scala.jdk.CollectionConverters._
 
-class ConfigCommandTest extends QuorumTestHarness with Logging {
+class ConfigCommandTest extends Logging {
+
+  private val zkConnect = "localhost:2181"
+  private val dummyAdminZkClient = new DummyAdminZkClient(null)
 
   @Test
   def shouldExitWithNonZeroStatusOnArgError(): Unit = {
     assertNonZeroStatusExit(Array("--blah"))
   }
 
-  @Test
-  def shouldExitWithNonZeroStatusOnUpdatingUnallowedConfigViaZk(): Unit = {
-    assertNonZeroStatusExit(Array(
-      "--zookeeper", zkConnect,
-      "--entity-name", "1",
-      "--entity-type", "brokers",
-      "--alter",
-      "--add-config", "security.inter.broker.protocol=PLAINTEXT"))
-  }
-
   @Test
   def shouldExitWithNonZeroStatusOnZkCommandWithTopicsEntity(): Unit = {
     assertNonZeroStatusExit(Array(
@@ -83,15 +74,6 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--describe"))
   }
 
-  @Test
-  def shouldExitWithNonZeroStatusOnZkCommandAlterUserQuota(): Unit = {
-    assertNonZeroStatusExit(Array(
-      "--zookeeper", zkConnect,
-      "--entity-type", "users",
-      "--entity-name", "admin",
-      "--alter", "--add-config", "consumer_byte_rate=20000"))
-  }
-
   @Test
   def shouldExitWithNonZeroStatusAlterUserQuotaWithoutEntityName(): Unit = {
     assertNonZeroStatusExit(Array(
@@ -100,7 +82,6 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--alter", "--add-config", "consumer_byte_rate=20000"))
   }
 
-
   @Test
   def shouldExitWithNonZeroStatusOnBrokerCommandError(): Unit = {
     assertNonZeroStatusExit(Array(
@@ -391,7 +372,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
   def shouldFailIfUnrecognisedEntityTypeUsingZookeeper(): Unit = {
     val createOpts = new ConfigCommandOptions(Array("--zookeeper", zkConnect,
       "--entity-name", "client", "--entity-type", "not-recognised", "--alter", "--add-config", "a=b,c=d"))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -405,7 +386,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
   def shouldFailIfBrokerEntityTypeIsNotAnIntegerUsingZookeeper(): Unit = {
     val createOpts = new ConfigCommandOptions(Array("--zookeeper", zkConnect,
       "--entity-name", "A", "--entity-type", "brokers", "--alter", "--add-config", "a=b,c=d"))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -419,7 +400,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
   def shouldFailIfShortBrokerEntityTypeIsNotAnIntegerUsingZookeeper(): Unit = {
     val createOpts = new ConfigCommandOptions(Array("--zookeeper", zkConnect,
       "--broker", "A", "--alter", "--add-config", "a=b,c=d"))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -479,6 +460,9 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--alter",
       "--add-config", "a=b,c=d"))
 
+    val zkClient = mock(classOf[KafkaZkClient])
+    when(zkClient.getEntityConfigs(anyString(), anyString())).thenReturn(new Properties())
+
     class TestAdminZkClient(zkClient: KafkaZkClient) extends AdminZkClient(zkClient) {
       override def changeClientIdConfig(clientId: String, configChange: Properties): Unit = {
         assertEquals("my-client-id", clientId)
@@ -498,6 +482,9 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--alter",
       "--add-config", "a=b,c=d"))
 
+    val zkClient = mock(classOf[KafkaZkClient])
+    when(zkClient.getEntityConfigs(anyString(), anyString())).thenReturn(new Properties())
+
     class TestAdminZkClient(zkClient: KafkaZkClient) extends AdminZkClient(zkClient) {
       override def changeIpConfig(ip: String, configChange: Properties): Unit = {
         assertEquals("1.2.3.4", ip)
@@ -774,6 +761,9 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--alter",
       "--add-config", "a=b,c=d"))
 
+    val zkClient = mock(classOf[KafkaZkClient])
+    when(zkClient.getEntityConfigs(anyString(), anyString())).thenReturn(new Properties())
+
     class TestAdminZkClient(zkClient: KafkaZkClient) extends AdminZkClient(zkClient) {
       override def changeTopicConfig(topic: String, configChange: Properties): Unit = {
         assertEquals("my-topic", topic)
@@ -909,7 +899,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
     when(mockZkClient.getBroker(1)).thenReturn(Option(mockBroker))
 
     assertThrows(classOf[IllegalArgumentException],
-      () => ConfigCommand.alterConfigWithZk(mockZkClient, alterOpts, new DummyAdminZkClient(zkClient)))
+      () => ConfigCommand.alterConfigWithZk(mockZkClient, alterOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -924,7 +914,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
     when(mockZkClient.getBroker(1)).thenReturn(Option(mockBroker))
 
     assertThrows(classOf[IllegalArgumentException],
-      () => ConfigCommand.describeConfigWithZk(mockZkClient, describeOpts, new DummyAdminZkClient(zkClient)))
+      () => ConfigCommand.describeConfigWithZk(mockZkClient, describeOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -946,7 +936,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
     val mockZkClient: KafkaZkClient = mock(classOf[KafkaZkClient])
     when(mockZkClient.getBroker(1)).thenReturn(None)
 
-    ConfigCommand.describeConfigWithZk(mockZkClient, describeOpts, new TestAdminZkClient(zkClient))
+    ConfigCommand.describeConfigWithZk(mockZkClient, describeOpts, new TestAdminZkClient(null))
   }
 
   @Test
@@ -1197,6 +1187,9 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--alter",
       "--add-config", "a=b,c=[d,e ,f],g=[h,i]"))
 
+    val zkClient = mock(classOf[KafkaZkClient])
+    when(zkClient.getEntityConfigs(anyString(), anyString())).thenReturn(new Properties())
+
     class TestAdminZkClient(zkClient: KafkaZkClient) extends AdminZkClient(zkClient) {
       override def changeTopicConfig(topic: String, configChange: Properties): Unit = {
         assertEquals("my-topic", topic)
@@ -1216,7 +1209,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--entity-type", "brokers",
       "--alter",
       "--add-config", "leader.replication.throttled.rate=10"))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -1229,101 +1222,6 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
     assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfig(new DummyAdminClient(new Node(1, "localhost", 9092)), createOpts))
   }
 
-  @Test
-  def testDynamicBrokerConfigUpdateUsingZooKeeper(): Unit = {
-    val brokerId = "1"
-    val adminZkClient = new AdminZkClient(zkClient)
-    val alterOpts = Array("--zookeeper", zkConnect, "--entity-type", "brokers", "--alter")
-
-    def entityOpt(brokerId: Option[String]): Array[String] = {
-      brokerId.map(id => Array("--entity-name", id)).getOrElse(Array("--entity-default"))
-    }
-
-    def alterConfigWithZk(configs: Map[String, String], brokerId: Option[String],
-                          encoderConfigs: Map[String, String] = Map.empty): Unit = {
-      val configStr = (configs ++ encoderConfigs).map { case (k, v) => s"$k=$v" }.mkString(",")
-      val addOpts = new ConfigCommandOptions(alterOpts ++ entityOpt(brokerId) ++ Array("--add-config", configStr))
-      ConfigCommand.alterConfigWithZk(zkClient, addOpts, adminZkClient)
-    }
-
-    def verifyConfig(configs: Map[String, String], brokerId: Option[String]): Unit = {
-      val entityConfigs = zkClient.getEntityConfigs("brokers", brokerId.getOrElse(ConfigEntityName.Default))
-      assertEquals(configs, entityConfigs.asScala)
-    }
-
-    def alterAndVerifyConfig(configs: Map[String, String], brokerId: Option[String]): Unit = {
-      alterConfigWithZk(configs, brokerId)
-      verifyConfig(configs, brokerId)
-    }
-
-    def deleteAndVerifyConfig(configNames: Set[String], brokerId: Option[String]): Unit = {
-      val deleteOpts = new ConfigCommandOptions(alterOpts ++ entityOpt(brokerId) ++
-        Array("--delete-config", configNames.mkString(",")))
-      ConfigCommand.alterConfigWithZk(zkClient, deleteOpts, adminZkClient)
-      verifyConfig(Map.empty, brokerId)
-    }
-
-    // Add config
-    alterAndVerifyConfig(Map("message.max.size" -> "110000"), Some(brokerId))
-    alterAndVerifyConfig(Map("message.max.size" -> "120000"), None)
-
-    // Change config
-    alterAndVerifyConfig(Map("message.max.size" -> "130000"), Some(brokerId))
-    alterAndVerifyConfig(Map("message.max.size" -> "140000"), None)
-
-    // Delete config
-    deleteAndVerifyConfig(Set("message.max.size"), Some(brokerId))
-    deleteAndVerifyConfig(Set("message.max.size"), None)
-
-    // Listener configs: should work only with listener name
-    alterAndVerifyConfig(Map("listener.name.external.ssl.keystore.location" -> "/tmp/test.jks"), Some(brokerId))
-    assertThrows(classOf[ConfigException], () => alterConfigWithZk(Map("ssl.keystore.location" -> "/tmp/test.jks"), Some(brokerId)))
-
-    // Per-broker config configured at default cluster-level should fail
-    assertThrows(classOf[ConfigException], () => alterConfigWithZk(Map("listener.name.external.ssl.keystore.location" -> "/tmp/test.jks"), None))
-    deleteAndVerifyConfig(Set("listener.name.external.ssl.keystore.location"), Some(brokerId))
-
-    // Password config update without encoder secret should fail
-    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("listener.name.external.ssl.keystore.password" -> "secret"), Some(brokerId)))
-
-    // Password config update with encoder secret should succeed and encoded password must be stored in ZK
-    val configs = Map("listener.name.external.ssl.keystore.password" -> "secret", "log.cleaner.threads" -> "2")
-    val encoderConfigs = Map(KafkaConfig.PasswordEncoderSecretProp -> "encoder-secret")
-    alterConfigWithZk(configs, Some(brokerId), encoderConfigs)
-    val brokerConfigs = zkClient.getEntityConfigs("brokers", brokerId)
-    assertFalse(brokerConfigs.contains(KafkaConfig.PasswordEncoderSecretProp), "Encoder secret stored in ZooKeeper")
-    assertEquals("2", brokerConfigs.getProperty("log.cleaner.threads")) // not encoded
-    val encodedPassword = brokerConfigs.getProperty("listener.name.external.ssl.keystore.password")
-    val passwordEncoder = ConfigCommand.createPasswordEncoder(encoderConfigs)
-    assertEquals("secret", passwordEncoder.decode(encodedPassword).value)
-    assertEquals(configs.size, brokerConfigs.size)
-
-    // Password config update with overrides for encoder parameters
-    val configs2 = Map("listener.name.internal.ssl.keystore.password" -> "secret2")
-    val encoderConfigs2 = Map(KafkaConfig.PasswordEncoderSecretProp -> "encoder-secret",
-      KafkaConfig.PasswordEncoderCipherAlgorithmProp -> "DES/CBC/PKCS5Padding",
-      KafkaConfig.PasswordEncoderIterationsProp -> "1024",
-      KafkaConfig.PasswordEncoderKeyFactoryAlgorithmProp -> "PBKDF2WithHmacSHA1",
-      KafkaConfig.PasswordEncoderKeyLengthProp -> "64")
-    alterConfigWithZk(configs2, Some(brokerId), encoderConfigs2)
-    val brokerConfigs2 = zkClient.getEntityConfigs("brokers", brokerId)
-    val encodedPassword2 = brokerConfigs2.getProperty("listener.name.internal.ssl.keystore.password")
-    assertEquals("secret2", ConfigCommand.createPasswordEncoder(encoderConfigs).decode(encodedPassword2).value)
-    assertEquals("secret2", ConfigCommand.createPasswordEncoder(encoderConfigs2).decode(encodedPassword2).value)
-
-
-    // Password config update at default cluster-level should fail
-    assertThrows(classOf[ConfigException], () => alterConfigWithZk(configs, None, encoderConfigs))
-
-    // Dynamic config updates using ZK should fail if broker is running.
-    registerBrokerInZk(brokerId.toInt)
-    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("message.max.size" -> "210000"), Some(brokerId)))
-    assertThrows(classOf[IllegalArgumentException], () => alterConfigWithZk(Map("message.max.size" -> "220000"), None))
-
-    // Dynamic config updates using ZK should for a different broker that is not running should succeed
-    alterAndVerifyConfig(Map("message.max.size" -> "230000"), Some("2"))
-  }
-
   @Test
   def shouldNotUpdateBrokerConfigIfMalformedConfigUsingZookeeper(): Unit = {
     val createOpts = new ConfigCommandOptions(Array("--zookeeper", zkConnect,
@@ -1331,7 +1229,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--entity-type", "brokers",
       "--alter",
       "--add-config", "a=="))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -1351,7 +1249,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--entity-type", "brokers",
       "--alter",
       "--add-config", "a=[b,c,d=e"))
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -1371,7 +1269,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
       "--entity-type", "topics",
       "--alter",
       "--delete-config", "missing_config1, missing_config2"))
-    assertThrows(classOf[InvalidConfigurationException], () => ConfigCommand.alterConfigWithZk(null, createOpts, new DummyAdminZkClient(zkClient)))
+    assertThrows(classOf[InvalidConfigurationException], () => ConfigCommand.alterConfigWithZk(null, createOpts, dummyAdminZkClient))
   }
 
   @Test
@@ -1432,7 +1330,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
     val mockBroker: Broker = mock(classOf[Broker])
     when(mockZkClient.getBroker(1)).thenReturn(Option(mockBroker))
 
-    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(mockZkClient, createOpts, new TestAdminZkClient(zkClient)))
+    assertThrows(classOf[IllegalArgumentException], () => ConfigCommand.alterConfigWithZk(mockZkClient, createOpts, new TestAdminZkClient(null)))
   }
 
   @Test
@@ -1452,7 +1350,7 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
         "--delete-config", mechanism))
 
     val credentials = mutable.Map[String, Properties]()
-    case class CredentialChange(user: String, mechanisms: Set[String], iterations: Int) extends AdminZkClient(zkClient) {
+    case class CredentialChange(user: String, mechanisms: Set[String], iterations: Int) extends AdminZkClient(null) {
       override def fetchEntityConfig(entityType: String, entityName: String): Properties = {
         credentials.getOrElse(entityName, new Properties())
       }
@@ -1679,14 +1577,6 @@ class ConfigCommandTest extends QuorumTestHarness with Logging {
         Seq("<default>/clients/client-3", sanitizedPrincipal + "/clients/client-2"))
   }
 
-  private def registerBrokerInZk(id: Int): Unit = {
-    zkClient.createTopLevelPaths()
-    val securityProtocol = SecurityProtocol.PLAINTEXT
-    val endpoint = new EndPoint("localhost", 9092, ListenerName.forSecurityProtocol(securityProtocol), securityProtocol)
-    val brokerInfo = BrokerInfo(Broker(id, Seq(endpoint), rack = None), ApiVersion.latestVersion, jmxPort = 9192)
-    zkClient.registerBroker(brokerInfo)
-  }
-
   class DummyAdminZkClient(zkClient: KafkaZkClient) extends AdminZkClient(zkClient) {
     override def changeBrokerConfig(brokerIds: Seq[Int], configs: Properties): Unit = {}
     override def fetchEntityConfig(entityType: String, entityName: String): Properties = {new Properties}
diff --git a/core/src/test/scala/unit/kafka/admin/ConsumerGroupCommandTest.scala b/core/src/test/scala/unit/kafka/admin/ConsumerGroupCommandTest.scala
index 571f2dbf4d7ab..6851ba2d4764b 100644
--- a/core/src/test/scala/unit/kafka/admin/ConsumerGroupCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/ConsumerGroupCommandTest.scala
@@ -46,7 +46,7 @@ class ConsumerGroupCommandTest extends KafkaServerTestHarness {
 
   // configure the servers and clients
   override def generateConfigs = {
-    TestUtils.createBrokerConfigs(1, zkConnect, enableControlledShutdown = false).map { props =>
+    TestUtils.createBrokerConfigs(1, zkConnectOrNull, enableControlledShutdown = false).map { props =>
       KafkaConfig.fromProps(props)
     }
   }
diff --git a/core/src/test/scala/unit/kafka/admin/ConsumerGroupServiceTest.scala b/core/src/test/scala/unit/kafka/admin/ConsumerGroupServiceTest.scala
index 76a3855a87274..44b241a7ed3c6 100644
--- a/core/src/test/scala/unit/kafka/admin/ConsumerGroupServiceTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/ConsumerGroupServiceTest.scala
@@ -49,8 +49,8 @@ class ConsumerGroupServiceTest {
 
     when(admin.describeConsumerGroups(ArgumentMatchers.eq(Collections.singletonList(group)), any()))
       .thenReturn(describeGroupsResult(ConsumerGroupState.STABLE))
-    when(admin.listConsumerGroupOffsets(ArgumentMatchers.eq(group), any()))
-      .thenReturn(listGroupOffsetsResult)
+    when(admin.listConsumerGroupOffsets(ArgumentMatchers.eq(listConsumerGroupOffsetsSpec), any()))
+      .thenReturn(listGroupOffsetsResult(group))
     when(admin.listOffsets(offsetsArgMatcher, any()))
       .thenReturn(listOffsetsResult)
 
@@ -60,7 +60,7 @@ class ConsumerGroupServiceTest {
     assertEquals(topicPartitions.size, assignments.get.size)
 
     verify(admin, times(1)).describeConsumerGroups(ArgumentMatchers.eq(Collections.singletonList(group)), any())
-    verify(admin, times(1)).listConsumerGroupOffsets(ArgumentMatchers.eq(group), any())
+    verify(admin, times(1)).listConsumerGroupOffsets(ArgumentMatchers.eq(listConsumerGroupOffsetsSpec), any())
     verify(admin, times(1)).listOffsets(offsetsArgMatcher, any())
   }
 
@@ -112,8 +112,10 @@ class ConsumerGroupServiceTest {
     future.complete(consumerGroupDescription)
     when(admin.describeConsumerGroups(ArgumentMatchers.eq(Collections.singletonList(group)), any()))
       .thenReturn(new DescribeConsumerGroupsResult(Collections.singletonMap(group, future)))
-    when(admin.listConsumerGroupOffsets(ArgumentMatchers.eq(group), any()))
-      .thenReturn(AdminClientTestUtils.listConsumerGroupOffsetsResult(commitedOffsets))
+    when(admin.listConsumerGroupOffsets(ArgumentMatchers.eq(listConsumerGroupOffsetsSpec), any()))
+      .thenReturn(
+        AdminClientTestUtils.listConsumerGroupOffsetsResult(
+          Collections.singletonMap(group, commitedOffsets)))
     when(admin.listOffsets(
       ArgumentMatchers.argThat(offsetsArgMatcher(assignedTopicPartitions)),
       any()
@@ -142,7 +144,7 @@ class ConsumerGroupServiceTest {
     assertEquals(expectedOffsets, returnedOffsets)
 
     verify(admin, times(1)).describeConsumerGroups(ArgumentMatchers.eq(Collections.singletonList(group)), any())
-    verify(admin, times(1)).listConsumerGroupOffsets(ArgumentMatchers.eq(group), any())
+    verify(admin, times(1)).listConsumerGroupOffsets(ArgumentMatchers.eq(listConsumerGroupOffsetsSpec), any())
     verify(admin, times(1)).listOffsets(ArgumentMatchers.argThat(offsetsArgMatcher(assignedTopicPartitions)), any())
     verify(admin, times(1)).listOffsets(ArgumentMatchers.argThat(offsetsArgMatcher(unassignedTopicPartitions)), any())
   }
@@ -192,9 +194,9 @@ class ConsumerGroupServiceTest {
     new DescribeConsumerGroupsResult(Collections.singletonMap(group, future))
   }
 
-  private def listGroupOffsetsResult: ListConsumerGroupOffsetsResult = {
+  private def listGroupOffsetsResult(groupId: String): ListConsumerGroupOffsetsResult = {
     val offsets = topicPartitions.map(_ -> new OffsetAndMetadata(100)).toMap.asJava
-    AdminClientTestUtils.listConsumerGroupOffsetsResult(offsets)
+    AdminClientTestUtils.listConsumerGroupOffsetsResult(Map(groupId -> offsets).asJava)
   }
 
   private def offsetsArgMatcher: util.Map[TopicPartition, OffsetSpec] = {
@@ -217,4 +219,8 @@ class ConsumerGroupServiceTest {
     }.toMap
     AdminClientTestUtils.describeTopicsResult(topicDescriptions.asJava)
   }
+
+  private def listConsumerGroupOffsetsSpec: util.Map[String, ListConsumerGroupOffsetsSpec] = {
+    Collections.singletonMap(group, new ListConsumerGroupOffsetsSpec())
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/admin/FeatureCommandTest.scala b/core/src/test/scala/unit/kafka/admin/FeatureCommandTest.scala
index 93c22eb7a0fc8..ac715d217bd31 100644
--- a/core/src/test/scala/unit/kafka/admin/FeatureCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/FeatureCommandTest.scala
@@ -17,23 +17,22 @@
 
 package kafka.admin
 
-import kafka.api.KAFKA_2_7_IV0
 import kafka.server.{BaseRequestTest, KafkaConfig, KafkaServer}
 import kafka.utils.TestUtils
 import kafka.utils.TestUtils.waitUntilTrue
 import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
 import org.apache.kafka.common.utils.Utils
-
 import java.util.Properties
 
-import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue, assertThrows}
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_7_IV0
+import org.junit.jupiter.api.Assertions.assertTrue
 import org.junit.jupiter.api.Test
 
 class FeatureCommandTest extends BaseRequestTest {
   override def brokerCount: Int = 3
 
   override def brokerPropertyOverrides(props: Properties): Unit = {
-    props.put(KafkaConfig.InterBrokerProtocolVersionProp, KAFKA_2_7_IV0.toString)
+    props.put(KafkaConfig.InterBrokerProtocolVersionProp, IBP_2_7_IV0.toString)
   }
 
   private val defaultSupportedFeatures: Features[SupportedVersionRange] =
@@ -75,167 +74,37 @@ class FeatureCommandTest extends BaseRequestTest {
   @Test
   def testDescribeFeaturesSuccess(): Unit = {
     updateSupportedFeaturesInAllBrokers(defaultSupportedFeatures)
-    val featureApis = new FeatureApis(new FeatureCommandOptions(Array("--bootstrap-server", bootstrapServers(), "--describe")))
-    featureApis.setSupportedFeatures(defaultSupportedFeatures)
-    try {
-      val initialDescribeOutput = TestUtils.grabConsoleOutput(featureApis.describeFeatures())
-      val expectedInitialDescribeOutput =
-        "Feature: feature_1\tSupportedMinVersion: 1\tSupportedMaxVersion: 3\tFinalizedMinVersionLevel: -\tFinalizedMaxVersionLevel: -\tEpoch: 0\n" +
-        "Feature: feature_2\tSupportedMinVersion: 1\tSupportedMaxVersion: 5\tFinalizedMinVersionLevel: -\tFinalizedMaxVersionLevel: -\tEpoch: 0\n"
-      assertEquals(expectedInitialDescribeOutput, initialDescribeOutput)
-      featureApis.upgradeAllFeatures()
-      val finalDescribeOutput = TestUtils.grabConsoleOutput(featureApis.describeFeatures())
-      val expectedFinalDescribeOutput =
-        "Feature: feature_1\tSupportedMinVersion: 1\tSupportedMaxVersion: 3\tFinalizedMinVersionLevel: 1\tFinalizedMaxVersionLevel: 3\tEpoch: 1\n" +
-        "Feature: feature_2\tSupportedMinVersion: 1\tSupportedMaxVersion: 5\tFinalizedMinVersionLevel: 1\tFinalizedMaxVersionLevel: 5\tEpoch: 1\n"
-      assertEquals(expectedFinalDescribeOutput, finalDescribeOutput)
-    } finally {
-      featureApis.close()
-    }
-  }
-
-  /**
-   * Tests if the FeatureApis#upgradeAllFeatures API works as expected during a success case.
-   */
-  @Test
-  def testUpgradeAllFeaturesSuccess(): Unit = {
-    val upgradeOpts = new FeatureCommandOptions(Array("--bootstrap-server", bootstrapServers(), "--upgrade-all"))
-    val featureApis = new FeatureApis(upgradeOpts)
-    try {
-      // Step (1):
-      // - Update the supported features across all brokers.
-      // - Upgrade non-existing feature_1 to maxVersionLevel: 2.
-      // - Verify results.
-      val initialSupportedFeatures = Features.supportedFeatures(Utils.mkMap(Utils.mkEntry("feature_1", new SupportedVersionRange(1, 2))))
-      updateSupportedFeaturesInAllBrokers(initialSupportedFeatures)
-      featureApis.setSupportedFeatures(initialSupportedFeatures)
-      var output = TestUtils.grabConsoleOutput(featureApis.upgradeAllFeatures())
-      var expected =
-        "      [Add]\tFeature: feature_1\tExistingFinalizedMaxVersion: -\tNewFinalizedMaxVersion: 2\tResult: OK\n"
-      assertEquals(expected, output)
 
-      // Step (2):
-      // - Update the supported features across all brokers.
-      // - Upgrade existing feature_1 to maxVersionLevel: 3.
-      // - Upgrade non-existing feature_2 to maxVersionLevel: 5.
-      // - Verify results.
-      updateSupportedFeaturesInAllBrokers(defaultSupportedFeatures)
-      featureApis.setSupportedFeatures(defaultSupportedFeatures)
-      output = TestUtils.grabConsoleOutput(featureApis.upgradeAllFeatures())
-      expected =
-        "  [Upgrade]\tFeature: feature_1\tExistingFinalizedMaxVersion: 2\tNewFinalizedMaxVersion: 3\tResult: OK\n" +
-        "      [Add]\tFeature: feature_2\tExistingFinalizedMaxVersion: -\tNewFinalizedMaxVersion: 5\tResult: OK\n"
-      assertEquals(expected, output)
+    val initialDescribeOutput = TestUtils.grabConsoleOutput(FeatureCommand.mainNoExit(Array("--bootstrap-server", bootstrapServers(), "describe")))
+    val expectedInitialDescribeOutputs = Seq(
+      "Feature: feature_1\tSupportedMinVersion: 1\tSupportedMaxVersion: 3\tFinalizedVersionLevel: -",
+      "Feature: feature_2\tSupportedMinVersion: 1\tSupportedMaxVersion: 5\tFinalizedVersionLevel: -"
+    )
 
-      // Step (3):
-      // - Perform an upgrade of all features again.
-      // - Since supported features have not changed, expect that the above action does not yield
-      //   any results.
-      output = TestUtils.grabConsoleOutput(featureApis.upgradeAllFeatures())
-      assertTrue(output.isEmpty)
-      featureApis.setOptions(upgradeOpts)
-      output = TestUtils.grabConsoleOutput(featureApis.upgradeAllFeatures())
-      assertTrue(output.isEmpty)
-    } finally {
-      featureApis.close()
+    expectedInitialDescribeOutputs.foreach { expectedOutput =>
+      assertTrue(initialDescribeOutput.contains(expectedOutput))
     }
-  }
 
-  /**
-   * Tests if the FeatureApis#downgradeAllFeatures API works as expected during a success case.
-   */
-  @Test
-  def testDowngradeFeaturesSuccess(): Unit = {
-    val downgradeOpts = new FeatureCommandOptions(Array("--bootstrap-server", bootstrapServers(), "--downgrade-all"))
-    val upgradeOpts = new FeatureCommandOptions(Array("--bootstrap-server", bootstrapServers(), "--upgrade-all"))
-    val featureApis = new FeatureApis(upgradeOpts)
-    try {
-      // Step (1):
-      // - Update the supported features across all brokers.
-      // - Upgrade non-existing feature_1 to maxVersionLevel: 3.
-      // - Upgrade non-existing feature_2 to maxVersionLevel: 5.
-      updateSupportedFeaturesInAllBrokers(defaultSupportedFeatures)
-      featureApis.setSupportedFeatures(defaultSupportedFeatures)
-      featureApis.upgradeAllFeatures()
-
-      // Step (2):
-      // - Downgrade existing feature_1 to maxVersionLevel: 2.
-      // - Delete feature_2 since it is no longer supported by the FeatureApis object.
-      // - Verify results.
-      val downgradedFeatures = Features.supportedFeatures(Utils.mkMap(Utils.mkEntry("feature_1", new SupportedVersionRange(1, 2))))
-      featureApis.setSupportedFeatures(downgradedFeatures)
-      featureApis.setOptions(downgradeOpts)
-      var output = TestUtils.grabConsoleOutput(featureApis.downgradeAllFeatures())
-      var expected =
-        "[Downgrade]\tFeature: feature_1\tExistingFinalizedMaxVersion: 3\tNewFinalizedMaxVersion: 2\tResult: OK\n" +
-        "   [Delete]\tFeature: feature_2\tExistingFinalizedMaxVersion: 5\tNewFinalizedMaxVersion: -\tResult: OK\n"
-      assertEquals(expected, output)
-
-      // Step (3):
-      // - Perform a downgrade of all features again.
-      // - Since supported features have not changed, expect that the above action does not yield
-      //   any results.
-      updateSupportedFeaturesInAllBrokers(downgradedFeatures)
-      output = TestUtils.grabConsoleOutput(featureApis.downgradeAllFeatures())
-      assertTrue(output.isEmpty)
-
-      // Step (4):
-      // - Delete feature_1 since it is no longer supported by the FeatureApis object.
-      // - Verify results.
-      featureApis.setSupportedFeatures(Features.emptySupportedFeatures())
-      output = TestUtils.grabConsoleOutput(featureApis.downgradeAllFeatures())
-      expected =
-        "   [Delete]\tFeature: feature_1\tExistingFinalizedMaxVersion: 2\tNewFinalizedMaxVersion: -\tResult: OK\n"
-      assertEquals(expected, output)
-    } finally {
-      featureApis.close()
+    FeatureCommand.mainNoExit(Array("--bootstrap-server", bootstrapServers(), "upgrade",
+      "--feature", "feature_1", "--version", "3", "--feature", "feature_2", "--version", "5"))
+    val upgradeDescribeOutput = TestUtils.grabConsoleOutput(FeatureCommand.mainNoExit(Array("--bootstrap-server", bootstrapServers(), "describe")))
+    val expectedUpgradeDescribeOutput = Seq(
+      "Feature: feature_1\tSupportedMinVersion: 1\tSupportedMaxVersion: 3\tFinalizedVersionLevel: 3",
+      "Feature: feature_2\tSupportedMinVersion: 1\tSupportedMaxVersion: 5\tFinalizedVersionLevel: 5"
+    )
+    expectedUpgradeDescribeOutput.foreach { expectedOutput =>
+      assertTrue(upgradeDescribeOutput.contains(expectedOutput))
     }
-  }
-
-  /**
-   * Tests if the FeatureApis#upgradeAllFeatures API works as expected during a partial failure case.
-   */
-  @Test
-  def testUpgradeFeaturesFailure(): Unit = {
-    val upgradeOpts = new FeatureCommandOptions(Array("--bootstrap-server", bootstrapServers(), "--upgrade-all"))
-    val featureApis = new FeatureApis(upgradeOpts)
-    try {
-      // Step (1): Update the supported features across all brokers.
-      updateSupportedFeaturesInAllBrokers(defaultSupportedFeatures)
 
-      // Step (2):
-      // - Intentionally setup the FeatureApis object such that it contains incompatible target
-      //   features (viz. feature_2 and feature_3).
-      // - Upgrade non-existing feature_1 to maxVersionLevel: 4. Expect the operation to fail with
-      //   an incompatibility failure.
-      // - Upgrade non-existing feature_2 to maxVersionLevel: 5. Expect the operation to succeed.
-      // - Upgrade non-existing feature_3 to maxVersionLevel: 3. Expect the operation to fail
-      //   since the feature is not supported.
-      val targetFeaturesWithIncompatibilities =
-        Features.supportedFeatures(
-          Utils.mkMap(Utils.mkEntry("feature_1", new SupportedVersionRange(1, 4)),
-                      Utils.mkEntry("feature_2", new SupportedVersionRange(1, 5)),
-                      Utils.mkEntry("feature_3", new SupportedVersionRange(1, 3))))
-      featureApis.setSupportedFeatures(targetFeaturesWithIncompatibilities)
-      val output = TestUtils.grabConsoleOutput({
-        val exception = assertThrows(classOf[UpdateFeaturesException], () => featureApis.upgradeAllFeatures())
-        assertEquals("2 feature updates failed!", exception.getMessage)
-      })
-      val expected =
-        "      [Add]\tFeature: feature_1\tExistingFinalizedMaxVersion: -" +
-        "\tNewFinalizedMaxVersion: 4\tResult: FAILED due to" +
-        " org.apache.kafka.common.errors.InvalidRequestException: Could not apply finalized" +
-        " feature update because brokers were found to have incompatible versions for the" +
-        " feature.\n" +
-        "      [Add]\tFeature: feature_2\tExistingFinalizedMaxVersion: -" +
-        "\tNewFinalizedMaxVersion: 5\tResult: OK\n" +
-        "      [Add]\tFeature: feature_3\tExistingFinalizedMaxVersion: -" +
-        "\tNewFinalizedMaxVersion: 3\tResult: FAILED due to" +
-        " org.apache.kafka.common.errors.InvalidRequestException: Could not apply finalized" +
-        " feature update because the provided feature is not supported.\n"
-      assertEquals(expected, output)
-    } finally {
-      featureApis.close()
+    FeatureCommand.mainNoExit(Array("--bootstrap-server", bootstrapServers(), "downgrade",
+      "--feature", "feature_1", "--version", "2", "--feature", "feature_2", "--version", "2"))
+    val downgradeDescribeOutput = TestUtils.grabConsoleOutput(FeatureCommand.mainNoExit(Array("--bootstrap-server", bootstrapServers(), "describe")))
+    val expectedFinalDescribeOutput = Seq(
+      "Feature: feature_1\tSupportedMinVersion: 1\tSupportedMaxVersion: 3\tFinalizedVersionLevel: 2",
+      "Feature: feature_2\tSupportedMinVersion: 1\tSupportedMaxVersion: 5\tFinalizedVersionLevel: 2"
+    )
+    expectedFinalDescribeOutput.foreach { expectedOutput =>
+      assertTrue(downgradeDescribeOutput.contains(expectedOutput))
     }
   }
 }
diff --git a/core/src/test/scala/unit/kafka/admin/LeaderElectionCommandTest.scala b/core/src/test/scala/unit/kafka/admin/LeaderElectionCommandTest.scala
index 4a2a401655fc0..aebd479f18a92 100644
--- a/core/src/test/scala/unit/kafka/admin/LeaderElectionCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/LeaderElectionCommandTest.scala
@@ -35,7 +35,7 @@ import org.junit.jupiter.api.extension.ExtendWith
 import org.junit.jupiter.api.{BeforeEach, Tag}
 
 @ExtendWith(value = Array(classOf[ClusterTestExtensions]))
-@ClusterTestDefaults(clusterType = Type.BOTH, brokers = 3)
+@ClusterTestDefaults(clusterType = Type.ALL, brokers = 3)
 @Tag("integration")
 final class LeaderElectionCommandTest(cluster: ClusterInstance) {
   import LeaderElectionCommandTest._
@@ -56,12 +56,12 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testAllTopicPartition(): Unit = {
-    val client = cluster.createAdminClient()
     val topic = "unclean-topic"
     val partition = 0
     val assignment = Seq(broker2, broker3)
 
     cluster.waitForReadyBrokers()
+    val client = cluster.createAdminClient()
     createTopic(client, topic, Map(partition -> assignment))
 
     val topicPartition = new TopicPartition(topic, partition)
@@ -87,12 +87,12 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testTopicPartition(): Unit = {
-    val client = cluster.createAdminClient()
     val topic = "unclean-topic"
     val partition = 0
     val assignment = Seq(broker2, broker3)
 
     cluster.waitForReadyBrokers()
+    val client = cluster.createAdminClient()
     createTopic(client, topic, Map(partition -> assignment))
 
     val topicPartition = new TopicPartition(topic, partition)
@@ -120,12 +120,12 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testPathToJsonFile(): Unit = {
-    val client = cluster.createAdminClient()
     val topic = "unclean-topic"
     val partition = 0
     val assignment = Seq(broker2, broker3)
 
     cluster.waitForReadyBrokers()
+    val client = cluster.createAdminClient()
     createTopic(client, topic, Map(partition -> assignment))
 
     val topicPartition = new TopicPartition(topic, partition)
@@ -154,12 +154,12 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testPreferredReplicaElection(): Unit = {
-    val client = cluster.createAdminClient()
     val topic = "preferred-topic"
     val partition = 0
     val assignment = Seq(broker2, broker3)
 
     cluster.waitForReadyBrokers()
+    val client = cluster.createAdminClient()
     createTopic(client, topic, Map(partition -> assignment))
 
     val topicPartition = new TopicPartition(topic, partition)
@@ -197,7 +197,6 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testElectionResultOutput(): Unit = {
-    val client = cluster.createAdminClient()
     val topic = "non-preferred-topic"
     val partition0 = 0
     val partition1 = 1
@@ -205,6 +204,7 @@ final class LeaderElectionCommandTest(cluster: ClusterInstance) {
     val assignment1 = Seq(broker3, broker2)
 
     cluster.waitForReadyBrokers()
+    val client = cluster.createAdminClient()
     createTopic(client, topic, Map(
       partition0 -> assignment0,
       partition1 -> assignment1
diff --git a/core/src/test/scala/unit/kafka/admin/ListConsumerGroupTest.scala b/core/src/test/scala/unit/kafka/admin/ListConsumerGroupTest.scala
index 11fd0a3b1f206..4e7575e797c8c 100644
--- a/core/src/test/scala/unit/kafka/admin/ListConsumerGroupTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/ListConsumerGroupTest.scala
@@ -18,16 +18,19 @@ package kafka.admin
 
 import joptsimple.OptionException
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
 import kafka.utils.TestUtils
 import org.apache.kafka.common.ConsumerGroupState
 import org.apache.kafka.clients.admin.ConsumerGroupListing
 import java.util.Optional
 
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
+
 class ListConsumerGroupTest extends ConsumerGroupCommandTest {
 
-  @Test
-  def testListConsumerGroups(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListConsumerGroups(quorum: String): Unit = {
     val simpleGroup = "simple-group"
     addSimpleGroupExecutor(group = simpleGroup)
     addConsumerGroupExecutor(numConsumers = 1)
@@ -43,13 +46,15 @@ class ListConsumerGroupTest extends ConsumerGroupCommandTest {
     }, s"Expected --list to show groups $expectedGroups, but found $foundGroups.")
   }
 
-  @Test
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
   def testListWithUnrecognizedNewConsumerOption(): Unit = {
     val cgcArgs = Array("--new-consumer", "--bootstrap-server", bootstrapServers(), "--list")
     assertThrows(classOf[OptionException], () => getConsumerGroupService(cgcArgs))
   }
 
-  @Test
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
   def testListConsumerGroupsWithStates(): Unit = {
     val simpleGroup = "simple-group"
     addSimpleGroupExecutor(group = simpleGroup)
@@ -78,8 +83,9 @@ class ListConsumerGroupTest extends ConsumerGroupCommandTest {
     }, s"Expected to show groups $expectedListingStable, but found $foundListing")
   }
 
-  @Test
-  def testConsumerGroupStatesFromString(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testConsumerGroupStatesFromString(quorum: String): Unit = {
     var result = ConsumerGroupCommand.consumerGroupStatesFromString("Stable")
     assertEquals(Set(ConsumerGroupState.STABLE), result)
 
@@ -98,8 +104,9 @@ class ListConsumerGroupTest extends ConsumerGroupCommandTest {
     assertThrows(classOf[IllegalArgumentException], () => ConsumerGroupCommand.consumerGroupStatesFromString("   ,   ,"))
   }
 
-  @Test
-  def testListGroupCommand(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testListGroupCommand(quorum: String): Unit = {
     val simpleGroup = "simple-group"
     addSimpleGroupExecutor(group = simpleGroup)
     addConsumerGroupExecutor(numConsumers = 1)
diff --git a/core/src/test/scala/unit/kafka/admin/ReassignPartitionsUnitTest.scala b/core/src/test/scala/unit/kafka/admin/ReassignPartitionsUnitTest.scala
index cbbebe7c825ce..3b7c1ab719168 100644
--- a/core/src/test/scala/unit/kafka/admin/ReassignPartitionsUnitTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/ReassignPartitionsUnitTest.scala
@@ -66,7 +66,7 @@ class ReassignPartitionsUnitTest {
     assertEquals(Seq(
       "Status of partition reassignment:",
       "Reassignment of partition bar-0 is still in progress.",
-      "Reassignment of partition foo-0 is complete.",
+      "Reassignment of partition foo-0 is completed.",
       "Reassignment of partition foo-1 is still in progress.").
         mkString(System.lineSeparator()),
       partitionReassignmentStatesToString(Map(
diff --git a/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala b/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala
index 9586cf5395c46..dcbf2cff133e1 100644
--- a/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala
+++ b/core/src/test/scala/unit/kafka/admin/TopicCommandTest.scala
@@ -16,15 +16,23 @@
  */
 package kafka.admin
 
-import kafka.admin.TopicCommand.{PartitionDescription, TopicCommandOptions}
+import kafka.admin.TopicCommand.{PartitionDescription, TopicCommandOptions, TopicService}
 import kafka.common.AdminCommandFailedException
 import kafka.utils.Exit
-import org.apache.kafka.clients.admin.PartitionReassignment
+import org.apache.kafka.clients.admin.{Admin, AdminClientTestUtils, CreatePartitionsOptions, CreateTopicsOptions, DeleteTopicsOptions, NewPartitions, NewTopic, PartitionReassignment, TopicDescription}
 import org.apache.kafka.common.Node
 import org.apache.kafka.common.TopicPartitionInfo
+import org.apache.kafka.common.errors.ThrottlingQuotaExceededException
+import org.apache.kafka.common.protocol.Errors
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
+import org.mockito.ArgumentMatcher
+import org.mockito.ArgumentMatchers.{any, argThat, eq => eqThat}
+import org.mockito.Mockito.{mock, times, verify, when}
 
+import java.util.{Collection, Collections, Optional}
+import scala.collection.Seq
+import scala.concurrent.ExecutionException
 import scala.jdk.CollectionConverters._
 
 class TopicCommandTest {
@@ -159,6 +167,74 @@ class TopicCommandTest {
     assertEquals(expectedAssignment, actualAssignment)
   }
 
+  @Test
+  def testCreateTopicDoesNotRetryThrottlingQuotaExceededException(): Unit = {
+    val adminClient = mock(classOf[Admin])
+    val topicService = TopicService(adminClient)
+
+    val result = AdminClientTestUtils.createTopicsResult(topicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
+    when(adminClient.createTopics(any(), any())).thenReturn(result)
+
+    assertThrows(classOf[ThrottlingQuotaExceededException],
+      () => topicService.createTopic(new TopicCommandOptions(Array("--topic", topicName))))
+
+    val expectedNewTopic = new NewTopic(topicName, Optional.empty[Integer](), Optional.empty[java.lang.Short]())
+      .configs(Map.empty[String, String].asJava)
+
+    verify(adminClient, times(1)).createTopics(
+      eqThat(Set(expectedNewTopic).asJava),
+      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[CreateTopicsOptions])
+    )
+  }
+
+  @Test
+  def testDeleteTopicDoesNotRetryThrottlingQuotaExceededException(): Unit = {
+    val adminClient = mock(classOf[Admin])
+    val topicService = TopicService(adminClient)
+
+    val listResult = AdminClientTestUtils.listTopicsResult(topicName)
+    when(adminClient.listTopics(any())).thenReturn(listResult)
+
+    val result = AdminClientTestUtils.deleteTopicsResult(topicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
+    when(adminClient.deleteTopics(any[Collection[String]](), any())).thenReturn(result)
+
+    val exception = assertThrows(classOf[ExecutionException],
+      () => topicService.deleteTopic(new TopicCommandOptions(Array("--topic", topicName))))
+    assertTrue(exception.getCause.isInstanceOf[ThrottlingQuotaExceededException])
+
+    verify(adminClient, times(1)).deleteTopics(
+      eqThat(Seq(topicName).asJavaCollection),
+      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[DeleteTopicsOptions])
+    )
+  }
+
+  @Test
+  def testCreatePartitionsDoesNotRetryThrottlingQuotaExceededException(): Unit = {
+    val adminClient = mock(classOf[Admin])
+    val topicService = TopicService(adminClient)
+
+    val listResult = AdminClientTestUtils.listTopicsResult(topicName)
+    when(adminClient.listTopics(any())).thenReturn(listResult)
+
+    val topicPartitionInfo = new TopicPartitionInfo(0, new Node(0, "", 0),
+      Collections.emptyList(), Collections.emptyList())
+    val describeResult = AdminClientTestUtils.describeTopicsResult(topicName, new TopicDescription(
+      topicName, false, Collections.singletonList(topicPartitionInfo)))
+    when(adminClient.describeTopics(any(classOf[java.util.Collection[String]]))).thenReturn(describeResult)
+
+    val result = AdminClientTestUtils.createPartitionsResult(topicName, Errors.THROTTLING_QUOTA_EXCEEDED.exception())
+    when(adminClient.createPartitions(any(), any())).thenReturn(result)
+
+    val exception = assertThrows(classOf[ExecutionException],
+      () => topicService.alterTopic(new TopicCommandOptions(Array("--topic", topicName, "--partitions", "3"))))
+    assertTrue(exception.getCause.isInstanceOf[ThrottlingQuotaExceededException])
+
+    verify(adminClient, times(1)).createPartitions(
+      argThat((_.get(topicName).totalCount() == 3): ArgumentMatcher[java.util.Map[String, NewPartitions]]),
+      argThat((_.shouldRetryOnQuotaViolation() == false): ArgumentMatcher[CreatePartitionsOptions])
+    )
+  }
+
   private[this] def assertCheckArgsExitCode(expected: Int, options: TopicCommandOptions): Unit = {
     Exit.setExitProcedure {
       (exitCode: Int, _: Option[String]) =>
diff --git a/core/src/test/scala/unit/kafka/api/ApiVersionTest.scala b/core/src/test/scala/unit/kafka/api/ApiVersionTest.scala
deleted file mode 100644
index 75dd68280ad76..0000000000000
--- a/core/src/test/scala/unit/kafka/api/ApiVersionTest.scala
+++ /dev/null
@@ -1,283 +0,0 @@
-/**
-  * Licensed to the Apache Software Foundation (ASF) under one or more
-  * contributor license agreements.  See the NOTICE file distributed with
-  * this work for additional information regarding copyright ownership.
-  * The ASF licenses this file to You under the Apache License, Version 2.0
-  * (the "License"); you may not use this file except in compliance with
-  * the License.  You may obtain a copy of the License at
-  *
-  *    http://www.apache.org/licenses/LICENSE-2.0
-  *
-  * Unless required by applicable law or agreed to in writing, software
-  * distributed under the License is distributed on an "AS IS" BASIS,
-  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  * See the License for the specific language governing permissions and
-  * limitations under the License.
-  */
-
-package kafka.api
-
-import java.util
-
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
-import org.apache.kafka.common.message.ApiMessageType.ListenerType
-import org.apache.kafka.common.protocol.ApiKeys
-import org.apache.kafka.common.record.{RecordBatch, RecordVersion}
-import org.apache.kafka.common.requests.{AbstractResponse, ApiVersionsResponse}
-import org.apache.kafka.common.utils.Utils
-import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
-
-import scala.jdk.CollectionConverters._
-
-class ApiVersionTest {
-
-  @Test
-  def testApply(): Unit = {
-    assertEquals(KAFKA_0_8_0, ApiVersion("0.8.0"))
-    assertEquals(KAFKA_0_8_0, ApiVersion("0.8.0.0"))
-    assertEquals(KAFKA_0_8_0, ApiVersion("0.8.0.1"))
-
-    assertEquals(KAFKA_0_8_1, ApiVersion("0.8.1"))
-    assertEquals(KAFKA_0_8_1, ApiVersion("0.8.1.0"))
-    assertEquals(KAFKA_0_8_1, ApiVersion("0.8.1.1"))
-
-    assertEquals(KAFKA_0_8_2, ApiVersion("0.8.2"))
-    assertEquals(KAFKA_0_8_2, ApiVersion("0.8.2.0"))
-    assertEquals(KAFKA_0_8_2, ApiVersion("0.8.2.1"))
-
-    assertEquals(KAFKA_0_9_0, ApiVersion("0.9.0"))
-    assertEquals(KAFKA_0_9_0, ApiVersion("0.9.0.0"))
-    assertEquals(KAFKA_0_9_0, ApiVersion("0.9.0.1"))
-
-    assertEquals(KAFKA_0_10_0_IV0, ApiVersion("0.10.0-IV0"))
-
-    assertEquals(KAFKA_0_10_0_IV1, ApiVersion("0.10.0"))
-    assertEquals(KAFKA_0_10_0_IV1, ApiVersion("0.10.0.0"))
-    assertEquals(KAFKA_0_10_0_IV1, ApiVersion("0.10.0.0-IV0"))
-    assertEquals(KAFKA_0_10_0_IV1, ApiVersion("0.10.0.1"))
-
-    assertEquals(KAFKA_0_10_1_IV0, ApiVersion("0.10.1-IV0"))
-    assertEquals(KAFKA_0_10_1_IV1, ApiVersion("0.10.1-IV1"))
-
-    assertEquals(KAFKA_0_10_1_IV2, ApiVersion("0.10.1"))
-    assertEquals(KAFKA_0_10_1_IV2, ApiVersion("0.10.1.0"))
-    assertEquals(KAFKA_0_10_1_IV2, ApiVersion("0.10.1-IV2"))
-    assertEquals(KAFKA_0_10_1_IV2, ApiVersion("0.10.1.1"))
-
-    assertEquals(KAFKA_0_10_2_IV0, ApiVersion("0.10.2"))
-    assertEquals(KAFKA_0_10_2_IV0, ApiVersion("0.10.2.0"))
-    assertEquals(KAFKA_0_10_2_IV0, ApiVersion("0.10.2-IV0"))
-    assertEquals(KAFKA_0_10_2_IV0, ApiVersion("0.10.2.1"))
-
-    assertEquals(KAFKA_0_11_0_IV0, ApiVersion("0.11.0-IV0"))
-    assertEquals(KAFKA_0_11_0_IV1, ApiVersion("0.11.0-IV1"))
-
-    assertEquals(KAFKA_0_11_0_IV2, ApiVersion("0.11.0"))
-    assertEquals(KAFKA_0_11_0_IV2, ApiVersion("0.11.0.0"))
-    assertEquals(KAFKA_0_11_0_IV2, ApiVersion("0.11.0-IV2"))
-    assertEquals(KAFKA_0_11_0_IV2, ApiVersion("0.11.0.1"))
-
-    assertEquals(KAFKA_1_0_IV0, ApiVersion("1.0"))
-    assertEquals(KAFKA_1_0_IV0, ApiVersion("1.0.0"))
-    assertEquals(KAFKA_1_0_IV0, ApiVersion("1.0.0-IV0"))
-    assertEquals(KAFKA_1_0_IV0, ApiVersion("1.0.1"))
-
-    assertEquals(KAFKA_1_1_IV0, ApiVersion("1.1-IV0"))
-
-    assertEquals(KAFKA_2_0_IV1, ApiVersion("2.0"))
-    assertEquals(KAFKA_2_0_IV0, ApiVersion("2.0-IV0"))
-    assertEquals(KAFKA_2_0_IV1, ApiVersion("2.0-IV1"))
-
-    assertEquals(KAFKA_2_1_IV2, ApiVersion("2.1"))
-    assertEquals(KAFKA_2_1_IV0, ApiVersion("2.1-IV0"))
-    assertEquals(KAFKA_2_1_IV1, ApiVersion("2.1-IV1"))
-    assertEquals(KAFKA_2_1_IV2, ApiVersion("2.1-IV2"))
-
-    assertEquals(KAFKA_2_2_IV1, ApiVersion("2.2"))
-    assertEquals(KAFKA_2_2_IV0, ApiVersion("2.2-IV0"))
-    assertEquals(KAFKA_2_2_IV1, ApiVersion("2.2-IV1"))
-
-    assertEquals(KAFKA_2_3_IV1, ApiVersion("2.3"))
-    assertEquals(KAFKA_2_3_IV0, ApiVersion("2.3-IV0"))
-    assertEquals(KAFKA_2_3_IV1, ApiVersion("2.3-IV1"))
-
-    assertEquals(KAFKA_2_4_IV1, ApiVersion("2.4"))
-    assertEquals(KAFKA_2_4_IV0, ApiVersion("2.4-IV0"))
-    assertEquals(KAFKA_2_4_IV1, ApiVersion("2.4-IV1"))
-
-    assertEquals(KAFKA_2_5_IV0, ApiVersion("2.5"))
-    assertEquals(KAFKA_2_5_IV0, ApiVersion("2.5-IV0"))
-
-    assertEquals(KAFKA_2_6_IV0, ApiVersion("2.6"))
-    assertEquals(KAFKA_2_6_IV0, ApiVersion("2.6-IV0"))
-
-    assertEquals(KAFKA_2_7_IV0, ApiVersion("2.7-IV0"))
-    assertEquals(KAFKA_2_7_IV1, ApiVersion("2.7-IV1"))
-    assertEquals(KAFKA_2_7_IV2, ApiVersion("2.7-IV2"))
-
-    assertEquals(KAFKA_2_8_IV1, ApiVersion("2.8"))
-    assertEquals(KAFKA_2_8_IV0, ApiVersion("2.8-IV0"))
-    assertEquals(KAFKA_2_8_IV1, ApiVersion("2.8-IV1"))
-
-    assertEquals(KAFKA_3_0_IV1, ApiVersion("3.0"))
-    assertEquals(KAFKA_3_0_IV0, ApiVersion("3.0-IV0"))
-    assertEquals(KAFKA_3_0_IV1, ApiVersion("3.0-IV1"))
-
-    assertEquals(KAFKA_3_1_IV0, ApiVersion("3.1"))
-    assertEquals(KAFKA_3_1_IV0, ApiVersion("3.1-IV0"))
-  }
-
-  @Test
-  def testApiVersionUniqueIds(): Unit = {
-    val allIds: Seq[Int] = ApiVersion.allVersions.map(apiVersion => {
-      apiVersion.id
-    })
-
-    val uniqueIds: Set[Int] = allIds.toSet
-
-    assertEquals(allIds.size, uniqueIds.size)
-  }
-
-  @Test
-  def testMinSupportedVersionFor(): Unit = {
-    assertEquals(KAFKA_0_8_0, ApiVersion.minSupportedFor(RecordVersion.V0))
-    assertEquals(KAFKA_0_10_0_IV0, ApiVersion.minSupportedFor(RecordVersion.V1))
-    assertEquals(KAFKA_0_11_0_IV0, ApiVersion.minSupportedFor(RecordVersion.V2))
-
-    // Ensure that all record versions have a defined min version so that we remember to update the method
-    for (recordVersion <- RecordVersion.values)
-      assertNotNull(ApiVersion.minSupportedFor(recordVersion))
-  }
-
-  @Test
-  def testShortVersion(): Unit = {
-    assertEquals("0.8.0", KAFKA_0_8_0.shortVersion)
-    assertEquals("0.10.0", KAFKA_0_10_0_IV0.shortVersion)
-    assertEquals("0.10.0", KAFKA_0_10_0_IV1.shortVersion)
-    assertEquals("0.11.0", KAFKA_0_11_0_IV0.shortVersion)
-    assertEquals("0.11.0", KAFKA_0_11_0_IV1.shortVersion)
-    assertEquals("0.11.0", KAFKA_0_11_0_IV2.shortVersion)
-    assertEquals("1.0", KAFKA_1_0_IV0.shortVersion)
-    assertEquals("1.1", KAFKA_1_1_IV0.shortVersion)
-    assertEquals("2.0", KAFKA_2_0_IV0.shortVersion)
-    assertEquals("2.0", KAFKA_2_0_IV1.shortVersion)
-    assertEquals("2.1", KAFKA_2_1_IV0.shortVersion)
-    assertEquals("2.1", KAFKA_2_1_IV1.shortVersion)
-    assertEquals("2.1", KAFKA_2_1_IV2.shortVersion)
-    assertEquals("2.2", KAFKA_2_2_IV0.shortVersion)
-    assertEquals("2.2", KAFKA_2_2_IV1.shortVersion)
-    assertEquals("2.3", KAFKA_2_3_IV0.shortVersion)
-    assertEquals("2.3", KAFKA_2_3_IV1.shortVersion)
-    assertEquals("2.4", KAFKA_2_4_IV0.shortVersion)
-    assertEquals("2.5", KAFKA_2_5_IV0.shortVersion)
-    assertEquals("2.6", KAFKA_2_6_IV0.shortVersion)
-    assertEquals("2.7", KAFKA_2_7_IV2.shortVersion)
-    assertEquals("2.8", KAFKA_2_8_IV0.shortVersion)
-    assertEquals("2.8", KAFKA_2_8_IV1.shortVersion)
-    assertEquals("3.0", KAFKA_3_0_IV0.shortVersion)
-    assertEquals("3.0", KAFKA_3_0_IV1.shortVersion)
-    assertEquals("3.1", KAFKA_3_1_IV0.shortVersion)
-  }
-
-  @Test
-  def testApiVersionValidator(): Unit = {
-    val str = ApiVersionValidator.toString
-    val apiVersions = str.slice(1, str.length).split(",")
-    assertEquals(ApiVersion.allVersions.size, apiVersions.length)
-  }
-
-  @Test
-  def shouldCreateApiResponseOnlyWithKeysSupportedByMagicValue(): Unit = {
-    val response = ApiVersion.apiVersionsResponse(
-      10,
-      RecordVersion.V1,
-      Features.emptySupportedFeatures,
-      None,
-      ListenerType.ZK_BROKER
-    )
-    verifyApiKeysForMagic(response, RecordBatch.MAGIC_VALUE_V1)
-    assertEquals(10, response.throttleTimeMs)
-    assertTrue(response.data.supportedFeatures.isEmpty)
-    assertTrue(response.data.finalizedFeatures.isEmpty)
-    assertEquals(ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH, response.data.finalizedFeaturesEpoch)
-  }
-
-  @Test
-  def shouldReturnFeatureKeysWhenMagicIsCurrentValueAndThrottleMsIsDefaultThrottle(): Unit = {
-    val response = ApiVersion.apiVersionsResponse(
-      10,
-      RecordVersion.V1,
-      Features.supportedFeatures(
-        Utils.mkMap(Utils.mkEntry("feature", new SupportedVersionRange(1.toShort, 4.toShort)))),
-      Features.finalizedFeatures(
-        Utils.mkMap(Utils.mkEntry("feature", new FinalizedVersionRange(2.toShort, 3.toShort)))),
-      10,
-      None,
-      ListenerType.ZK_BROKER
-    )
-
-    verifyApiKeysForMagic(response, RecordBatch.MAGIC_VALUE_V1)
-    assertEquals(10, response.throttleTimeMs)
-    assertEquals(1, response.data.supportedFeatures.size)
-    val sKey = response.data.supportedFeatures.find("feature")
-    assertNotNull(sKey)
-    assertEquals(1, sKey.minVersion)
-    assertEquals(4, sKey.maxVersion)
-    assertEquals(1, response.data.finalizedFeatures.size)
-    val fKey = response.data.finalizedFeatures.find("feature")
-    assertNotNull(fKey)
-    assertEquals(2, fKey.minVersionLevel)
-    assertEquals(3, fKey.maxVersionLevel)
-    assertEquals(10, response.data.finalizedFeaturesEpoch)
-  }
-
-  private def verifyApiKeysForMagic(response: ApiVersionsResponse, maxMagic: Byte): Unit = {
-    for (version <- response.data.apiKeys.asScala) {
-      assertTrue(ApiKeys.forId(version.apiKey).minRequiredInterBrokerMagic <= maxMagic)
-    }
-  }
-
-  @Test
-  def shouldReturnAllKeysWhenMagicIsCurrentValueAndThrottleMsIsDefaultThrottle(): Unit = {
-    val response = ApiVersion.apiVersionsResponse(
-      AbstractResponse.DEFAULT_THROTTLE_TIME,
-      RecordVersion.current(),
-      Features.emptySupportedFeatures,
-      None,
-      ListenerType.ZK_BROKER
-    )
-    assertEquals(new util.HashSet[ApiKeys](ApiKeys.zkBrokerApis), apiKeysInResponse(response))
-    assertEquals(AbstractResponse.DEFAULT_THROTTLE_TIME, response.throttleTimeMs)
-    assertTrue(response.data.supportedFeatures.isEmpty)
-    assertTrue(response.data.finalizedFeatures.isEmpty)
-    assertEquals(ApiVersionsResponse.UNKNOWN_FINALIZED_FEATURES_EPOCH, response.data.finalizedFeaturesEpoch)
-  }
-
-  @Test
-  def testMetadataQuorumApisAreDisabled(): Unit = {
-    val response = ApiVersion.apiVersionsResponse(
-      AbstractResponse.DEFAULT_THROTTLE_TIME,
-      RecordVersion.current(),
-      Features.emptySupportedFeatures,
-      None,
-      ListenerType.ZK_BROKER
-    )
-
-    // Ensure that APIs needed for the KRaft mode are not exposed through ApiVersions until we are ready for them
-    val exposedApis = apiKeysInResponse(response)
-    assertFalse(exposedApis.contains(ApiKeys.ENVELOPE))
-    assertFalse(exposedApis.contains(ApiKeys.VOTE))
-    assertFalse(exposedApis.contains(ApiKeys.BEGIN_QUORUM_EPOCH))
-    assertFalse(exposedApis.contains(ApiKeys.END_QUORUM_EPOCH))
-    assertFalse(exposedApis.contains(ApiKeys.DESCRIBE_QUORUM))
-  }
-
-  private def apiKeysInResponse(apiVersions: ApiVersionsResponse) = {
-    val apiKeys = new util.HashSet[ApiKeys]
-    for (version <- apiVersions.data.apiKeys.asScala) {
-      apiKeys.add(ApiKeys.forId(version.apiKey))
-    }
-    apiKeys
-  }
-}
diff --git a/core/src/test/scala/unit/kafka/cluster/AbstractPartitionTest.scala b/core/src/test/scala/unit/kafka/cluster/AbstractPartitionTest.scala
index 887f16c5fa092..13e627b529851 100644
--- a/core/src/test/scala/unit/kafka/cluster/AbstractPartitionTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/AbstractPartitionTest.scala
@@ -16,12 +16,11 @@
   */
 package kafka.cluster
 
-import kafka.api.ApiVersion
 import kafka.log.{CleanerConfig, LogConfig, LogManager}
 import kafka.server.{Defaults, MetadataCache}
 import kafka.server.checkpoints.OffsetCheckpoints
 import kafka.server.metadata.MockConfigRepository
-import kafka.utils.TestUtils.{MockAlterIsrManager, MockIsrChangeListener}
+import kafka.utils.TestUtils.{MockAlterPartitionManager, MockAlterPartitionListener}
 import kafka.utils.{MockTime, TestUtils}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState
@@ -30,10 +29,11 @@ import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.{AfterEach, BeforeEach}
 import org.mockito.ArgumentMatchers
 import org.mockito.Mockito.{mock, when}
-
 import java.io.File
 import java.util.Properties
 
+import org.apache.kafka.server.common.MetadataVersion
+
 import scala.jdk.CollectionConverters._
 
 object AbstractPartitionTest {
@@ -43,14 +43,15 @@ object AbstractPartitionTest {
 class AbstractPartitionTest {
 
   val brokerId = AbstractPartitionTest.brokerId
+  val remoteReplicaId = brokerId + 1
   val topicPartition = new TopicPartition("test-topic", 0)
   val time = new MockTime()
   var tmpDir: File = _
   var logDir1: File = _
   var logDir2: File = _
   var logManager: LogManager = _
-  var alterIsrManager: MockAlterIsrManager = _
-  var isrChangeListener: MockIsrChangeListener = _
+  var alterPartitionManager: MockAlterPartitionManager = _
+  var alterPartitionListener: MockAlterPartitionListener = _
   var logConfig: LogConfig = _
   var configRepository: MockConfigRepository = _
   val delayedOperations: DelayedOperations = mock(classOf[DelayedOperations])
@@ -64,7 +65,7 @@ class AbstractPartitionTest {
 
     val logProps = createLogProperties(Map.empty)
     logConfig = LogConfig(logProps)
-    configRepository = MockConfigRepository.forTopic(topicPartition.topic(), logProps)
+    configRepository = MockConfigRepository.forTopic(topicPartition.topic, logProps)
 
     tmpDir = TestUtils.tempDir()
     logDir1 = TestUtils.randomPartitionLogDir(tmpDir)
@@ -73,24 +74,24 @@ class AbstractPartitionTest {
       CleanerConfig(enableCleaner = false), time, interBrokerProtocolVersion)
     logManager.startup(Set.empty)
 
-    alterIsrManager = TestUtils.createAlterIsrManager()
-    isrChangeListener = TestUtils.createIsrChangeListener()
+    alterPartitionManager = TestUtils.createAlterIsrManager()
+    alterPartitionListener = TestUtils.createIsrChangeListener()
     partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
       interBrokerProtocolVersion = interBrokerProtocolVersion,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       logManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     when(offsetCheckpoints.fetch(ArgumentMatchers.anyString, ArgumentMatchers.eq(topicPartition)))
       .thenReturn(None)
   }
 
-  protected def interBrokerProtocolVersion: ApiVersion = ApiVersion.latestVersion
+  protected def interBrokerProtocolVersion: MetadataVersion = MetadataVersion.latest
 
   def createLogProperties(overrides: Map[String, String]): Properties = {
     val logProps = new Properties()
@@ -115,7 +116,7 @@ class AbstractPartitionTest {
     partition.createLogIfNotExists(isNew = false, isFutureReplica = false, offsetCheckpoints, None)
 
     val controllerEpoch = 0
-    val replicas = List[Integer](brokerId, brokerId + 1).asJava
+    val replicas = List[Integer](brokerId, remoteReplicaId).asJava
     val isr = replicas
 
     if (isLeader) {
@@ -124,17 +125,17 @@ class AbstractPartitionTest {
         .setLeader(brokerId)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(isr)
-        .setZkVersion(1)
+        .setPartitionEpoch(1)
         .setReplicas(replicas)
         .setIsNew(true), offsetCheckpoints, None), "Expected become leader transition to succeed")
       assertEquals(leaderEpoch, partition.getLeaderEpoch)
     } else {
       assertTrue(partition.makeFollower(new LeaderAndIsrPartitionState()
         .setControllerEpoch(controllerEpoch)
-        .setLeader(brokerId + 1)
+        .setLeader(remoteReplicaId)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(isr)
-        .setZkVersion(1)
+        .setPartitionEpoch(1)
         .setReplicas(replicas)
         .setIsNew(true), offsetCheckpoints, None), "Expected become follower transition to succeed")
       assertEquals(leaderEpoch, partition.getLeaderEpoch)
diff --git a/core/src/test/scala/unit/kafka/cluster/AssignmentStateTest.scala b/core/src/test/scala/unit/kafka/cluster/AssignmentStateTest.scala
index a618825e8a5f1..ce8c567353dda 100644
--- a/core/src/test/scala/unit/kafka/cluster/AssignmentStateTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/AssignmentStateTest.scala
@@ -94,7 +94,7 @@ class AssignmentStateTest extends AbstractPartitionTest {
       .setLeader(brokerId)
       .setLeaderEpoch(6)
       .setIsr(isr.asJava)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas.asJava)
       .setIsNew(false)
     if (adding.nonEmpty)
diff --git a/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala b/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala
index f36ce9b4067c7..9dbb00d1849de 100644
--- a/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/BrokerEndPointTest.scala
@@ -199,34 +199,6 @@ class BrokerEndPointTest {
       broker.features)
   }
 
-  @Test
-  def testBrokerEndpointFromUri(): Unit = {
-    var connectionString = "localhost:9092"
-    var endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString)
-    assertEquals("localhost", endpoint.host)
-    assertEquals(9092, endpoint.port)
-    //KAFKA-3719
-    connectionString = "local_host:9092"
-    endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString)
-    assertEquals("local_host", endpoint.host)
-    assertEquals(9092, endpoint.port)
-    // also test for ipv6
-    connectionString = "[::1]:9092"
-    endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString)
-    assertEquals("::1", endpoint.host)
-    assertEquals(9092, endpoint.port)
-    // test for ipv6 with % character
-    connectionString = "[fe80::b1da:69ca:57f7:63d8%3]:9092"
-    endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString)
-    assertEquals("fe80::b1da:69ca:57f7:63d8%3", endpoint.host)
-    assertEquals(9092, endpoint.port)
-    // add test for uppercase in hostname
-    connectionString = "MyHostname:9092"
-    endpoint = BrokerEndPoint.createBrokerEndPoint(1, connectionString)
-    assertEquals("MyHostname", endpoint.host)
-    assertEquals(9092, endpoint.port)
-  }
-
   @Test
   def testEndpointFromUri(): Unit = {
     var connectionString = "PLAINTEXT://localhost:9092"
diff --git a/core/src/test/scala/unit/kafka/cluster/PartitionLockTest.scala b/core/src/test/scala/unit/kafka/cluster/PartitionLockTest.scala
index b55c62fe7e22e..6c374fe3c1d89 100644
--- a/core/src/test/scala/unit/kafka/cluster/PartitionLockTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/PartitionLockTest.scala
@@ -17,21 +17,24 @@
 
 package kafka.cluster
 
-import java.util.Properties
+import java.util.{Optional, Properties}
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicBoolean
-
-import kafka.api.{ApiVersion, LeaderAndIsr}
+import kafka.api.LeaderAndIsr
 import kafka.log._
 import kafka.server._
 import kafka.server.checkpoints.OffsetCheckpoints
 import kafka.server.epoch.LeaderEpochFileCache
 import kafka.server.metadata.MockConfigRepository
 import kafka.utils._
+import org.apache.kafka.common.TopicIdPartition
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState
+import org.apache.kafka.common.protocol.ApiKeys
 import org.apache.kafka.common.record.{MemoryRecords, SimpleRecord}
+import org.apache.kafka.common.requests.FetchRequest
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.common.{TopicPartition, Uuid}
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.mockito.ArgumentMatchers
@@ -60,7 +63,6 @@ class PartitionLockTest extends Logging {
   val executorService = Executors.newFixedThreadPool(numReplicaFetchers + numProducers + 1)
   val appendSemaphore = new Semaphore(0)
   val shrinkIsrSemaphore = new Semaphore(0)
-  val followerQueues = (0 until numReplicaFetchers).map(_ => new ArrayBlockingQueue[MemoryRecords](2))
 
   var logManager: LogManager = _
   var partition: Partition = _
@@ -139,7 +141,7 @@ class PartitionLockTest extends Logging {
       .setLeader(replicas.get(0))
       .setLeaderEpoch(1)
       .setIsr(replicas)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(true)
     val offsetCheckpoints: OffsetCheckpoints = mock(classOf[OffsetCheckpoints])
@@ -180,14 +182,16 @@ class PartitionLockTest extends Logging {
    * Then release the permit for the final append and verify that all appends and follower updates complete.
    */
   private def concurrentProduceFetchWithReadLockOnly(): Unit = {
+    val leaderEpoch = partition.getLeaderEpoch
+
     val appendFutures = scheduleAppends()
-    val stateUpdateFutures = scheduleUpdateFollowers(numProducers * numRecordsPerProducer - 1)
+    val stateUpdateFutures = scheduleFollowerFetches(leaderEpoch, numRecords = numProducers * numRecordsPerProducer - 1)
 
     appendSemaphore.release(numProducers * numRecordsPerProducer - 1)
     stateUpdateFutures.foreach(_.get(15, TimeUnit.SECONDS))
 
     appendSemaphore.release(1)
-    scheduleUpdateFollowers(1).foreach(_.get(15, TimeUnit.SECONDS)) // just to make sure follower state update still works
+    scheduleFollowerFetches(leaderEpoch, numRecords = 1).foreach(_.get(15, TimeUnit.SECONDS)) // just to make sure follower state update still works
     appendFutures.foreach(_.get(15, TimeUnit.SECONDS))
   }
 
@@ -198,9 +202,10 @@ class PartitionLockTest extends Logging {
    * permits for all appends to complete before verifying state updates.
    */
   private def concurrentProduceFetchWithWriteLock(): Unit = {
+    val leaderEpoch = partition.getLeaderEpoch
 
     val appendFutures = scheduleAppends()
-    val stateUpdateFutures = scheduleUpdateFollowers(numProducers * numRecordsPerProducer)
+    val stateUpdateFutures = scheduleFollowerFetches(leaderEpoch, numRecords = numProducers * numRecordsPerProducer)
 
     assertFalse(stateUpdateFutures.exists(_.isDone))
     appendSemaphore.release(numProducers * numRecordsPerProducer)
@@ -215,7 +220,7 @@ class PartitionLockTest extends Logging {
     (0 until numProducers).map { _ =>
       executorService.submit((() => {
         try {
-          append(partition, numRecordsPerProducer, followerQueues)
+          append(partition, numRecordsPerProducer)
         } catch {
           case e: Throwable =>
             error("Exception during append", e)
@@ -225,11 +230,11 @@ class PartitionLockTest extends Logging {
     }
   }
 
-  private def scheduleUpdateFollowers(numRecords: Int): Seq[Future[_]] = {
+  private def scheduleFollowerFetches(leaderEpoch: Int, numRecords: Int): Seq[Future[_]] = {
     (1 to numReplicaFetchers).map { index =>
       executorService.submit((() => {
         try {
-          updateFollowerFetchState(partition, index, numRecords, followerQueues(index - 1))
+          fetchFollower(partition, index, leaderEpoch, numRecords)
         } catch {
           case e: Throwable =>
             error("Exception during updateFollowerFetchState", e)
@@ -253,16 +258,16 @@ class PartitionLockTest extends Logging {
   private def setupPartitionWithMocks(logManager: LogManager): Partition = {
     val leaderEpoch = 1
     val brokerId = 0
-    val isrChangeListener: IsrChangeListener = mock(classOf[IsrChangeListener])
+    val isrChangeListener: AlterPartitionListener = mock(classOf[AlterPartitionListener])
     val delayedOperations: DelayedOperations = mock(classOf[DelayedOperations])
     val metadataCache: MetadataCache = mock(classOf[MetadataCache])
     val offsetCheckpoints: OffsetCheckpoints = mock(classOf[OffsetCheckpoints])
-    val alterIsrManager: AlterIsrManager = mock(classOf[AlterIsrManager])
+    val alterIsrManager: AlterPartitionManager = mock(classOf[AlterPartitionManager])
 
     logManager.startup(Set.empty)
     val partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = kafka.server.Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       mockTime,
       isrChangeListener,
@@ -271,10 +276,13 @@ class PartitionLockTest extends Logging {
       logManager,
       alterIsrManager) {
 
-      override def prepareIsrShrink(outOfSyncReplicaIds: Set[Int]): PendingShrinkIsr = {
+      override def prepareIsrShrink(
+        currentState: CommittedPartitionState,
+        outOfSyncReplicaIds: Set[Int]
+      ): PendingShrinkIsr = {
         shrinkIsrSemaphore.acquire()
         try {
-          super.prepareIsrShrink(outOfSyncReplicaIds)
+          super.prepareIsrShrink(currentState, outOfSyncReplicaIds)
         } finally {
           shrinkIsrSemaphore.release()
         }
@@ -314,12 +322,14 @@ class PartitionLockTest extends Logging {
         new SlowLog(log, offsets.logStartOffset, localLog, leaderEpochCache, producerStateManager, appendSemaphore)
       }
     }
+
+    val topicIdPartition = new TopicIdPartition(partition.topicId.getOrElse(Uuid.ZERO_UUID), topicPartition)
     when(offsetCheckpoints.fetch(
       ArgumentMatchers.anyString,
       ArgumentMatchers.eq(topicPartition)
     )).thenReturn(None)
     when(alterIsrManager.submit(
-      ArgumentMatchers.eq(topicPartition),
+      ArgumentMatchers.eq(topicIdPartition),
       ArgumentMatchers.any[LeaderAndIsr],
       ArgumentMatchers.anyInt()
     )).thenReturn(new CompletableFuture[LeaderAndIsr]())
@@ -335,7 +345,7 @@ class PartitionLockTest extends Logging {
       .setLeader(brokerId)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(true), offsetCheckpoints, None), "Expected become leader transition to succeed")
 
@@ -351,30 +361,68 @@ class PartitionLockTest extends Logging {
     logProps
   }
 
-  private def append(partition: Partition, numRecords: Int, followerQueues: Seq[ArrayBlockingQueue[MemoryRecords]]): Unit = {
+  private def append(
+    partition: Partition,
+    numRecords: Int
+  ): Unit = {
     val requestLocal = RequestLocal.withThreadConfinedCaching
     (0 until numRecords).foreach { _ =>
       val batch = TestUtils.records(records = List(new SimpleRecord("k1".getBytes, "v1".getBytes),
         new SimpleRecord("k2".getBytes, "v2".getBytes)))
       partition.appendRecordsToLeader(batch, origin = AppendOrigin.Client, requiredAcks = 0, requestLocal)
-      followerQueues.foreach(_.put(batch))
     }
   }
 
-  private def updateFollowerFetchState(partition: Partition, followerId: Int, numRecords: Int, followerQueue: ArrayBlockingQueue[MemoryRecords]): Unit = {
-    (1 to numRecords).foreach { i =>
-      val batch = followerQueue.poll(15, TimeUnit.SECONDS)
-      if (batch == null)
-        throw new RuntimeException(s"Timed out waiting for next batch $i")
-      val batches = batch.batches.iterator.asScala.toList
-      assertEquals(1, batches.size)
-      val recordBatch = batches.head
-      partition.updateFollowerFetchState(
-        followerId,
-        followerFetchOffsetMetadata = LogOffsetMetadata(recordBatch.lastOffset + 1),
-        followerStartOffset = 0L,
-        followerFetchTimeMs = mockTime.milliseconds(),
-        leaderEndOffset = partition.localLogOrException.logEndOffset)
+  private def fetchFollower(
+    partition: Partition,
+    followerId: Int,
+    leaderEpoch: Int,
+    numRecords: Int
+  ): Unit = {
+    val logStartOffset = 0L
+    var fetchOffset = 0L
+    var lastFetchedEpoch = Optional.empty[Integer]
+    val maxBytes = 1
+
+    while (fetchOffset < numRecords) {
+      val fetchParams = FetchParams(
+        requestVersion = ApiKeys.FETCH.latestVersion,
+        replicaId = followerId,
+        maxWaitMs = 0,
+        minBytes = 1,
+        maxBytes = maxBytes,
+        isolation = FetchLogEnd,
+        clientMetadata = None
+      )
+
+      val fetchPartitionData = new FetchRequest.PartitionData(
+        Uuid.ZERO_UUID,
+        fetchOffset,
+        logStartOffset,
+        maxBytes,
+        Optional.of(Int.box(leaderEpoch)),
+        lastFetchedEpoch
+      )
+
+      val logReadInfo = partition.fetchRecords(
+        fetchParams,
+        fetchPartitionData,
+        mockTime.milliseconds(),
+        maxBytes,
+        minOneMessage = true,
+        updateFetchState = true
+      )
+
+      assertTrue(logReadInfo.divergingEpoch.isEmpty)
+
+      val batches = logReadInfo.fetchedData.records.batches.asScala
+      if (batches.nonEmpty) {
+        assertEquals(1, batches.size)
+
+        val batch = batches.head
+        lastFetchedEpoch = Optional.of(Int.box(batch.partitionLeaderEpoch))
+        fetchOffset = batch.lastOffset + 1
+      }
     }
   }
 
@@ -396,7 +444,7 @@ class PartitionLockTest extends Logging {
     keepPartitionMetadataFile = true) {
 
     override def appendAsLeader(records: MemoryRecords, leaderEpoch: Int, origin: AppendOrigin,
-                                interBrokerProtocolVersion: ApiVersion, requestLocal: RequestLocal): LogAppendInfo = {
+                                interBrokerProtocolVersion: MetadataVersion, requestLocal: RequestLocal): LogAppendInfo = {
       val appendInfo = super.appendAsLeader(records, leaderEpoch, origin, interBrokerProtocolVersion, requestLocal)
       appendSemaphore.acquire()
       appendInfo
diff --git a/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala b/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala
index 972d9d5f72d3f..948abc6c3b87f 100644
--- a/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala
@@ -16,24 +16,25 @@
  */
 package kafka.cluster
 
+import java.net.InetAddress
 import com.yammer.metrics.core.Metric
-import kafka.api.{ApiVersion, KAFKA_2_6_IV0}
 import kafka.common.UnexpectedAppendOffsetException
 import kafka.log.{Defaults => _, _}
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server._
 import kafka.server.checkpoints.OffsetCheckpoints
+import kafka.server.epoch.EpochEntry
 import kafka.utils._
 import kafka.zk.KafkaZkClient
-import org.apache.kafka.common.errors.{ApiException, InconsistentTopicIdException, NotLeaderOrFollowerException, OffsetNotAvailableException, OffsetOutOfRangeException}
-import org.apache.kafka.common.message.FetchResponseData
+import org.apache.kafka.common.errors.{ApiException, FencedLeaderEpochException, InconsistentTopicIdException, NotLeaderOrFollowerException, OffsetNotAvailableException, OffsetOutOfRangeException, UnknownLeaderEpochException}
+import org.apache.kafka.common.message.{AlterPartitionResponseData, FetchResponseData}
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState
-import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record.FileRecords.TimestampAndOffset
 import org.apache.kafka.common.record._
-import org.apache.kafka.common.requests.ListOffsetsRequest
+import org.apache.kafka.common.requests.{AlterPartitionResponse, FetchRequest, ListOffsetsRequest, RequestHeader}
 import org.apache.kafka.common.utils.SystemTime
 import org.apache.kafka.common.{IsolationLevel, TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 import org.mockito.ArgumentMatchers
@@ -45,10 +46,60 @@ import java.nio.ByteBuffer
 import java.util.Optional
 import java.util.concurrent.{CountDownLatch, Semaphore}
 import kafka.server.epoch.LeaderEpochFileCache
-
+import kafka.server.metadata.{KRaftMetadataCache, ZkMetadataCache}
+import org.apache.kafka.clients.ClientResponse
+import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.replica.ClientMetadata
+import org.apache.kafka.common.replica.ClientMetadata.DefaultClientMetadata
+import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_6_IV0
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
+
+import scala.compat.java8.OptionConverters._
 import scala.jdk.CollectionConverters._
 
+object PartitionTest {
+  def followerFetchParams(
+    replicaId: Int,
+    maxWaitMs: Long = 0L,
+    minBytes: Int = 1,
+    maxBytes: Int = Int.MaxValue
+  ): FetchParams = {
+    FetchParams(
+      requestVersion = ApiKeys.FETCH.latestVersion,
+      replicaId = replicaId,
+      maxWaitMs = maxWaitMs,
+      minBytes = minBytes,
+      maxBytes = maxBytes,
+      isolation = FetchLogEnd,
+      clientMetadata = None
+    )
+  }
+
+  def consumerFetchParams(
+    maxWaitMs: Long = 0L,
+    minBytes: Int = 1,
+    maxBytes: Int = Int.MaxValue,
+    clientMetadata: Option[ClientMetadata] = None,
+    isolation: FetchIsolation = FetchHighWatermark
+  ): FetchParams = {
+    FetchParams(
+      requestVersion = ApiKeys.FETCH.latestVersion,
+      replicaId = FetchRequest.CONSUMER_REPLICA_ID,
+      maxWaitMs = maxWaitMs,
+      minBytes = minBytes,
+      maxBytes = maxBytes,
+      isolation = isolation,
+      clientMetadata = clientMetadata
+    )
+  }
+}
+
 class PartitionTest extends AbstractPartitionTest {
+  import PartitionTest._
 
   @Test
   def testLastFetchedOffsetValidation(): Unit = {
@@ -71,6 +122,7 @@ class PartitionTest extends AbstractPartitionTest {
     assertEquals(17L, log.logEndOffset)
 
     val leaderEpoch = 10
+    val logStartOffset = 0L
     val partition = setupPartitionWithMocks(leaderEpoch = leaderEpoch, isLeader = true)
 
     def epochEndOffset(epoch: Int, endOffset: Long): FetchResponseData.EpochEndOffset = {
@@ -80,14 +132,13 @@ class PartitionTest extends AbstractPartitionTest {
     }
 
     def read(lastFetchedEpoch: Int, fetchOffset: Long): LogReadInfo = {
-      partition.readRecords(
-        Optional.of(lastFetchedEpoch),
+      fetchFollower(
+        partition,
+        remoteReplicaId,
         fetchOffset,
-        currentLeaderEpoch = Optional.of(leaderEpoch),
-        maxBytes = Int.MaxValue,
-        fetchIsolation = FetchLogEnd,
-        fetchOnlyFromLeader = true,
-        minOneMessage = true
+        logStartOffset,
+        leaderEpoch = Some(leaderEpoch),
+        lastFetchedEpoch = Some(lastFetchedEpoch)
       )
     }
 
@@ -189,6 +240,125 @@ class PartitionTest extends AbstractPartitionTest {
     assertEquals(None, partition.futureLog)
   }
 
+  @Test
+  def testReplicaFetchToFollower(): Unit = {
+    val controllerEpoch = 3
+    val followerId = brokerId + 1
+    val leaderId = brokerId + 2
+    val replicas = List[Integer](brokerId, followerId, leaderId).asJava
+    val isr = List[Integer](brokerId, followerId, leaderId).asJava
+    val leaderEpoch = 8
+    val partitionEpoch = 1
+
+    assertTrue(partition.makeFollower(new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(isr)
+      .setPartitionEpoch(partitionEpoch)
+      .setReplicas(replicas)
+      .setIsNew(true),
+      offsetCheckpoints, None
+    ))
+
+    def assertFetchFromReplicaFails[T <: ApiException](
+      expectedExceptionClass: Class[T],
+      leaderEpoch: Option[Int]
+    ): Unit = {
+      assertThrows(expectedExceptionClass, () => {
+        fetchFollower(
+          partition,
+          replicaId = followerId,
+          fetchOffset = 0L,
+          leaderEpoch = leaderEpoch
+        )
+      })
+    }
+
+    assertFetchFromReplicaFails(classOf[NotLeaderOrFollowerException], None)
+    assertFetchFromReplicaFails(classOf[NotLeaderOrFollowerException], Some(leaderEpoch))
+    assertFetchFromReplicaFails(classOf[UnknownLeaderEpochException], Some(leaderEpoch + 1))
+    assertFetchFromReplicaFails(classOf[FencedLeaderEpochException], Some(leaderEpoch - 1))
+  }
+
+  @Test
+  def testFetchFromUnrecognizedFollower(): Unit = {
+    val controllerEpoch = 3
+    val leader = brokerId
+    val validReplica = brokerId + 1
+    val addingReplica1 = brokerId + 2
+    val addingReplica2 = brokerId + 3
+    val replicas = List(leader, validReplica)
+    val isr = List[Integer](leader, validReplica).asJava
+    val leaderEpoch = 8
+    val partitionEpoch = 1
+
+    assertTrue(partition.makeLeader(new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leader)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(isr)
+      .setPartitionEpoch(partitionEpoch)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true),
+      offsetCheckpoints, None
+    ))
+
+    assertThrows(classOf[UnknownLeaderEpochException], () => {
+      fetchFollower(
+        partition,
+        replicaId = addingReplica1,
+        fetchOffset = 0L,
+        leaderEpoch = Some(leaderEpoch)
+      )
+    })
+    assertEquals(None, partition.getReplica(addingReplica1).map(_.stateSnapshot.logEndOffset))
+
+    assertThrows(classOf[NotLeaderOrFollowerException], () => {
+      fetchFollower(
+        partition,
+        replicaId = addingReplica2,
+        fetchOffset = 0L,
+        leaderEpoch = None
+      )
+    })
+    assertEquals(None, partition.getReplica(addingReplica2).map(_.stateSnapshot.logEndOffset))
+
+    // The replicas are added as part of a reassignment
+    val newReplicas = List(leader, validReplica, addingReplica1, addingReplica2)
+    val newPartitionEpoch = partitionEpoch + 1
+    val addingReplicas = List(addingReplica1, addingReplica2)
+
+    assertFalse(partition.makeLeader(new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leader)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(isr)
+      .setPartitionEpoch(newPartitionEpoch)
+      .setReplicas(newReplicas.map(Int.box).asJava)
+      .setAddingReplicas(addingReplicas.map(Int.box).asJava)
+      .setIsNew(true),
+      offsetCheckpoints, None
+    ))
+
+    // Now the fetches are allowed
+    assertEquals(0L, fetchFollower(
+      partition,
+      replicaId = addingReplica1,
+      fetchOffset = 0L,
+      leaderEpoch = Some(leaderEpoch)
+    ).logEndOffset)
+    assertEquals(Some(0L), partition.getReplica(addingReplica1).map(_.stateSnapshot.logEndOffset))
+
+    assertEquals(0L, fetchFollower(
+      partition,
+      replicaId = addingReplica2,
+      fetchOffset = 0L,
+      leaderEpoch = None
+    ).logEndOffset)
+    assertEquals(Some(0L), partition.getReplica(addingReplica2).map(_.stateSnapshot.logEndOffset))
+  }
+
   // Verify that partition.makeFollower() and partition.appendRecordsToFollowerOrFutureReplica() can run concurrently
   @Test
   def testMakeFollowerWithWithFollowerAppendRecords(): Unit = {
@@ -198,14 +368,14 @@ class PartitionTest extends AbstractPartitionTest {
     partition = new Partition(
       topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       logManager,
-      alterIsrManager) {
+      alterPartitionManager) {
 
       override def createLog(isNew: Boolean, isFutureReplica: Boolean, offsetCheckpoints: OffsetCheckpoints, topicId: Option[Uuid]): UnifiedLog = {
         val log = super.createLog(isNew, isFutureReplica, offsetCheckpoints, None)
@@ -260,7 +430,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(2)
       .setLeaderEpoch(1)
       .setIsr(List[Integer](0, 1, 2, brokerId).asJava)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(List[Integer](0, 1, 2, brokerId).asJava)
       .setIsNew(false)
     assertTrue(partition.makeFollower(partitionState, offsetCheckpoints, None))
@@ -402,69 +572,59 @@ class PartitionTest extends AbstractPartitionTest {
   }
 
   @Test
-  def testReadRecordEpochValidationForLeader(): Unit = {
+  def testLeaderEpochValidationOnLeader(): Unit = {
     val leaderEpoch = 5
     val partition = setupPartitionWithMocks(leaderEpoch, isLeader = true)
 
-    def assertReadRecordsError(error: Errors,
-                               currentLeaderEpochOpt: Optional[Integer]): Unit = {
-      try {
-        partition.readRecords(
-          lastFetchedEpoch = Optional.empty(),
-          fetchOffset = 0L,
-          currentLeaderEpoch = currentLeaderEpochOpt,
-          maxBytes = 1024,
-          fetchIsolation = FetchLogEnd,
-          fetchOnlyFromLeader = true,
-          minOneMessage = false)
-        if (error != Errors.NONE)
-          fail(s"Expected readRecords to fail with error $error")
-      } catch {
-        case e: Exception =>
-          assertEquals(error, Errors.forException(e))
-      }
+    def sendFetch(leaderEpoch: Option[Int]): LogReadInfo = {
+      fetchFollower(
+        partition,
+        remoteReplicaId,
+        fetchOffset = 0L,
+        leaderEpoch = leaderEpoch
+      )
     }
 
-    assertReadRecordsError(Errors.NONE, Optional.empty())
-    assertReadRecordsError(Errors.NONE, Optional.of(leaderEpoch))
-    assertReadRecordsError(Errors.FENCED_LEADER_EPOCH, Optional.of(leaderEpoch - 1))
-    assertReadRecordsError(Errors.UNKNOWN_LEADER_EPOCH, Optional.of(leaderEpoch + 1))
+    assertEquals(0L, sendFetch(leaderEpoch = None).logEndOffset)
+    assertEquals(0L, sendFetch(leaderEpoch = Some(leaderEpoch)).logEndOffset)
+    assertThrows(classOf[FencedLeaderEpochException], () => sendFetch(Some(leaderEpoch - 1)))
+    assertThrows(classOf[UnknownLeaderEpochException], () => sendFetch(Some(leaderEpoch + 1)))
   }
 
   @Test
-  def testReadRecordEpochValidationForFollower(): Unit = {
+  def testLeaderEpochValidationOnFollower(): Unit = {
     val leaderEpoch = 5
     val partition = setupPartitionWithMocks(leaderEpoch, isLeader = false)
 
-    def assertReadRecordsError(error: Errors,
-                               currentLeaderEpochOpt: Optional[Integer],
-                               fetchOnlyLeader: Boolean): Unit = {
-      try {
-        partition.readRecords(
-          lastFetchedEpoch = Optional.empty(),
-          fetchOffset = 0L,
-          currentLeaderEpoch = currentLeaderEpochOpt,
-          maxBytes = 1024,
-          fetchIsolation = FetchLogEnd,
-          fetchOnlyFromLeader = fetchOnlyLeader,
-          minOneMessage = false)
-        if (error != Errors.NONE)
-          fail(s"Expected readRecords to fail with error $error")
-      } catch {
-        case e: Exception =>
-          assertEquals(error, Errors.forException(e))
-      }
+    def sendFetch(
+      leaderEpoch: Option[Int],
+      clientMetadata: Option[ClientMetadata]
+    ): LogReadInfo = {
+      fetchConsumer(
+        partition,
+        fetchOffset = 0L,
+        leaderEpoch = leaderEpoch,
+        clientMetadata = clientMetadata
+      )
     }
 
-    assertReadRecordsError(Errors.NONE, Optional.empty(), fetchOnlyLeader = false)
-    assertReadRecordsError(Errors.NONE, Optional.of(leaderEpoch), fetchOnlyLeader = false)
-    assertReadRecordsError(Errors.FENCED_LEADER_EPOCH, Optional.of(leaderEpoch - 1), fetchOnlyLeader = false)
-    assertReadRecordsError(Errors.UNKNOWN_LEADER_EPOCH, Optional.of(leaderEpoch + 1), fetchOnlyLeader = false)
-
-    assertReadRecordsError(Errors.NOT_LEADER_OR_FOLLOWER, Optional.empty(), fetchOnlyLeader = true)
-    assertReadRecordsError(Errors.NOT_LEADER_OR_FOLLOWER, Optional.of(leaderEpoch), fetchOnlyLeader = true)
-    assertReadRecordsError(Errors.FENCED_LEADER_EPOCH, Optional.of(leaderEpoch - 1), fetchOnlyLeader = true)
-    assertReadRecordsError(Errors.UNKNOWN_LEADER_EPOCH, Optional.of(leaderEpoch + 1), fetchOnlyLeader = true)
+    // Follower fetching is only allowed when the client provides metadata
+    assertThrows(classOf[NotLeaderOrFollowerException], () => sendFetch(None, None))
+    assertThrows(classOf[NotLeaderOrFollowerException], () => sendFetch(Some(leaderEpoch), None))
+    assertThrows(classOf[FencedLeaderEpochException], () => sendFetch(Some(leaderEpoch - 1), None))
+    assertThrows(classOf[UnknownLeaderEpochException], () => sendFetch(Some(leaderEpoch + 1), None))
+
+    val clientMetadata = new DefaultClientMetadata(
+      "rack",
+      "clientId",
+      InetAddress.getLoopbackAddress,
+      KafkaPrincipal.ANONYMOUS,
+      ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT).value
+    )
+    assertEquals(0L, sendFetch(leaderEpoch = None, Some(clientMetadata)).logEndOffset)
+    assertEquals(0L, sendFetch(leaderEpoch = Some(leaderEpoch), Some(clientMetadata)).logEndOffset)
+    assertThrows(classOf[FencedLeaderEpochException], () => sendFetch(Some(leaderEpoch - 1), Some(clientMetadata)))
+    assertThrows(classOf[UnknownLeaderEpochException], () => sendFetch(Some(leaderEpoch + 1), Some(clientMetadata)))
   }
 
   @Test
@@ -569,13 +729,13 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(leader)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas.map(Int.box).asJava)
       .setIsNew(true)
 
     assertTrue(partition.makeLeader(leaderState, offsetCheckpoints, None), "Expected first makeLeader() to return 'leader changed'")
     assertEquals(leaderEpoch, partition.getLeaderEpoch, "Current leader epoch")
-    assertEquals(Set[Integer](leader, follower2), partition.isrState.isr, "ISR")
+    assertEquals(Set[Integer](leader, follower2), partition.partitionState.isr, "ISR")
 
     val requestLocal = RequestLocal.withThreadConfinedCaching
     // after makeLeader(() call, partition should know about all the replicas
@@ -585,16 +745,6 @@ class PartitionTest extends AbstractPartitionTest {
     assertEquals(partition.localLogOrException.logStartOffset, partition.localLogOrException.highWatermark,
       "Expected leader's HW not move")
 
-    // let the follower in ISR move leader's HW to move further but below LEO
-    def updateFollowerFetchState(followerId: Int, fetchOffsetMetadata: LogOffsetMetadata): Unit = {
-      partition.updateFollowerFetchState(
-        followerId,
-        followerFetchOffsetMetadata = fetchOffsetMetadata,
-        followerStartOffset = 0L,
-        followerFetchTimeMs = time.milliseconds(),
-        leaderEndOffset = partition.localLogOrException.logEndOffset)
-    }
-
     def fetchOffsetsForTimestamp(timestamp: Long, isolation: Option[IsolationLevel]): Either[ApiException, Option[TimestampAndOffset]] = {
       try {
         Right(partition.fetchOffsetForTimestamp(
@@ -608,14 +758,15 @@ class PartitionTest extends AbstractPartitionTest {
       }
     }
 
-    updateFollowerFetchState(follower1, LogOffsetMetadata(0))
-    updateFollowerFetchState(follower1, LogOffsetMetadata(2))
+    // let the follower in ISR move leader's HW to move further but below LEO
+    fetchFollower(partition, replicaId = follower1, fetchOffset = 0L)
+    fetchFollower(partition, replicaId = follower1, fetchOffset = 2L)
 
-    updateFollowerFetchState(follower2, LogOffsetMetadata(0))
-    updateFollowerFetchState(follower2, LogOffsetMetadata(2))
+    fetchFollower(partition, replicaId = follower2, fetchOffset = 0L)
+    fetchFollower(partition, replicaId = follower2, fetchOffset = 2L)
 
     // Simulate successful ISR update
-    alterIsrManager.completeIsrUpdate(2)
+    alterPartitionManager.completeIsrUpdate(2)
 
     // At this point, the leader has gotten 5 writes, but followers have only fetched two
     assertEquals(2, partition.localLogOrException.highWatermark)
@@ -643,7 +794,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(follower2)
       .setLeaderEpoch(leaderEpoch + 1)
       .setIsr(isr)
-      .setZkVersion(4)
+      .setPartitionEpoch(4)
       .setReplicas(replicas.map(Int.box).asJava)
       .setIsNew(false)
 
@@ -655,7 +806,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(leader)
       .setLeaderEpoch(leaderEpoch + 2)
       .setIsr(isr)
-      .setZkVersion(5)
+      .setPartitionEpoch(5)
       .setReplicas(replicas.map(Int.box).asJava)
       .setIsNew(false)
 
@@ -701,11 +852,11 @@ class PartitionTest extends AbstractPartitionTest {
     }
 
     // Next fetch from replicas, HW is moved up to 5 (ahead of the LEO)
-    updateFollowerFetchState(follower1, LogOffsetMetadata(5))
-    updateFollowerFetchState(follower2, LogOffsetMetadata(5))
+    fetchFollower(partition, replicaId = follower1, fetchOffset = 5L)
+    fetchFollower(partition, replicaId = follower2, fetchOffset = 5L)
 
     // Simulate successful ISR update
-    alterIsrManager.completeIsrUpdate(6)
+    alterPartitionManager.completeIsrUpdate(6)
 
     // Error goes away
     fetchOffsetsForTimestamp(ListOffsetsRequest.LATEST_TIMESTAMP, Some(IsolationLevel.READ_UNCOMMITTED)) match {
@@ -781,7 +932,7 @@ class PartitionTest extends AbstractPartitionTest {
         .setLeader(brokerId)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(isr)
-        .setZkVersion(1)
+        .setPartitionEpoch(1)
         .setReplicas(replicas)
         .setIsNew(true), offsetCheckpoints, None), "Expected become leader transition to succeed")
     assertEquals(leaderEpoch, partition.getLeaderEpoch)
@@ -850,7 +1001,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(1)
       .setLeaderEpoch(1)
       .setIsr(List[Integer](0, 1, 2, brokerId).asJava)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(List[Integer](0, 1, 2, brokerId).asJava)
       .setIsNew(false)
     partition.makeFollower(partitionState, offsetCheckpoints, None)
@@ -861,7 +1012,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(1)
       .setLeaderEpoch(4)
       .setIsr(List[Integer](0, 1, 2, brokerId).asJava)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(List[Integer](0, 1, 2, brokerId).asJava)
       .setIsNew(false)
     assertTrue(partition.makeFollower(partitionState, offsetCheckpoints, None))
@@ -872,7 +1023,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(1)
       .setLeaderEpoch(4)
       .setIsr(List[Integer](0, 1, 2, brokerId).asJava)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(List[Integer](0, 1, 2, brokerId).asJava)
     assertFalse(partition.makeFollower(partitionState, offsetCheckpoints, None))
   }
@@ -899,12 +1050,12 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(leader)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(true)
     assertTrue(partition.makeLeader(leaderState, offsetCheckpoints, None), "Expected first makeLeader() to return 'leader changed'")
     assertEquals(leaderEpoch, partition.getLeaderEpoch, "Current leader epoch")
-    assertEquals(Set[Integer](leader, follower2), partition.isrState.isr, "ISR")
+    assertEquals(Set[Integer](leader, follower2), partition.partitionState.isr, "ISR")
 
     val requestLocal = RequestLocal.withThreadConfinedCaching
 
@@ -916,17 +1067,8 @@ class PartitionTest extends AbstractPartitionTest {
     assertEquals(partition.localLogOrException.logStartOffset, partition.log.get.highWatermark, "Expected leader's HW not move")
 
     // let the follower in ISR move leader's HW to move further but below LEO
-    def updateFollowerFetchState(followerId: Int, fetchOffsetMetadata: LogOffsetMetadata): Unit = {
-      partition.updateFollowerFetchState(
-        followerId,
-        followerFetchOffsetMetadata = fetchOffsetMetadata,
-        followerStartOffset = 0L,
-        followerFetchTimeMs = time.milliseconds(),
-        leaderEndOffset = partition.localLogOrException.logEndOffset)
-    }
-
-    updateFollowerFetchState(follower2, LogOffsetMetadata(0))
-    updateFollowerFetchState(follower2, LogOffsetMetadata(lastOffsetOfFirstBatch))
+    fetchFollower(partition, replicaId = follower2, fetchOffset = 0)
+    fetchFollower(partition, replicaId = follower2, fetchOffset = lastOffsetOfFirstBatch)
     assertEquals(lastOffsetOfFirstBatch, partition.log.get.highWatermark, "Expected leader's HW")
 
     // current leader becomes follower and then leader again (without any new records appended)
@@ -935,7 +1077,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(follower2)
       .setLeaderEpoch(leaderEpoch + 1)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     partition.makeFollower(followerState, offsetCheckpoints, None)
@@ -945,7 +1087,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(leader)
       .setLeaderEpoch(leaderEpoch + 2)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     assertTrue(partition.makeLeader(newLeaderState, offsetCheckpoints, None),
@@ -956,18 +1098,18 @@ class PartitionTest extends AbstractPartitionTest {
     partition.appendRecordsToLeader(batch3, origin = AppendOrigin.Client, requiredAcks = 0, requestLocal)
 
     // fetch from follower not in ISR from log start offset should not add this follower to ISR
-    updateFollowerFetchState(follower1, LogOffsetMetadata(0))
-    updateFollowerFetchState(follower1, LogOffsetMetadata(lastOffsetOfFirstBatch))
-    assertEquals(Set[Integer](leader, follower2), partition.isrState.isr, "ISR")
+    fetchFollower(partition, replicaId = follower1, fetchOffset = 0)
+    fetchFollower(partition, replicaId = follower1, fetchOffset = lastOffsetOfFirstBatch)
+    assertEquals(Set[Integer](leader, follower2), partition.partitionState.isr, "ISR")
 
     // fetch from the follower not in ISR from start offset of the current leader epoch should
     // add this follower to ISR
-    updateFollowerFetchState(follower1, LogOffsetMetadata(currentLeaderEpochStartOffset))
+    fetchFollower(partition, replicaId = follower1, fetchOffset = currentLeaderEpochStartOffset)
 
     // Expansion does not affect the ISR
-    assertEquals(Set[Integer](leader, follower2), partition.isrState.isr, "ISR")
-    assertEquals(Set[Integer](leader, follower1, follower2), partition.isrState.maximalIsr, "ISR")
-    assertEquals(alterIsrManager.isrUpdates.head.leaderAndIsr.isr.toSet,
+    assertEquals(Set[Integer](leader, follower2), partition.partitionState.isr, "ISR")
+    assertEquals(Set[Integer](leader, follower1, follower2), partition.partitionState.maximalIsr, "ISR")
+    assertEquals(alterPartitionManager.isrUpdates.head.leaderAndIsr.isr.toSet,
       Set(leader, follower1, follower2), "AlterIsr")
   }
 
@@ -1014,7 +1156,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(leader)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(true)
     partition.makeLeader(leaderState, offsetCheckpoints, None)
@@ -1041,40 +1183,85 @@ class PartitionTest extends AbstractPartitionTest {
           .setLeader(brokerId)
           .setLeaderEpoch(leaderEpoch)
           .setIsr(isr)
-          .setZkVersion(1)
+          .setPartitionEpoch(1)
           .setReplicas(replicas)
           .setIsNew(true),
         offsetCheckpoints, None), "Expected become leader transition to succeed")
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     time.sleep(500)
 
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(3),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 6L)
-
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(3L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 3L)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = 0L,
+      logEndOffset = 3L
+    )
 
     time.sleep(500)
 
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(6L),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 6L)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 6L)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 6L
+    )
+  }
+
+  @Test
+  def testInvalidAlterPartitionRequestsAreNotRetried(): Unit = {
+    val log = logManager.getOrCreateLog(topicPartition, topicId = None)
+    seedLogData(log, numRecords = 10, leaderEpoch = 4)
 
-    assertEquals(time.milliseconds(), remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(6L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
+    val controllerEpoch = 0
+    val leaderEpoch = 5
+    val remoteBrokerId = brokerId + 1
+    val replicas = List[Integer](brokerId, remoteBrokerId).asJava
+    val isr = List[Integer](brokerId).asJava
 
+    partition.createLogIfNotExists(isNew = false, isFutureReplica = false, offsetCheckpoints, None)
+    assertTrue(partition.makeLeader(
+        new LeaderAndIsrPartitionState()
+          .setControllerEpoch(controllerEpoch)
+          .setLeader(brokerId)
+          .setLeaderEpoch(leaderEpoch)
+          .setIsr(isr)
+          .setPartitionEpoch(1)
+          .setReplicas(replicas)
+          .setIsNew(true),
+        offsetCheckpoints, None), "Expected become leader transition to succeed")
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
+
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L)
+
+    // Check that the isr didn't change and alter update is scheduled
+    assertEquals(Set(brokerId), partition.inSyncReplicaIds)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertEquals(1, alterPartitionManager.isrUpdates.size)
+    assertEquals(Set(brokerId, remoteBrokerId), alterPartitionManager.isrUpdates.head.leaderAndIsr.isr.toSet)
+
+    // Simulate invalid request failure
+    alterPartitionManager.failIsrUpdate(Errors.INVALID_REQUEST)
+
+    // Still no ISR change and no retry
+    assertEquals(Set(brokerId), partition.inSyncReplicaIds)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
+
+    assertEquals(0, alterPartitionListener.expands.get)
+    assertEquals(0, alterPartitionListener.shrinks.get)
+    assertEquals(1, alterPartitionListener.failures.get)
   }
 
   @Test
@@ -1095,47 +1282,45 @@ class PartitionTest extends AbstractPartitionTest {
           .setLeader(brokerId)
           .setLeaderEpoch(leaderEpoch)
           .setIsr(isr)
-          .setZkVersion(1)
+          .setPartitionEpoch(1)
           .setReplicas(replicas.map(Int.box).asJava)
           .setIsNew(true),
         offsetCheckpoints, None), "Expected become leader transition to succeed")
-    assertEquals(Set(brokerId), partition.isrState.isr)
-
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
-
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(3),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 6L)
-
-    assertEquals(Set(brokerId), partition.isrState.isr)
-    assertEquals(3L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
-
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 6L)
-
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    val isrItem = alterIsrManager.isrUpdates.head
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
+
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 3L)
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = 0L,
+      logEndOffset = 3L
+    )
+
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    val isrItem = alterPartitionManager.isrUpdates.head
     assertEquals(isrItem.leaderAndIsr.isr, List(brokerId, remoteBrokerId))
-    assertEquals(Set(brokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
-    assertEquals(10L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 10L
+    )
 
     // Complete the ISR expansion
-    alterIsrManager.completeIsrUpdate(2)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
+    alterPartitionManager.completeIsrUpdate(2)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
 
-    assertEquals(isrChangeListener.expands.get, 1)
-    assertEquals(isrChangeListener.shrinks.get, 0)
-    assertEquals(isrChangeListener.failures.get, 0)
+    assertEquals(alterPartitionListener.expands.get, 1)
+    assertEquals(alterPartitionListener.shrinks.get, 0)
+    assertEquals(alterPartitionListener.failures.get, 0)
   }
 
   @Test
@@ -1156,40 +1341,253 @@ class PartitionTest extends AbstractPartitionTest {
           .setLeader(brokerId)
           .setLeaderEpoch(leaderEpoch)
           .setIsr(isr)
-          .setZkVersion(1)
+          .setPartitionEpoch(1)
           .setReplicas(replicas)
           .setIsNew(true),
         offsetCheckpoints, None), "Expected become leader transition to succeed")
-    assertEquals(Set(brokerId), partition.isrState.isr)
+    assertEquals(Set(brokerId), partition.partitionState.isr)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 10L)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L)
 
     // Follower state is updated, but the ISR has not expanded
     assertEquals(Set(brokerId), partition.inSyncReplicaIds)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    assertEquals(10L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 10L
+    )
 
     // Simulate failure callback
-    alterIsrManager.failIsrUpdate(Errors.INVALID_UPDATE_VERSION)
+    alterPartitionManager.failIsrUpdate(Errors.INVALID_UPDATE_VERSION)
 
-    // Still no ISR change
+    // Still no ISR change and it doesn't retry
     assertEquals(Set(brokerId), partition.inSyncReplicaIds)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
+    assertEquals(alterPartitionListener.expands.get, 0)
+    assertEquals(alterPartitionListener.shrinks.get, 0)
+    assertEquals(alterPartitionListener.failures.get, 1)
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testIsrNotExpandedIfReplicaIsFencedOrShutdown(quorum: String): Unit = {
+    val kraft = quorum == "kraft"
+
+    val log = logManager.getOrCreateLog(topicPartition, topicId = None)
+    seedLogData(log, numRecords = 10, leaderEpoch = 4)
+
+    val controllerEpoch = 0
+    val leaderEpoch = 5
+    val remoteBrokerId = brokerId + 1
+    val replicas = List(brokerId, remoteBrokerId)
+    val isr = Set(brokerId)
+
+    val metadataCache: MetadataCache = if (kraft) mock(classOf[KRaftMetadataCache]) else mock(classOf[ZkMetadataCache])
+
+    // Mark the remote broker as eligible or ineligible in the metadata cache of the leader.
+    // When using kraft, we can make the broker ineligible by fencing it.
+    // In ZK mode, we must mark the broker as alive for it to be eligible.
+    def markRemoteReplicaEligible(eligible: Boolean): Unit = {
+      if (kraft) {
+        when(metadataCache.asInstanceOf[KRaftMetadataCache].isBrokerFenced(remoteBrokerId)).thenReturn(!eligible)
+      } else {
+        when(metadataCache.hasAliveBroker(remoteBrokerId)).thenReturn(eligible)
+      }
+    }
+
+    val partition = new Partition(
+      topicPartition,
+      replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
+      interBrokerProtocolVersion = MetadataVersion.latest,
+      localBrokerId = brokerId,
+      time,
+      alterPartitionListener,
+      delayedOperations,
+      metadataCache,
+      logManager,
+      alterPartitionManager
+    )
+
+    partition.createLogIfNotExists(isNew = false, isFutureReplica = false, offsetCheckpoints, None)
+    assertTrue(partition.makeLeader(
+      new LeaderAndIsrPartitionState()
+        .setControllerEpoch(controllerEpoch)
+        .setLeader(brokerId)
+        .setLeaderEpoch(leaderEpoch)
+        .setIsr(isr.toList.map(Int.box).asJava)
+        .setPartitionEpoch(1)
+        .setReplicas(replicas.map(Int.box).asJava)
+        .setIsNew(true),
+      offsetCheckpoints, None), "Expected become leader transition to succeed")
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+
+    markRemoteReplicaEligible(true)
+
+    // Fetch to let the follower catch up to the log end offset and
+    // to check if an expansion is possible.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Follower fetches and catches up to the log end offset.
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = log.logEndOffset
+    )
+
+    // Expansion is triggered.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertEquals(1, alterPartitionManager.isrUpdates.size)
+
+    // Controller rejects the expansion because the broker is fenced or offline.
+    alterPartitionManager.failIsrUpdate(Errors.INELIGIBLE_REPLICA)
+
+    // The leader reverts back to the previous ISR.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
+
+    // The leader eventually learns about the fenced or offline broker.
+    markRemoteReplicaEligible(false)
+
+    // The follower fetches again.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Expansion is not triggered because the follower is fenced.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
+
+    // The broker is eventually unfenced or brought back online.
+    markRemoteReplicaEligible(true)
+
+    // The follower fetches again.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Expansion is triggered.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertTrue(partition.partitionState.isInflight)
+    assertEquals(1, alterPartitionManager.isrUpdates.size)
+
+    // Expansion succeeds.
+    alterPartitionManager.completeIsrUpdate(newPartitionEpoch = 1)
+
+    // ISR is committed.
+    assertEquals(replicas.toSet, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
+  }
+
+  @Test
+  def testIsrNotExpandedIfReplicaIsInControlledShutdown(): Unit = {
+    val log = logManager.getOrCreateLog(topicPartition, topicId = None)
+    seedLogData(log, numRecords = 10, leaderEpoch = 4)
+
+    val controllerEpoch = 0
+    val leaderEpoch = 5
+    val remoteBrokerId = brokerId + 1
+    val replicas = List(brokerId, remoteBrokerId)
+    val isr = Set(brokerId)
+
+    val metadataCache = mock(classOf[KRaftMetadataCache])
+    val partition = new Partition(
+      topicPartition,
+      replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
+      interBrokerProtocolVersion = MetadataVersion.latest,
+      localBrokerId = brokerId,
+      time,
+      alterPartitionListener,
+      delayedOperations,
+      metadataCache,
+      logManager,
+      alterPartitionManager
+    )
+
+    partition.createLogIfNotExists(isNew = false, isFutureReplica = false, offsetCheckpoints, None)
+    assertTrue(partition.makeLeader(
+      new LeaderAndIsrPartitionState()
+        .setControllerEpoch(controllerEpoch)
+        .setLeader(brokerId)
+        .setLeaderEpoch(leaderEpoch)
+        .setIsr(isr.toList.map(Int.box).asJava)
+        .setPartitionEpoch(1)
+        .setReplicas(replicas.map(Int.box).asJava)
+        .setIsNew(true),
+      offsetCheckpoints, None), "Expected become leader transition to succeed")
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+
+    // Fetch to let the follower catch up to the log end offset and
+    // to check if an expansion is possible.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Follower fetches and catches up to the log end offset.
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = log.logEndOffset
+    )
+
+    // Expansion is triggered.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertEquals(1, alterPartitionManager.isrUpdates.size)
+
+    // Controller rejects the expansion because the broker is in controlled shutdown.
+    alterPartitionManager.failIsrUpdate(Errors.INELIGIBLE_REPLICA)
+
+    // The leader reverts back to the previous ISR.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
 
-    assertEquals(isrChangeListener.expands.get, 0)
-    assertEquals(isrChangeListener.shrinks.get, 0)
-    assertEquals(isrChangeListener.failures.get, 1)
+    // The leader eventually learns about the in controlled shutdown broker.
+    when(metadataCache.isBrokerShuttingDown(remoteBrokerId)).thenReturn(true)
+
+    // The follower fetches again.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Expansion is not triggered because the follower is fenced.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(isr, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
+
+    // The broker eventually comes back.
+    when(metadataCache.isBrokerShuttingDown(remoteBrokerId)).thenReturn(false)
+
+    // The follower fetches again.
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = log.logEndOffset)
+
+    // Expansion is triggered.
+    assertEquals(isr, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertTrue(partition.partitionState.isInflight)
+    assertEquals(1, alterPartitionManager.isrUpdates.size)
+
+    // Expansion succeeds.
+    alterPartitionManager.completeIsrUpdate(newPartitionEpoch= 1)
+
+    // ISR is committed.
+    assertEquals(replicas.toSet, partition.partitionState.isr)
+    assertEquals(replicas.toSet, partition.partitionState.maximalIsr)
+    assertFalse(partition.partitionState.isInflight)
+    assertEquals(0, alterPartitionManager.isrUpdates.size)
   }
 
   @Test
@@ -1210,7 +1608,7 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
@@ -1220,28 +1618,28 @@ class PartitionTest extends AbstractPartitionTest {
 
     // Try to shrink the ISR
     partition.maybeShrinkIsr()
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    assertEquals(alterIsrManager.isrUpdates.head.leaderAndIsr.isr, List(brokerId))
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    assertEquals(alterPartitionManager.isrUpdates.head.leaderAndIsr.isr, List(brokerId))
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
 
     // The shrink fails and we retry
-    alterIsrManager.failIsrUpdate(Errors.NETWORK_EXCEPTION)
-    assertEquals(0, isrChangeListener.shrinks.get)
-    assertEquals(1, isrChangeListener.failures.get)
-    assertEquals(1, partition.getZkVersion)
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
+    alterPartitionManager.failIsrUpdate(Errors.NETWORK_EXCEPTION)
+    assertEquals(0, alterPartitionListener.shrinks.get)
+    assertEquals(1, alterPartitionListener.failures.get)
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
     // The shrink succeeds after retrying
-    alterIsrManager.completeIsrUpdate(newZkVersion = 2)
-    assertEquals(1, isrChangeListener.shrinks.get)
-    assertEquals(2, partition.getZkVersion)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
-    assertEquals(Set(brokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId), partition.isrState.maximalIsr)
+    alterPartitionManager.completeIsrUpdate(newPartitionEpoch = 2)
+    assertEquals(1, alterPartitionListener.shrinks.get)
+    assertEquals(2, partition.getPartitionEpoch)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId), partition.partitionState.maximalIsr)
     assertEquals(log.logEndOffset, partition.localLogOrException.highWatermark)
   }
 
@@ -1263,40 +1661,41 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     // On initialization, the replica is considered caught up and should not be removed
     partition.maybeShrinkIsr()
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
 
     // If enough time passes without a fetch update, the ISR should shrink
     time.sleep(partition.replicaLagTimeMaxMs + 1)
 
     // Shrink the ISR
     partition.maybeShrinkIsr()
-    assertEquals(0, isrChangeListener.shrinks.get)
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    assertEquals(alterIsrManager.isrUpdates.head.leaderAndIsr.isr, List(brokerId))
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
+    assertEquals(0, alterPartitionListener.shrinks.get)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    assertEquals(alterPartitionManager.isrUpdates.head.leaderAndIsr.isr, List(brokerId))
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
     // After the ISR shrink completes, the ISR state should be updated and the
     // high watermark should be advanced
-    alterIsrManager.completeIsrUpdate(newZkVersion = 2)
-    assertEquals(1, isrChangeListener.shrinks.get)
-    assertEquals(2, partition.getZkVersion)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
-    assertEquals(Set(brokerId), partition.isrState.isr)
-    assertEquals(Set(brokerId), partition.isrState.maximalIsr)
+    alterPartitionManager.completeIsrUpdate(newPartitionEpoch = 2)
+    assertEquals(1, alterPartitionListener.shrinks.get)
+    assertEquals(2, partition.getPartitionEpoch)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
+    assertEquals(Set(brokerId), partition.partitionState.isr)
+    assertEquals(Set(brokerId), partition.partitionState.maximalIsr)
     assertEquals(log.logEndOffset, partition.localLogOrException.highWatermark)
   }
 
@@ -1318,20 +1717,21 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     // Shrink the ISR
     time.sleep(partition.replicaLagTimeMaxMs + 1)
     partition.maybeShrinkIsr()
-    assertTrue(partition.isrState.isInflight)
+    assertTrue(partition.partitionState.isInflight)
 
     // Become leader again, reset the ISR state
     assertFalse(makeLeader(
@@ -1340,21 +1740,21 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 2,
+      partitionEpoch = 2,
       isNew = false
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
-    assertFalse(partition.isrState.isInflight, "ISR should be committed and not inflight")
+    assertFalse(partition.partitionState.isInflight, "ISR should be committed and not inflight")
 
     // Try the shrink again, should not submit until AlterIsr response arrives
     time.sleep(partition.replicaLagTimeMaxMs + 1)
     partition.maybeShrinkIsr()
-    assertFalse(partition.isrState.isInflight, "ISR should still be committed and not inflight")
+    assertFalse(partition.partitionState.isInflight, "ISR should still be committed and not inflight")
 
     // Complete the AlterIsr update and now we can make modifications again
-    alterIsrManager.completeIsrUpdate(10)
+    alterPartitionManager.completeIsrUpdate(10)
     partition.maybeShrinkIsr()
-    assertTrue(partition.isrState.isInflight, "ISR should be pending a shrink")
+    assertTrue(partition.partitionState.isInflight, "ISR should be pending a shrink")
   }
 
   @Test
@@ -1375,48 +1775,45 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     // There is a short delay before the first fetch. The follower is not yet caught up to the log end.
     time.sleep(5000)
     val firstFetchTimeMs = time.milliseconds()
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(5),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = firstFetchTimeMs,
-      leaderEndOffset = 10L)
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 5L, fetchTimeMs = firstFetchTimeMs)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = 0L,
+      logEndOffset = 5L
+    )
     assertEquals(5L, partition.localLogOrException.highWatermark)
-    assertEquals(5L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
 
     // Some new data is appended, but the follower catches up to the old end offset.
     // The total elapsed time from initialization is larger than the max allowed replica lag.
     time.sleep(5001)
     seedLogData(log, numRecords = 5, leaderEpoch = leaderEpoch)
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 15L)
-    assertEquals(firstFetchTimeMs, remoteReplica.lastCaughtUpTimeMs)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L, fetchTimeMs = time.milliseconds())
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = firstFetchTimeMs,
+      logStartOffset = 0L,
+      logEndOffset = 10L
+    )
     assertEquals(10L, partition.localLogOrException.highWatermark)
-    assertEquals(10L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
 
     // The ISR should not be shrunk because the follower has caught up with the leader at the
     // time of the first fetch.
     partition.maybeShrinkIsr()
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
   }
 
   @Test
@@ -1437,34 +1834,33 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     // The follower catches up to the log end immediately.
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 10L)
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 10L
+    )
     assertEquals(10L, partition.localLogOrException.highWatermark)
-    assertEquals(10L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
 
     // Sleep longer than the max allowed follower lag
     time.sleep(30001)
 
     // The ISR should not be shrunk because the follower is caught up to the leader's log end
     partition.maybeShrinkIsr()
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.isr)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.isr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
   }
 
   @Test
@@ -1485,41 +1881,52 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(initializeTimeMs, remoteReplica.lastCaughtUpTimeMs)
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = initializeTimeMs,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     time.sleep(30001)
 
     // Enqueue and AlterIsr that will fail
     partition.maybeShrinkIsr()
     assertEquals(Set(brokerId, remoteBrokerId), partition.inSyncReplicaIds)
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
     // Simulate failure callback
-    alterIsrManager.failIsrUpdate(Errors.INVALID_UPDATE_VERSION)
+    alterPartitionManager.failIsrUpdate(Errors.INVALID_UPDATE_VERSION)
 
     // Ensure ISR hasn't changed
-    assertEquals(partition.isrState.getClass, classOf[PendingShrinkIsr])
+    assertEquals(partition.partitionState.getClass, classOf[PendingShrinkIsr])
     assertEquals(Set(brokerId, remoteBrokerId), partition.inSyncReplicaIds)
-    assertEquals(alterIsrManager.isrUpdates.size, 0)
+    assertEquals(alterPartitionManager.isrUpdates.size, 0)
     assertEquals(0L, partition.localLogOrException.highWatermark)
   }
 
+  @Test
+  def testAlterIsrNewLeaderElected(): Unit = {
+    handleAlterIsrFailure(Errors.NEW_LEADER_ELECTED,
+      (brokerId: Int, remoteBrokerId: Int, partition: Partition) => {
+        assertEquals(partition.partitionState.isr, Set(brokerId))
+        assertEquals(partition.partitionState.maximalIsr, Set(brokerId, remoteBrokerId))
+        assertEquals(alterPartitionManager.isrUpdates.size, 0)
+      })
+  }
+
   @Test
   def testAlterIsrUnknownTopic(): Unit = {
     handleAlterIsrFailure(Errors.UNKNOWN_TOPIC_OR_PARTITION,
       (brokerId: Int, remoteBrokerId: Int, partition: Partition) => {
-        assertEquals(partition.isrState.isr, Set(brokerId))
-        assertEquals(partition.isrState.maximalIsr, Set(brokerId, remoteBrokerId))
-        assertEquals(alterIsrManager.isrUpdates.size, 0)
+        assertEquals(partition.partitionState.isr, Set(brokerId))
+        assertEquals(partition.partitionState.maximalIsr, Set(brokerId, remoteBrokerId))
+        assertEquals(alterPartitionManager.isrUpdates.size, 0)
       })
   }
 
@@ -1527,9 +1934,9 @@ class PartitionTest extends AbstractPartitionTest {
   def testAlterIsrInvalidVersion(): Unit = {
     handleAlterIsrFailure(Errors.INVALID_UPDATE_VERSION,
       (brokerId: Int, remoteBrokerId: Int, partition: Partition) => {
-        assertEquals(partition.isrState.isr, Set(brokerId))
-        assertEquals(partition.isrState.maximalIsr, Set(brokerId, remoteBrokerId))
-        assertEquals(alterIsrManager.isrUpdates.size, 0)
+        assertEquals(partition.partitionState.isr, Set(brokerId))
+        assertEquals(partition.partitionState.maximalIsr, Set(brokerId, remoteBrokerId))
+        assertEquals(alterPartitionManager.isrUpdates.size, 0)
       })
   }
 
@@ -1538,9 +1945,9 @@ class PartitionTest extends AbstractPartitionTest {
     handleAlterIsrFailure(Errors.UNKNOWN_SERVER_ERROR,
       (brokerId: Int, remoteBrokerId: Int, partition: Partition) => {
         // We retry these
-        assertEquals(partition.isrState.isr, Set(brokerId))
-        assertEquals(partition.isrState.maximalIsr, Set(brokerId, remoteBrokerId))
-        assertEquals(alterIsrManager.isrUpdates.size, 1)
+        assertEquals(partition.partitionState.isr, Set(brokerId))
+        assertEquals(partition.partitionState.maximalIsr, Set(brokerId, remoteBrokerId))
+        assertEquals(alterPartitionManager.isrUpdates.size, 1)
       })
   }
 
@@ -1560,34 +1967,138 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(10L, partition.localLogOrException.highWatermark)
 
-    val remoteReplica = partition.getReplica(remoteBrokerId).get
-    assertEquals(LogOffsetMetadata.UnknownOffsetMetadata.messageOffset, remoteReplica.logEndOffset)
-    assertEquals(UnifiedLog.UnknownOffset, remoteReplica.logStartOffset)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
 
     // This will attempt to expand the ISR
-    partition.updateFollowerFetchState(remoteBrokerId,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 10L)
+    val firstFetchTimeMs = time.milliseconds()
+    fetchFollower(partition, replicaId = remoteBrokerId, fetchOffset = 10L, fetchTimeMs = firstFetchTimeMs)
 
     // Follower state is updated, but the ISR has not expanded
     assertEquals(Set(brokerId), partition.inSyncReplicaIds)
-    assertEquals(Set(brokerId, remoteBrokerId), partition.isrState.maximalIsr)
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
-    assertEquals(10L, remoteReplica.logEndOffset)
-    assertEquals(0L, remoteReplica.logStartOffset)
+    assertEquals(Set(brokerId, remoteBrokerId), partition.partitionState.maximalIsr)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
+    assertReplicaState(partition, remoteBrokerId,
+      lastCaughtUpTimeMs = firstFetchTimeMs,
+      logStartOffset = 0L,
+      logEndOffset = 10L
+    )
 
     // Failure
-    alterIsrManager.failIsrUpdate(error)
+    alterPartitionManager.failIsrUpdate(error)
     callback(brokerId, remoteBrokerId, partition)
   }
 
+  private def createClientResponseWithAlterPartitionResponse(
+    topicPartition: TopicPartition,
+    partitionErrorCode: Short,
+    isr: List[Int] = List.empty,
+    leaderEpoch: Int = 0,
+    partitionEpoch: Int = 0
+  ): ClientResponse = {
+    val alterPartitionResponseData = new AlterPartitionResponseData()
+    val topicResponse = new AlterPartitionResponseData.TopicData()
+      .setTopicName(topicPartition.topic)
+
+    topicResponse.partitions.add(new AlterPartitionResponseData.PartitionData()
+      .setPartitionIndex(topicPartition.partition)
+      .setIsr(isr.map(Integer.valueOf).asJava)
+      .setLeaderEpoch(leaderEpoch)
+      .setPartitionEpoch(partitionEpoch)
+      .setErrorCode(partitionErrorCode))
+    alterPartitionResponseData.topics.add(topicResponse)
+
+    val alterPartitionResponse = new AlterPartitionResponse(alterPartitionResponseData)
+
+    new ClientResponse(new RequestHeader(ApiKeys.ALTER_PARTITION, 0, "client", 1),
+      null, null, 0, 0, false, null, null, alterPartitionResponse)
+  }
+
+  @Test
+  def testPartitionShouldRetryAlterPartitionRequest(): Unit = {
+    val mockChannelManager = mock(classOf[BrokerToControllerChannelManager])
+    val alterPartitionManager = new DefaultAlterPartitionManager(
+      controllerChannelManager = mockChannelManager,
+      scheduler = mock(classOf[KafkaScheduler]),
+      time = time,
+      brokerId = brokerId,
+      brokerEpochSupplier = () => 0,
+      metadataVersionSupplier = () => MetadataVersion.IBP_3_0_IV0
+    )
+
+    partition = new Partition(topicPartition,
+      replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
+      interBrokerProtocolVersion = interBrokerProtocolVersion,
+      localBrokerId = brokerId,
+      time,
+      alterPartitionListener,
+      delayedOperations,
+      metadataCache,
+      logManager,
+      alterPartitionManager)
+
+    val log = logManager.getOrCreateLog(topicPartition, topicId = None)
+    seedLogData(log, numRecords = 10, leaderEpoch = 4)
+
+    val controllerEpoch = 0
+    val leaderEpoch = 5
+    val follower1 = brokerId + 1
+    val follower2 = brokerId + 2
+    val follower3 = brokerId + 3
+    val replicas = Seq(brokerId, follower1, follower2, follower3)
+    val isr = Seq(brokerId, follower1, follower2)
+    val partitionEpoch = 1
+
+    doNothing().when(delayedOperations).checkAndCompleteAll()
+
+    // Fail the first alter partition request with a retryable error to trigger a retry from the partition callback
+    val alterPartitionResponseWithUnknownServerError =
+      createClientResponseWithAlterPartitionResponse(topicPartition, Errors.UNKNOWN_SERVER_ERROR.code)
+
+    // Complete the ISR expansion
+    val alterPartitionResponseWithoutError =
+      createClientResponseWithAlterPartitionResponse(topicPartition, Errors.NONE.code, List(brokerId, follower1, follower2, follower3), leaderEpoch, partitionEpoch + 1)
+
+    when(mockChannelManager.sendRequest(any(), any()))
+      .thenAnswer { invocation =>
+        val controllerRequestCompletionHandler = invocation.getArguments()(1).asInstanceOf[ControllerRequestCompletionHandler]
+        controllerRequestCompletionHandler.onComplete(alterPartitionResponseWithUnknownServerError)
+      }
+      .thenAnswer { invocation =>
+        val controllerRequestCompletionHandler = invocation.getArguments()(1).asInstanceOf[ControllerRequestCompletionHandler]
+        controllerRequestCompletionHandler.onComplete(alterPartitionResponseWithoutError)
+      }
+
+    assertTrue(makeLeader(
+      topicId = None,
+      controllerEpoch,
+      leaderEpoch,
+      isr,
+      replicas,
+      partitionEpoch,
+      isNew = true
+    ))
+    assertEquals(0L, partition.localLogOrException.highWatermark)
+
+    // Expand ISR
+    fetchFollower(partition, replicaId = follower3, fetchOffset = 10L)
+
+    assertEquals(Set(brokerId, follower1, follower2, follower3), partition.partitionState.isr)
+    assertEquals(partitionEpoch + 1, partition.getPartitionEpoch)
+    // Verify that the AlterPartition request was sent twice
+    verify(mockChannelManager, times(2)).sendRequest(any(), any())
+    // After the retry, the partition state should be committed
+    assertFalse(partition.partitionState.isInflight)
+  }
+
   @Test
   def testSingleInFlightAlterIsr(): Unit = {
     val log = logManager.getOrCreateLog(topicPartition, topicId = None)
@@ -1609,29 +2120,23 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
     // Expand ISR
-    partition.updateFollowerFetchState(
-      followerId = follower3,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 10
-    )
-    assertEquals(Set(brokerId, follower1, follower2), partition.isrState.isr)
-    assertEquals(Set(brokerId, follower1, follower2, follower3), partition.isrState.maximalIsr)
+    fetchFollower(partition, replicaId = follower3, fetchOffset = 10L)
+    assertEquals(Set(brokerId, follower1, follower2), partition.partitionState.isr)
+    assertEquals(Set(brokerId, follower1, follower2, follower3), partition.partitionState.maximalIsr)
 
     // One AlterIsr request in-flight
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
 
     // Try to modify ISR again, should do nothing
     time.sleep(partition.replicaLagTimeMaxMs + 1)
     partition.maybeShrinkIsr()
-    assertEquals(alterIsrManager.isrUpdates.size, 1)
+    assertEquals(alterPartitionManager.isrUpdates.size, 1)
   }
 
   @Test
@@ -1645,15 +2150,15 @@ class PartitionTest extends AbstractPartitionTest {
       .when(kafkaZkClient)
       .conditionalUpdatePath(anyString(), any(), ArgumentMatchers.eq(1), any())
 
-    val zkIsrManager = AlterIsrManager(scheduler, time, kafkaZkClient)
+    val zkIsrManager = AlterPartitionManager(scheduler, time, kafkaZkClient)
     zkIsrManager.start()
 
     val partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = KAFKA_2_6_IV0, // shouldn't matter, but set this to a ZK isr version
+      interBrokerProtocolVersion = IBP_2_6_IV0, // shouldn't matter, but set this to a ZK isr version
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       logManager,
@@ -1679,25 +2184,19 @@ class PartitionTest extends AbstractPartitionTest {
       leaderEpoch = leaderEpoch,
       isr = isr,
       replicas = replicas,
-      zkVersion = 1,
+      partitionEpoch = 1,
       isNew = true
     ))
     assertEquals(0L, partition.localLogOrException.highWatermark)
 
     // Expand ISR
-    partition.updateFollowerFetchState(
-      followerId = follower3,
-      followerFetchOffsetMetadata = LogOffsetMetadata(10),
-      followerStartOffset = 0L,
-      followerFetchTimeMs = time.milliseconds(),
-      leaderEndOffset = 10
-    )
+    fetchFollower(partition, replicaId = follower3, fetchOffset = 10L)
 
     // Try avoiding a race
-    TestUtils.waitUntilTrue(() => !partition.isrState.isInflight, "Expected ISR state to be committed", 100)
+    TestUtils.waitUntilTrue(() => !partition.partitionState.isInflight, "Expected ISR state to be committed", 100)
 
-    partition.isrState match {
-      case committed: CommittedIsr => assertEquals(Set(brokerId, follower1, follower2, follower3), committed.isr)
+    partition.partitionState match {
+      case CommittedPartitionState(isr, _) => assertEquals(Set(brokerId, follower1, follower2, follower3), isr)
       case _ => fail("Expected a committed ISR following Zk expansion")
     }
 
@@ -1719,7 +2218,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(brokerId)
       .setLeaderEpoch(6)
       .setIsr(replicas)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     partition.makeLeader(leaderState, offsetCheckpoints, None)
@@ -1737,7 +2236,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(brokerId)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(replicas)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     partition.makeLeader(leaderState, offsetCheckpoints, Some(topicId))
@@ -1747,14 +2246,14 @@ class PartitionTest extends AbstractPartitionTest {
     // Create new Partition object for same topicPartition
     val partition2 = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       logManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     // partition2 should not yet be associated with the log, but should be able to get ID
     assertTrue(partition2.topicId.isDefined)
@@ -1781,7 +2280,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(brokerId)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(replicas)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     partition.makeFollower(leaderState, offsetCheckpoints, Some(topicId))
@@ -1791,14 +2290,14 @@ class PartitionTest extends AbstractPartitionTest {
     // Create new Partition object for same topicPartition
     val partition2 = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       logManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     // partition2 should not yet be associated with the log, but should be able to get ID
     assertTrue(partition2.topicId.isDefined)
@@ -1820,8 +2319,8 @@ class PartitionTest extends AbstractPartitionTest {
     assertTrue(partition.log.isDefined)
     val log = partition.log.get
     assertEquals(expectedTopicId, log.topicId.get)
-    assertTrue(log.partitionMetadataFile.exists())
-    assertEquals(expectedTopicId, log.partitionMetadataFile.read().topicId)
+    assertTrue(log.partitionMetadataFile.get.exists())
+    assertEquals(expectedTopicId, log.partitionMetadataFile.get.read().topicId)
   }
 
   @Test
@@ -1858,7 +2357,7 @@ class PartitionTest extends AbstractPartitionTest {
       .setLeader(brokerId)
       .setLeaderEpoch(6)
       .setIsr(isr)
-      .setZkVersion(1)
+      .setPartitionEpoch(1)
       .setReplicas(replicas)
       .setIsNew(false)
     partition.makeLeader(leaderState, offsetCheckpoints, None)
@@ -1873,34 +2372,66 @@ class PartitionTest extends AbstractPartitionTest {
   def testUpdateAssignmentAndIsr(): Unit = {
     val topicPartition = new TopicPartition("test", 1)
     val partition = new Partition(
-      topicPartition, 1000, ApiVersion.latestVersion, 0,
-      new SystemTime(), mock(classOf[IsrChangeListener]), mock(classOf[DelayedOperations]),
-      mock(classOf[MetadataCache]), mock(classOf[LogManager]), mock(classOf[AlterIsrManager]))
+      topicPartition, 1000, MetadataVersion.latest, 0,
+      new SystemTime(), mock(classOf[AlterPartitionListener]), mock(classOf[DelayedOperations]),
+      mock(classOf[MetadataCache]), mock(classOf[LogManager]), mock(classOf[AlterPartitionManager]))
 
     val replicas = Seq(0, 1, 2, 3)
+    val followers = Seq(1, 2, 3)
     val isr = Set(0, 1, 2, 3)
     val adding = Seq(4, 5)
     val removing = Seq(1, 2)
 
     // Test with ongoing reassignment
-    partition.updateAssignmentAndIsr(replicas, isr, adding, removing)
+    partition.updateAssignmentAndIsr(
+      replicas,
+      isLeader = true,
+      isr,
+      adding,
+      removing,
+      LeaderRecoveryState.RECOVERED
+    )
 
     assertTrue(partition.assignmentState.isInstanceOf[OngoingReassignmentState], "The assignmentState is not OngoingReassignmentState")
     assertEquals(replicas, partition.assignmentState.replicas)
-    assertEquals(isr, partition.isrState.isr)
+    assertEquals(isr, partition.partitionState.isr)
     assertEquals(adding, partition.assignmentState.asInstanceOf[OngoingReassignmentState].addingReplicas)
     assertEquals(removing, partition.assignmentState.asInstanceOf[OngoingReassignmentState].removingReplicas)
-    assertEquals(Seq(1, 2, 3), partition.remoteReplicas.map(_.brokerId))
+    assertEquals(followers, partition.remoteReplicas.map(_.brokerId))
 
     // Test with simple assignment
     val replicas2 = Seq(0, 3, 4, 5)
+    val followers2 = Seq(3, 4, 5)
     val isr2 = Set(0, 3, 4, 5)
-    partition.updateAssignmentAndIsr(replicas2, isr2, Seq.empty, Seq.empty)
+    partition.updateAssignmentAndIsr(
+      replicas2,
+      isLeader = true,
+      isr2,
+      Seq.empty,
+      Seq.empty,
+      LeaderRecoveryState.RECOVERED
+    )
 
     assertTrue(partition.assignmentState.isInstanceOf[SimpleAssignmentState], "The assignmentState is not SimpleAssignmentState")
     assertEquals(replicas2, partition.assignmentState.replicas)
-    assertEquals(isr2, partition.isrState.isr)
-    assertEquals(Seq(3, 4, 5), partition.remoteReplicas.map(_.brokerId))
+    assertEquals(isr2, partition.partitionState.isr)
+    assertEquals(followers2, partition.remoteReplicas.map(_.brokerId))
+
+    // Test with no followers
+    val replicas3 = Seq(1, 2, 3, 4)
+    partition.updateAssignmentAndIsr(
+      replicas3,
+      isLeader = false,
+      Set.empty,
+      Seq.empty,
+      Seq.empty,
+      LeaderRecoveryState.RECOVERED
+    )
+
+    assertTrue(partition.assignmentState.isInstanceOf[SimpleAssignmentState], "The assignmentState is not SimpleAssignmentState")
+    assertEquals(replicas3, partition.assignmentState.replicas)
+    assertEquals(Set.empty, partition.partitionState.isr)
+    assertEquals(Seq.empty, partition.remoteReplicas.map(_.brokerId))
   }
 
   /**
@@ -1916,14 +2447,14 @@ class PartitionTest extends AbstractPartitionTest {
     val spyLogManager = spy(logManager)
     val partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       spyLogManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     partition.createLog(isNew = true, isFutureReplica = false, offsetCheckpoints, topicId = None)
 
@@ -1954,14 +2485,14 @@ class PartitionTest extends AbstractPartitionTest {
 
     val partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       spyLogManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     partition.createLog(isNew = true, isFutureReplica = false, offsetCheckpoints, topicId = None)
 
@@ -1995,14 +2526,14 @@ class PartitionTest extends AbstractPartitionTest {
 
     val partition = new Partition(topicPartition,
       replicaLagTimeMaxMs = Defaults.ReplicaLagTimeMaxMs,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       localBrokerId = brokerId,
       time,
-      isrChangeListener,
+      alterPartitionListener,
       delayedOperations,
       metadataCache,
       spyLogManager,
-      alterIsrManager)
+      alterPartitionManager)
 
     partition.createLog(isNew = true, isFutureReplica = false, offsetCheckpoints, topicId = None)
 
@@ -2015,13 +2546,250 @@ class PartitionTest extends AbstractPartitionTest {
     verify(spyConfigRepository, times(2)).topicConfig(topicPartition.topic())
   }
 
+  @Test
+  def testDoNotResetReplicaStateIfLeaderEpochIsNotBumped(): Unit = {
+    val controllerEpoch = 3
+    val leaderId = brokerId
+    val followerId = brokerId + 1
+    val replicas = List(leaderId, followerId)
+    val leaderEpoch = 8
+    val topicId = Uuid.randomUuid()
+
+    val initialLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertTrue(partition.makeLeader(initialLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+    assertEquals(Set(leaderId), partition.partitionState.isr)
+
+    // Follower's state is initialized with unknown offset because it is not
+    // in the ISR.
+    assertReplicaState(partition, followerId,
+      lastCaughtUpTimeMs = 0L,
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset
+    )
+
+    // Follower fetches and updates its replica state.
+    fetchFollower(partition, replicaId = followerId, fetchOffset = 0L)
+    assertReplicaState(partition, followerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 0L
+    )
+
+    // makeLeader is called again with the same leader epoch but with
+    // a newer partition epoch. This can happen in KRaft when a partition
+    // is reassigned. The leader epoch is not bumped when we add replicas.
+    val updatedLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(2)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(false)
+
+    assertFalse(partition.makeLeader(updatedLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(2, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+    assertEquals(Set(leaderId), partition.partitionState.isr)
+
+    // Follower's state has not been reset.
+    assertReplicaState(partition, followerId,
+      lastCaughtUpTimeMs = time.milliseconds(),
+      logStartOffset = 0L,
+      logEndOffset = 0L
+    )
+  }
+
+  @Test
+  def testDoNotUpdateEpochStartOffsetIfLeaderEpochIsNotBumped(): Unit = {
+    val controllerEpoch = 3
+    val leaderId = brokerId
+    val followerId = brokerId + 1
+    val replicas = List(leaderId, followerId)
+    val leaderEpoch = 8
+    val topicId = Uuid.randomUuid()
+
+    val initialLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertTrue(partition.makeLeader(initialLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+    assertEquals(Set(leaderId), partition.partitionState.isr)
+    assertEquals(Some(0L), partition.leaderEpochStartOffsetOpt)
+
+    val leaderLog = partition.localLogOrException
+    assertEquals(Some(EpochEntry(leaderEpoch, 0L)), leaderLog.leaderEpochCache.flatMap(_.latestEntry))
+
+    // Write to the log to increment the log end offset.
+    leaderLog.appendAsLeader(MemoryRecords.withRecords(0L, CompressionType.NONE, 0,
+      new SimpleRecord("k1".getBytes, "v1".getBytes),
+      new SimpleRecord("k1".getBytes, "v1".getBytes)
+    ), leaderEpoch = leaderEpoch)
+
+    // makeLeader is called again with the same leader epoch but with
+    // a newer partition epoch.
+    val updatedLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(2)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(false)
+
+    assertFalse(partition.makeLeader(updatedLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(2, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+    assertEquals(Set(leaderId), partition.partitionState.isr)
+    assertEquals(Some(0L), partition.leaderEpochStartOffsetOpt)
+    assertEquals(Some(EpochEntry(leaderEpoch, 0L)), leaderLog.leaderEpochCache.flatMap(_.latestEntry))
+  }
+
+  @Test
+  def testIgnoreLeaderPartitionStateChangeWithOlderPartitionEpoch(): Unit = {
+    val controllerEpoch = 3
+    val leaderId = brokerId
+    val replicas = List(leaderId)
+    val leaderEpoch = 8
+    val topicId = Uuid.randomUuid()
+
+    val initialLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertTrue(partition.makeLeader(initialLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+
+    // makeLeader is called again with the same leader epoch but with
+    // a older partition epoch.
+    val updatedLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(leaderId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId).map(Int.box).asJava)
+      .setPartitionEpoch(0)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(false)
+
+    assertFalse(partition.makeLeader(updatedLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+  }
+
+  @Test
+  def testIgnoreFollowerPartitionStateChangeWithOlderPartitionEpoch(): Unit = {
+    val controllerEpoch = 3
+    val leaderId = brokerId
+    val followerId = brokerId + 1
+    val replicas = List(leaderId, followerId)
+    val leaderEpoch = 8
+    val topicId = Uuid.randomUuid()
+
+    val initialFollowerState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(followerId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId, followerId).map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertTrue(partition.makeFollower(initialFollowerState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+
+    // makeLeader is called again with the same leader epoch but with
+    // a older partition epoch.
+    val updatedFollowerState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(followerId)
+      .setLeaderEpoch(leaderEpoch)
+      .setIsr(List(leaderId, followerId).map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertFalse(partition.makeFollower(updatedFollowerState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(leaderEpoch, partition.getLeaderEpoch)
+  }
+
+  @Test
+  def testFollowerShouldNotHaveAnyRemoteReplicaStates(): Unit = {
+    val controllerEpoch = 3
+    val localReplica = brokerId
+    val remoteReplica1 = brokerId + 1
+    val remoteReplica2 = brokerId + 2
+    val replicas = List(localReplica, remoteReplica1, remoteReplica2)
+    val topicId = Uuid.randomUuid()
+
+    // The local replica is the leader.
+    val initialLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(localReplica)
+      .setLeaderEpoch(1)
+      .setIsr(replicas.map(Int.box).asJava)
+      .setPartitionEpoch(1)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(true)
+
+    assertTrue(partition.makeLeader(initialLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(1, partition.getPartitionEpoch)
+    assertEquals(1, partition.getLeaderEpoch)
+    assertEquals(Some(localReplica), partition.leaderReplicaIdOpt)
+    assertEquals(replicas.toSet, partition.partitionState.isr)
+    assertEquals(Seq(remoteReplica1, remoteReplica2), partition.remoteReplicas.map(_.brokerId).toSeq)
+    assertEquals(replicas, partition.assignmentState.replicas)
+
+    // The local replica becomes a follower.
+    val updatedLeaderState = new LeaderAndIsrPartitionState()
+      .setControllerEpoch(controllerEpoch)
+      .setLeader(remoteReplica1)
+      .setLeaderEpoch(2)
+      .setIsr(replicas.map(Int.box).asJava)
+      .setPartitionEpoch(2)
+      .setReplicas(replicas.map(Int.box).asJava)
+      .setIsNew(false)
+
+    assertTrue(partition.makeFollower(updatedLeaderState, offsetCheckpoints, Some(topicId)))
+    assertEquals(2, partition.getPartitionEpoch)
+    assertEquals(2, partition.getLeaderEpoch)
+    assertEquals(Some(remoteReplica1), partition.leaderReplicaIdOpt)
+    assertEquals(Set.empty, partition.partitionState.isr)
+    assertEquals(Seq.empty, partition.remoteReplicas.map(_.brokerId).toSeq)
+    assertEquals(replicas, partition.assignmentState.replicas)
+  }
+
   private def makeLeader(
     topicId: Option[Uuid],
     controllerEpoch: Int,
     leaderEpoch: Int,
     isr: Seq[Int],
     replicas: Seq[Int],
-    zkVersion: Int,
+    partitionEpoch: Int,
     isNew: Boolean,
     partition: Partition = partition
   ): Boolean = {
@@ -2037,19 +2805,19 @@ class PartitionTest extends AbstractPartitionTest {
         .setLeader(brokerId)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(isr.map(Int.box).asJava)
-        .setZkVersion(zkVersion)
+        .setPartitionEpoch(partitionEpoch)
         .setReplicas(replicas.map(Int.box).asJava)
         .setIsNew(isNew),
       offsetCheckpoints,
       topicId
     )
     assertTrue(partition.isLeader)
-    assertFalse(partition.isrState.isInflight)
+    assertFalse(partition.partitionState.isInflight)
     assertEquals(topicId, partition.topicId)
     assertEquals(leaderEpoch, partition.getLeaderEpoch)
-    assertEquals(isr.toSet, partition.isrState.isr)
-    assertEquals(isr.toSet, partition.isrState.maximalIsr)
-    assertEquals(zkVersion, partition.getZkVersion)
+    assertEquals(isr.toSet, partition.partitionState.isr)
+    assertEquals(isr.toSet, partition.partitionState.maximalIsr)
+    assertEquals(partitionEpoch, partition.getPartitionEpoch)
     newLeader
   }
 
@@ -2084,4 +2852,98 @@ class PartitionTest extends AbstractPartitionTest {
       appendInfo
     }
   }
+
+  private def assertReplicaState(
+    partition: Partition,
+    replicaId: Int,
+    lastCaughtUpTimeMs: Long,
+    logEndOffset: Long,
+    logStartOffset: Long
+  ): Unit = {
+    partition.getReplica(replicaId) match {
+      case Some(replica) =>
+        val replicaState = replica.stateSnapshot
+        assertEquals(lastCaughtUpTimeMs, replicaState.lastCaughtUpTimeMs,
+          "Unexpected Last Caught Up Time")
+        assertEquals(logEndOffset, replicaState.logEndOffset,
+          "Unexpected Log End Offset")
+        assertEquals(logStartOffset, replicaState.logStartOffset,
+          "Unexpected Log Start Offset")
+
+      case None =>
+        fail(s"Replica $replicaId not found.")
+    }
+  }
+
+  private def fetchConsumer(
+    partition: Partition,
+    fetchOffset: Long,
+    leaderEpoch: Option[Int],
+    clientMetadata: Option[ClientMetadata],
+    maxBytes: Int = Int.MaxValue,
+    lastFetchedEpoch: Option[Int] = None,
+    fetchTimeMs: Long = time.milliseconds(),
+    topicId: Uuid = Uuid.ZERO_UUID,
+    isolation: FetchIsolation = FetchHighWatermark
+  ): LogReadInfo = {
+    val fetchParams = consumerFetchParams(
+      maxBytes = maxBytes,
+      clientMetadata = clientMetadata,
+      isolation = isolation
+    )
+
+    val fetchPartitionData = new FetchRequest.PartitionData(
+      topicId,
+      fetchOffset,
+      FetchRequest.INVALID_LOG_START_OFFSET,
+      maxBytes,
+      leaderEpoch.map(Int.box).asJava,
+      lastFetchedEpoch.map(Int.box).asJava
+    )
+
+    partition.fetchRecords(
+      fetchParams,
+      fetchPartitionData,
+      fetchTimeMs,
+      maxBytes,
+      minOneMessage = true,
+      updateFetchState = false
+    )
+  }
+
+  private def fetchFollower(
+    partition: Partition,
+    replicaId: Int,
+    fetchOffset: Long,
+    logStartOffset: Long = 0L,
+    maxBytes: Int = Int.MaxValue,
+    leaderEpoch: Option[Int] = None,
+    lastFetchedEpoch: Option[Int] = None,
+    fetchTimeMs: Long = time.milliseconds(),
+    topicId: Uuid = Uuid.ZERO_UUID
+  ): LogReadInfo = {
+    val fetchParams = followerFetchParams(
+      replicaId,
+      maxBytes = maxBytes
+    )
+
+    val fetchPartitionData = new FetchRequest.PartitionData(
+      topicId,
+      fetchOffset,
+      logStartOffset,
+      maxBytes,
+      leaderEpoch.map(Int.box).asJava,
+      lastFetchedEpoch.map(Int.box).asJava
+    )
+
+    partition.fetchRecords(
+      fetchParams,
+      fetchPartitionData,
+      fetchTimeMs,
+      maxBytes,
+      minOneMessage = true,
+      updateFetchState = true
+    )
+  }
+
 }
diff --git a/core/src/test/scala/unit/kafka/cluster/PartitionWithLegacyMessageFormatTest.scala b/core/src/test/scala/unit/kafka/cluster/PartitionWithLegacyMessageFormatTest.scala
index 50b10fa20ee35..75fec767abd5b 100644
--- a/core/src/test/scala/unit/kafka/cluster/PartitionWithLegacyMessageFormatTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/PartitionWithLegacyMessageFormatTest.scala
@@ -16,28 +16,30 @@
  */
 package kafka.cluster
 
-import kafka.api.{ApiVersion, KAFKA_2_8_IV1}
 import kafka.log.LogConfig
 import kafka.utils.TestUtils
 import org.apache.kafka.common.record.{RecordVersion, SimpleRecord}
 import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.{UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET}
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Test
-
 import java.util.Optional
+
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_8_IV1
+
 import scala.annotation.nowarn
 
 class PartitionWithLegacyMessageFormatTest extends AbstractPartitionTest {
 
   // legacy message formats are only supported with IBP < 3.0
-  override protected def interBrokerProtocolVersion: ApiVersion = KAFKA_2_8_IV1
+  override protected def interBrokerProtocolVersion: MetadataVersion = IBP_2_8_IV1
 
   @nowarn("cat=deprecation")
   @Test
   def testMakeLeaderDoesNotUpdateEpochCacheForOldFormats(): Unit = {
     val leaderEpoch = 8
     configRepository.setTopicConfig(topicPartition.topic(),
-      LogConfig.MessageFormatVersionProp, kafka.api.KAFKA_0_10_2_IV0.shortVersion)
+      LogConfig.MessageFormatVersionProp, MetadataVersion.IBP_0_10_2_IV0.shortVersion)
     val log = logManager.getOrCreateLog(topicPartition, topicId = None)
     log.appendAsLeader(TestUtils.records(List(
       new SimpleRecord("k1".getBytes, "v1".getBytes),
diff --git a/core/src/test/scala/unit/kafka/cluster/ReplicaTest.scala b/core/src/test/scala/unit/kafka/cluster/ReplicaTest.scala
index 201ec1dea50a6..76910642ae985 100644
--- a/core/src/test/scala/unit/kafka/cluster/ReplicaTest.scala
+++ b/core/src/test/scala/unit/kafka/cluster/ReplicaTest.scala
@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -6,7 +6,7 @@
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -16,117 +16,293 @@
  */
 package kafka.cluster
 
-import java.util.Properties
+import kafka.log.UnifiedLog
+import kafka.server.LogOffsetMetadata
+import kafka.utils.MockTime
+import org.apache.kafka.common.TopicPartition
+import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
+import org.junit.jupiter.api.{BeforeEach, Test}
 
-import kafka.log.{ClientRecordDeletion, UnifiedLog, LogConfig, LogManager}
-import kafka.server.{BrokerTopicStats, LogDirFailureChannel}
-import kafka.utils.{MockTime, TestUtils}
-import org.apache.kafka.common.errors.OffsetOutOfRangeException
-import org.apache.kafka.common.utils.Utils
-import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
+object ReplicaTest {
+  val BrokerId: Int = 0
+  val Partition: TopicPartition = new TopicPartition("foo", 0)
+  val ReplicaLagTimeMaxMs: Long = 30000
+}
 
 class ReplicaTest {
+  import ReplicaTest._
 
-  val tmpDir = TestUtils.tempDir()
-  val logDir = TestUtils.randomPartitionLogDir(tmpDir)
   val time = new MockTime()
-  val brokerTopicStats = new BrokerTopicStats
-  var log: UnifiedLog = _
+  var replica: Replica = _
 
   @BeforeEach
   def setup(): Unit = {
-    val logProps = new Properties()
-    logProps.put(LogConfig.SegmentBytesProp, 512: java.lang.Integer)
-    logProps.put(LogConfig.SegmentIndexBytesProp, 1000: java.lang.Integer)
-    logProps.put(LogConfig.RetentionMsProp, 999: java.lang.Integer)
-    val config = LogConfig(logProps)
-    log = UnifiedLog(
-      dir = logDir,
-      config = config,
-      logStartOffset = 0L,
-      recoveryPoint = 0L,
-      scheduler = time.scheduler,
-      brokerTopicStats = brokerTopicStats,
-      time = time,
-      maxTransactionTimeoutMs = 5 * 60 * 1000,
-      maxProducerIdExpirationMs = 60 * 60 * 1000,
-      producerIdExpirationCheckIntervalMs = LogManager.ProducerIdExpirationCheckIntervalMs,
-      logDirFailureChannel = new LogDirFailureChannel(10),
-      topicId = None,
-      keepPartitionMetadataFile = true
+    replica = new Replica(BrokerId, Partition)
+  }
+
+  private def assertReplicaState(
+    logStartOffset: Long,
+    logEndOffset: Long,
+    lastCaughtUpTimeMs: Long,
+    lastFetchLeaderLogEndOffset: Long,
+    lastFetchTimeMs: Long
+  ): Unit = {
+    val replicaState = replica.stateSnapshot
+    assertEquals(logStartOffset, replicaState.logStartOffset,
+      "Unexpected Log Start Offset")
+    assertEquals(logEndOffset, replicaState.logEndOffset,
+      "Unexpected Log End Offset")
+    assertEquals(lastCaughtUpTimeMs, replicaState.lastCaughtUpTimeMs,
+      "Unexpected Last Caught Up Time")
+    assertEquals(lastFetchLeaderLogEndOffset, replicaState.lastFetchLeaderLogEndOffset,
+      "Unexpected Last Fetch Leader Log End Offset")
+    assertEquals(lastFetchTimeMs, replicaState.lastFetchTimeMs,
+      "Unexpected Last Fetch Time")
+  }
+
+  def assertReplicaStateDoesNotChange(
+    op: => Unit
+  ): Unit = {
+    val previousState = replica.stateSnapshot
+
+    op
+
+    assertReplicaState(
+      logStartOffset = previousState.logStartOffset,
+      logEndOffset = previousState.logEndOffset,
+      lastCaughtUpTimeMs = previousState.lastCaughtUpTimeMs,
+      lastFetchLeaderLogEndOffset = previousState.lastFetchLeaderLogEndOffset,
+      lastFetchTimeMs = previousState.lastFetchTimeMs
+    )
+  }
+
+  private def updateFetchState(
+    followerFetchOffset: Long,
+    followerStartOffset: Long,
+    leaderEndOffset: Long
+  ): Long = {
+    val currentTimeMs = time.milliseconds()
+    replica.updateFetchState(
+      followerFetchOffsetMetadata = LogOffsetMetadata(followerFetchOffset),
+      followerStartOffset = followerStartOffset,
+      followerFetchTimeMs = currentTimeMs,
+      leaderEndOffset = leaderEndOffset
+    )
+    currentTimeMs
+  }
+
+  private def resetReplicaState(
+    leaderEndOffset: Long,
+    isNewLeader: Boolean,
+    isFollowerInSync: Boolean
+  ): Long = {
+    val currentTimeMs = time.milliseconds()
+    replica.resetReplicaState(
+      currentTimeMs = currentTimeMs,
+      leaderEndOffset = leaderEndOffset,
+      isNewLeader = isNewLeader,
+      isFollowerInSync = isFollowerInSync
+    )
+    currentTimeMs
+  }
+
+  private def isCaughtUp(
+    leaderEndOffset: Long
+  ): Boolean = {
+    replica.stateSnapshot.isCaughtUp(
+      leaderEndOffset = leaderEndOffset,
+      currentTimeMs = time.milliseconds(),
+      replicaMaxLagMs = ReplicaLagTimeMaxMs
+    )
+  }
+
+  @Test
+  def testInitialState(): Unit = {
+    assertReplicaState(
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset,
+      lastCaughtUpTimeMs = 0L,
+      lastFetchLeaderLogEndOffset = 0L,
+      lastFetchTimeMs = 0L
+    )
+  }
+
+  @Test
+  def testUpdateFetchState(): Unit = {
+    val fetchTimeMs1 = updateFetchState(
+      followerFetchOffset = 5L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    assertReplicaState(
+      logStartOffset = 1L,
+      logEndOffset = 5L,
+      lastCaughtUpTimeMs = 0L,
+      lastFetchLeaderLogEndOffset = 10L,
+      lastFetchTimeMs = fetchTimeMs1
+    )
+
+    val fetchTimeMs2 = updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 2L,
+      leaderEndOffset = 15L
+    )
+
+    assertReplicaState(
+      logStartOffset = 2L,
+      logEndOffset = 10L,
+      lastCaughtUpTimeMs = fetchTimeMs1,
+      lastFetchLeaderLogEndOffset = 15L,
+      lastFetchTimeMs = fetchTimeMs2
+    )
+
+    val fetchTimeMs3 = updateFetchState(
+      followerFetchOffset = 15L,
+      followerStartOffset = 3L,
+      leaderEndOffset = 15L
+    )
+
+    assertReplicaState(
+      logStartOffset = 3L,
+      logEndOffset = 15L,
+      lastCaughtUpTimeMs = fetchTimeMs3,
+      lastFetchLeaderLogEndOffset = 15L,
+      lastFetchTimeMs = fetchTimeMs3
+    )
+  }
+
+  @Test
+  def testResetReplicaStateWhenLeaderIsReelectedAndReplicaIsInSync(): Unit = {
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    val resetTimeMs1 = resetReplicaState(
+      leaderEndOffset = 11L,
+      isNewLeader = false,
+      isFollowerInSync = true
+    )
+
+    assertReplicaState(
+      logStartOffset = 1L,
+      logEndOffset = 10L,
+      lastCaughtUpTimeMs = resetTimeMs1,
+      lastFetchLeaderLogEndOffset = 11L,
+      lastFetchTimeMs = resetTimeMs1
     )
   }
 
-  @AfterEach
-  def tearDown(): Unit = {
-    log.close()
-    brokerTopicStats.close()
-    Utils.delete(tmpDir)
+  @Test
+  def testResetReplicaStateWhenLeaderIsReelectedAndReplicaIsNotInSync(): Unit = {
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    resetReplicaState(
+      leaderEndOffset = 11L,
+      isNewLeader = false,
+      isFollowerInSync = false
+    )
+
+    assertReplicaState(
+      logStartOffset = 1L,
+      logEndOffset = 10L,
+      lastCaughtUpTimeMs = 0L,
+      lastFetchLeaderLogEndOffset = 11L,
+      lastFetchTimeMs = 0L
+    )
   }
 
   @Test
-  def testSegmentDeletionWithHighWatermarkInitialization(): Unit = {
-    val expiredTimestamp = time.milliseconds() - 1000
-    for (i <- 0 until 100) {
-      val records = TestUtils.singletonRecords(value = s"test$i".getBytes, timestamp = expiredTimestamp)
-      log.appendAsLeader(records, leaderEpoch = 0)
-    }
-
-    val initialHighWatermark = log.updateHighWatermark(25L)
-    assertEquals(25L, initialHighWatermark)
-
-    val initialNumSegments = log.numberOfSegments
-    log.deleteOldSegments()
-    assertTrue(log.numberOfSegments < initialNumSegments)
-    assertTrue(log.logStartOffset <= initialHighWatermark)
+  def testResetReplicaStateWhenNewLeaderIsElectedAndReplicaIsInSync(): Unit = {
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    val resetTimeMs1 = resetReplicaState(
+      leaderEndOffset = 11L,
+      isNewLeader = true,
+      isFollowerInSync = true
+    )
+
+    assertReplicaState(
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset,
+      lastCaughtUpTimeMs = resetTimeMs1,
+      lastFetchLeaderLogEndOffset = UnifiedLog.UnknownOffset,
+      lastFetchTimeMs = 0L
+    )
   }
 
   @Test
-  def testCannotDeleteSegmentsAtOrAboveHighWatermark(): Unit = {
-    val expiredTimestamp = time.milliseconds() - 1000
-    for (i <- 0 until 100) {
-      val records = TestUtils.singletonRecords(value = s"test$i".getBytes, timestamp = expiredTimestamp)
-      log.appendAsLeader(records, leaderEpoch = 0)
-    }
-
-    // ensure we have at least a few segments so the test case is not trivial
-    assertTrue(log.numberOfSegments > 5)
-    assertEquals(0L, log.highWatermark)
-    assertEquals(0L, log.logStartOffset)
-    assertEquals(100L, log.logEndOffset)
-
-    for (hw <- 0 to 100) {
-      log.updateHighWatermark(hw)
-      assertEquals(hw, log.highWatermark)
-      log.deleteOldSegments()
-      assertTrue(log.logStartOffset <= hw)
-
-      // verify that all segments up to the high watermark have been deleted
-
-      log.logSegments.headOption.foreach { segment =>
-        assertTrue(segment.baseOffset <= hw)
-        assertTrue(segment.baseOffset >= log.logStartOffset)
-      }
-      log.logSegments.tail.foreach { segment =>
-        assertTrue(segment.baseOffset > hw)
-        assertTrue(segment.baseOffset >= log.logStartOffset)
-      }
-    }
-
-    assertEquals(100L, log.logStartOffset)
-    assertEquals(1, log.numberOfSegments)
-    assertEquals(0, log.activeSegment.size)
+  def testResetReplicaStateWhenNewLeaderIsElectedAndReplicaIsNotInSync(): Unit = {
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    resetReplicaState(
+      leaderEndOffset = 11L,
+      isNewLeader = true,
+      isFollowerInSync = false
+    )
+
+    assertReplicaState(
+      logStartOffset = UnifiedLog.UnknownOffset,
+      logEndOffset = UnifiedLog.UnknownOffset,
+      lastCaughtUpTimeMs = 0L,
+      lastFetchLeaderLogEndOffset = UnifiedLog.UnknownOffset,
+      lastFetchTimeMs = 0L
+    )
+  }
+
+  @Test
+  def testIsCaughtUpWhenReplicaIsCaughtUpToLogEnd(): Unit = {
+    assertFalse(isCaughtUp(leaderEndOffset = 10L))
+
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    assertTrue(isCaughtUp(leaderEndOffset = 10L))
+
+    time.sleep(ReplicaLagTimeMaxMs + 1)
+
+    assertTrue(isCaughtUp(leaderEndOffset = 10L))
   }
 
   @Test
-  def testCannotIncrementLogStartOffsetPastHighWatermark(): Unit = {
-    for (i <- 0 until 100) {
-      val records = TestUtils.singletonRecords(value = s"test$i".getBytes)
-      log.appendAsLeader(records, leaderEpoch = 0)
-    }
-
-    log.updateHighWatermark(25L)
-    assertThrows(classOf[OffsetOutOfRangeException], () => log.maybeIncrementLogStartOffset(26L, ClientRecordDeletion))
+  def testIsCaughtUpWhenReplicaIsNotCaughtUpToLogEnd(): Unit = {
+    assertFalse(isCaughtUp(leaderEndOffset = 10L))
+
+    updateFetchState(
+      followerFetchOffset = 5L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 10L
+    )
+
+    assertFalse(isCaughtUp(leaderEndOffset = 10L))
+
+    updateFetchState(
+      followerFetchOffset = 10L,
+      followerStartOffset = 1L,
+      leaderEndOffset = 15L
+    )
+
+    assertTrue(isCaughtUp(leaderEndOffset = 16L))
+
+    time.sleep(ReplicaLagTimeMaxMs + 1)
+
+    assertFalse(isCaughtUp(leaderEndOffset = 16L))
   }
 }
diff --git a/core/src/test/scala/unit/kafka/controller/ControllerChannelManagerTest.scala b/core/src/test/scala/unit/kafka/controller/ControllerChannelManagerTest.scala
index 495f819df143f..a77b42e46e6df 100644
--- a/core/src/test/scala/unit/kafka/controller/ControllerChannelManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/ControllerChannelManagerTest.scala
@@ -17,26 +17,30 @@
 package kafka.controller
 
 import java.util.Properties
-import kafka.api.{ApiVersion, KAFKA_0_10_0_IV1, KAFKA_0_10_2_IV0, KAFKA_0_9_0, KAFKA_1_0_IV0, KAFKA_2_2_IV0, KAFKA_2_4_IV0, KAFKA_2_4_IV1, KAFKA_2_6_IV0, KAFKA_2_8_IV1, LeaderAndIsr}
+
+import kafka.api.LeaderAndIsr
 import kafka.cluster.{Broker, EndPoint}
 import kafka.server.KafkaConfig
 import kafka.utils.TestUtils
-import org.apache.kafka.common.{TopicPartition, Uuid}
-import org.apache.kafka.common.message.{LeaderAndIsrResponseData, StopReplicaResponseData, UpdateMetadataResponseData}
 import org.apache.kafka.common.message.LeaderAndIsrResponseData.LeaderAndIsrPartitionError
+import org.apache.kafka.common.message.LeaderAndIsrResponseData.LeaderAndIsrTopicError
 import org.apache.kafka.common.message.StopReplicaRequestData.StopReplicaPartitionState
 import org.apache.kafka.common.message.StopReplicaResponseData.StopReplicaPartitionError
+import org.apache.kafka.common.message.{LeaderAndIsrResponseData, StopReplicaResponseData, UpdateMetadataResponseData}
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.{AbstractControlRequest, AbstractResponse, LeaderAndIsrRequest, LeaderAndIsrResponse, StopReplicaRequest, StopReplicaResponse, UpdateMetadataRequest, UpdateMetadataResponse}
-import org.apache.kafka.common.message.LeaderAndIsrResponseData.LeaderAndIsrTopicError
 import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.common.{TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_10_0_IV1, IBP_0_10_2_IV0, IBP_0_9_0, IBP_1_0_IV0, IBP_2_2_IV0, IBP_2_4_IV0, IBP_2_4_IV1, IBP_2_6_IV0, IBP_2_8_IV1, IBP_3_2_IV0}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
-import scala.jdk.CollectionConverters._
 import scala.collection.mutable
 import scala.collection.mutable.ListBuffer
+import scala.jdk.CollectionConverters._
 
 class ControllerChannelManagerTest {
   private val controllerId = 1
@@ -157,29 +161,33 @@ class ControllerChannelManagerTest {
 
   @Test
   def testLeaderAndIsrInterBrokerProtocolVersion(): Unit = {
-    testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(ApiVersion.latestVersion, ApiKeys.LEADER_AND_ISR.latestVersion)
+    testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(MetadataVersion.latest, ApiKeys.LEADER_AND_ISR.latestVersion)
 
-    for (apiVersion <- ApiVersion.allVersions) {
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
       val leaderAndIsrRequestVersion: Short =
-        if (apiVersion >= KAFKA_2_8_IV1) 5
-        else if (apiVersion >= KAFKA_2_4_IV1) 4
-        else if (apiVersion >= KAFKA_2_4_IV0) 3
-        else if (apiVersion >= KAFKA_2_2_IV0) 2
-        else if (apiVersion >= KAFKA_1_0_IV0) 1
+        if (metadataVersion.isAtLeast(IBP_3_2_IV0)) 6
+        else if (metadataVersion.isAtLeast(IBP_2_8_IV1)) 5
+        else if (metadataVersion.isAtLeast(IBP_2_4_IV1)) 4
+        else if (metadataVersion.isAtLeast(IBP_2_4_IV0)) 3
+        else if (metadataVersion.isAtLeast(IBP_2_2_IV0)) 2
+        else if (metadataVersion.isAtLeast(IBP_1_0_IV0)) 1
         else 0
 
-      testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(apiVersion, leaderAndIsrRequestVersion)
+      testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(metadataVersion, leaderAndIsrRequestVersion)
     }
   }
 
-  private def testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: ApiVersion,
+  private def testLeaderAndIsrRequestFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: MetadataVersion,
                                                                        expectedLeaderAndIsrVersion: Short): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
     val batch = new MockControllerBrokerRequestBatch(context, config)
 
     val partition = new TopicPartition("foo", 0)
-    val leaderAndIsr = LeaderAndIsr(1, List(1, 2))
+    var leaderAndIsr = LeaderAndIsr(1, List(1, 2))
+    if (interBrokerProtocolVersion.isAtLeast(IBP_3_2_IV0)) {
+      leaderAndIsr = leaderAndIsr.copy(leaderRecoveryState = LeaderRecoveryState.RECOVERING)
+    }
 
     val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerEpoch)
     context.putPartitionLeadershipInfo(partition, leaderIsrAndControllerEpoch)
@@ -192,15 +200,27 @@ class ControllerChannelManagerTest {
     assertEquals(1, leaderAndIsrRequests.size)
     assertEquals(expectedLeaderAndIsrVersion, leaderAndIsrRequests.head.version,
       s"IBP $interBrokerProtocolVersion should use version $expectedLeaderAndIsrVersion")
-    
+
     val request = leaderAndIsrRequests.head
     val byteBuffer = request.serialize
     val deserializedRequest = LeaderAndIsrRequest.parse(byteBuffer, expectedLeaderAndIsrVersion)
-    
-    if (interBrokerProtocolVersion >= KAFKA_2_8_IV1) {
+
+    val expectedRecovery = if (interBrokerProtocolVersion.isAtLeast(IBP_3_2_IV0)) {
+      LeaderRecoveryState.RECOVERING
+    } else {
+      LeaderRecoveryState.RECOVERED
+    }
+
+    Seq(request, deserializedRequest).foreach { request =>
+      request.partitionStates.forEach { state =>
+        assertEquals(expectedRecovery , LeaderRecoveryState.of(state.leaderRecoveryState()))
+      }
+    }
+
+    if (interBrokerProtocolVersion.isAtLeast(IBP_2_8_IV1)) {
       assertFalse(request.topicIds().get("foo").equals(Uuid.ZERO_UUID))
       assertFalse(deserializedRequest.topicIds().get("foo").equals(Uuid.ZERO_UUID))
-    } else if (interBrokerProtocolVersion >= KAFKA_2_2_IV0) {
+    } else if (interBrokerProtocolVersion.isAtLeast(IBP_2_2_IV0)) {
       assertFalse(request.topicIds().get("foo").equals(Uuid.ZERO_UUID))
       assertTrue(deserializedRequest.topicIds().get("foo").equals(Uuid.ZERO_UUID))
     } else {
@@ -358,24 +378,24 @@ class ControllerChannelManagerTest {
 
   @Test
   def testUpdateMetadataInterBrokerProtocolVersion(): Unit = {
-    testUpdateMetadataFollowsInterBrokerProtocolVersion(ApiVersion.latestVersion, ApiKeys.UPDATE_METADATA.latestVersion)
+    testUpdateMetadataFollowsInterBrokerProtocolVersion(MetadataVersion.latest, ApiKeys.UPDATE_METADATA.latestVersion)
 
-    for (apiVersion <- ApiVersion.allVersions) {
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
       val updateMetadataRequestVersion: Short =
-        if (apiVersion >= KAFKA_2_8_IV1) 7
-        else if (apiVersion >= KAFKA_2_4_IV1) 6
-        else if (apiVersion >= KAFKA_2_2_IV0) 5
-        else if (apiVersion >= KAFKA_1_0_IV0) 4
-        else if (apiVersion >= KAFKA_0_10_2_IV0) 3
-        else if (apiVersion >= KAFKA_0_10_0_IV1) 2
-        else if (apiVersion >= KAFKA_0_9_0) 1
+        if (metadataVersion.isAtLeast(IBP_2_8_IV1)) 7
+        else if (metadataVersion.isAtLeast(IBP_2_4_IV1)) 6
+        else if (metadataVersion.isAtLeast(IBP_2_2_IV0)) 5
+        else if (metadataVersion.isAtLeast(IBP_1_0_IV0)) 4
+        else if (metadataVersion.isAtLeast(IBP_0_10_2_IV0)) 3
+        else if (metadataVersion.isAtLeast(IBP_0_10_0_IV1)) 2
+        else if (metadataVersion.isAtLeast(IBP_0_9_0)) 1
         else 0
 
-      testUpdateMetadataFollowsInterBrokerProtocolVersion(apiVersion, updateMetadataRequestVersion)
+      testUpdateMetadataFollowsInterBrokerProtocolVersion(metadataVersion, updateMetadataRequestVersion)
     }
   }
 
-  private def testUpdateMetadataFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: ApiVersion,
+  private def testUpdateMetadataFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: MetadataVersion,
                                                                   expectedUpdateMetadataVersion: Short): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
@@ -454,12 +474,12 @@ class ControllerChannelManagerTest {
 
   @Test
   def testStopReplicaRequestsWhileTopicQueuedForDeletion(): Unit = {
-    for (apiVersion <- ApiVersion.allVersions) {
-      testStopReplicaRequestsWhileTopicQueuedForDeletion(apiVersion)
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      testStopReplicaRequestsWhileTopicQueuedForDeletion(metadataVersion)
     }
   }
 
-  private def testStopReplicaRequestsWhileTopicQueuedForDeletion(interBrokerProtocolVersion: ApiVersion): Unit = {
+  private def testStopReplicaRequestsWhileTopicQueuedForDeletion(interBrokerProtocolVersion: MetadataVersion): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
     val batch = new MockControllerBrokerRequestBatch(context, config)
@@ -501,12 +521,12 @@ class ControllerChannelManagerTest {
 
   @Test
   def testStopReplicaRequestsWhileTopicDeletionStarted(): Unit = {
-    for (apiVersion <- ApiVersion.allVersions) {
-      testStopReplicaRequestsWhileTopicDeletionStarted(apiVersion)
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      testStopReplicaRequestsWhileTopicDeletionStarted(metadataVersion)
     }
   }
 
-  private def testStopReplicaRequestsWhileTopicDeletionStarted(interBrokerProtocolVersion: ApiVersion): Unit = {
+  private def testStopReplicaRequestsWhileTopicDeletionStarted(interBrokerProtocolVersion: MetadataVersion): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
     val batch = new MockControllerBrokerRequestBatch(context, config)
@@ -556,12 +576,12 @@ class ControllerChannelManagerTest {
 
   @Test
   def testStopReplicaRequestWithoutDeletePartitionWhileTopicDeletionStarted(): Unit = {
-    for (apiVersion <- ApiVersion.allVersions) {
-      testStopReplicaRequestWithoutDeletePartitionWhileTopicDeletionStarted(apiVersion)
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      testStopReplicaRequestWithoutDeletePartitionWhileTopicDeletionStarted(metadataVersion)
     }
   }
 
-  private def testStopReplicaRequestWithoutDeletePartitionWhileTopicDeletionStarted(interBrokerProtocolVersion: ApiVersion): Unit = {
+  private def testStopReplicaRequestWithoutDeletePartitionWhileTopicDeletionStarted(interBrokerProtocolVersion: MetadataVersion): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
     val batch = new MockControllerBrokerRequestBatch(context, config)
@@ -603,22 +623,22 @@ class ControllerChannelManagerTest {
 
   @Test
   def testMixedDeleteAndNotDeleteStopReplicaRequests(): Unit = {
-    testMixedDeleteAndNotDeleteStopReplicaRequests(ApiVersion.latestVersion,
+    testMixedDeleteAndNotDeleteStopReplicaRequests(MetadataVersion.latest,
       ApiKeys.STOP_REPLICA.latestVersion)
 
-    for (apiVersion <- ApiVersion.allVersions) {
-      if (apiVersion < KAFKA_2_2_IV0)
-        testMixedDeleteAndNotDeleteStopReplicaRequests(apiVersion, 0.toShort)
-      else if (apiVersion < KAFKA_2_4_IV1)
-        testMixedDeleteAndNotDeleteStopReplicaRequests(apiVersion, 1.toShort)
-      else if (apiVersion < KAFKA_2_6_IV0)
-        testMixedDeleteAndNotDeleteStopReplicaRequests(apiVersion, 2.toShort)
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      if (metadataVersion.isLessThan(IBP_2_2_IV0))
+        testMixedDeleteAndNotDeleteStopReplicaRequests(metadataVersion, 0.toShort)
+      else if (metadataVersion.isLessThan(IBP_2_4_IV1))
+        testMixedDeleteAndNotDeleteStopReplicaRequests(metadataVersion, 1.toShort)
+      else if (metadataVersion.isLessThan(IBP_2_6_IV0))
+        testMixedDeleteAndNotDeleteStopReplicaRequests(metadataVersion, 2.toShort)
       else
-        testMixedDeleteAndNotDeleteStopReplicaRequests(apiVersion, 3.toShort)
+        testMixedDeleteAndNotDeleteStopReplicaRequests(metadataVersion, 3.toShort)
     }
   }
 
-  private def testMixedDeleteAndNotDeleteStopReplicaRequests(interBrokerProtocolVersion: ApiVersion,
+  private def testMixedDeleteAndNotDeleteStopReplicaRequests(interBrokerProtocolVersion: MetadataVersion,
                                                              expectedStopReplicaRequestVersion: Short): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo", "bar"))
     val config = createConfig(interBrokerProtocolVersion)
@@ -649,8 +669,8 @@ class ControllerChannelManagerTest {
     assertEquals(1, batch.sentRequests.size)
     assertTrue(batch.sentRequests.contains(2))
 
-    // Since KAFKA_2_6_IV0, only one StopReplicaRequest is sent out
-    if (interBrokerProtocolVersion >= KAFKA_2_6_IV0) {
+    // Since IBP_2_6_IV0, only one StopReplicaRequest is sent out
+    if (interBrokerProtocolVersion.isAtLeast(IBP_2_6_IV0)) {
       val sentRequests = batch.sentRequests(2)
       assertEquals(1, sentRequests.size)
 
@@ -753,21 +773,21 @@ class ControllerChannelManagerTest {
 
   @Test
   def testStopReplicaInterBrokerProtocolVersion(): Unit = {
-    testStopReplicaFollowsInterBrokerProtocolVersion(ApiVersion.latestVersion, ApiKeys.STOP_REPLICA.latestVersion)
-
-    for (apiVersion <- ApiVersion.allVersions) {
-      if (apiVersion < KAFKA_2_2_IV0)
-        testStopReplicaFollowsInterBrokerProtocolVersion(apiVersion, 0.toShort)
-      else if (apiVersion < KAFKA_2_4_IV1)
-        testStopReplicaFollowsInterBrokerProtocolVersion(apiVersion, 1.toShort)
-      else if (apiVersion < KAFKA_2_6_IV0)
-        testStopReplicaFollowsInterBrokerProtocolVersion(apiVersion, 2.toShort)
+    testStopReplicaFollowsInterBrokerProtocolVersion(MetadataVersion.latest, ApiKeys.STOP_REPLICA.latestVersion)
+
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      if (metadataVersion.isLessThan(IBP_2_2_IV0))
+        testStopReplicaFollowsInterBrokerProtocolVersion(metadataVersion, 0.toShort)
+      else if (metadataVersion.isLessThan(IBP_2_4_IV1))
+        testStopReplicaFollowsInterBrokerProtocolVersion(metadataVersion, 1.toShort)
+      else if (metadataVersion.isLessThan(IBP_2_6_IV0))
+        testStopReplicaFollowsInterBrokerProtocolVersion(metadataVersion, 2.toShort)
       else
-        testStopReplicaFollowsInterBrokerProtocolVersion(apiVersion, 3.toShort)
+        testStopReplicaFollowsInterBrokerProtocolVersion(metadataVersion, 3.toShort)
     }
   }
 
-  private def testStopReplicaFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: ApiVersion,
+  private def testStopReplicaFollowsInterBrokerProtocolVersion(interBrokerProtocolVersion: MetadataVersion,
                                                                expectedStopReplicaRequestVersion: Short): Unit = {
     val context = initContext(Seq(1, 2, 3), 2, 3, Set("foo"))
     val config = createConfig(interBrokerProtocolVersion)
@@ -868,7 +888,7 @@ class ControllerChannelManagerTest {
     }
   }
 
-  private def createConfig(interBrokerVersion: ApiVersion): KafkaConfig = {
+  private def createConfig(interBrokerVersion: MetadataVersion): KafkaConfig = {
     val props = new Properties()
     props.put(KafkaConfig.BrokerIdProp, controllerId.toString)
     props.put(KafkaConfig.ZkConnectProp, "zkConnect")
diff --git a/core/src/test/scala/unit/kafka/controller/ControllerContextTest.scala b/core/src/test/scala/unit/kafka/controller/ControllerContextTest.scala
index e8efa5af79356..e88bb321ad9b3 100644
--- a/core/src/test/scala/unit/kafka/controller/ControllerContextTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/ControllerContextTest.scala
@@ -203,4 +203,25 @@ class ControllerContextTest {
     context.removeTopic(tp3.topic)
     assertEquals(0, context.preferredReplicaImbalanceCount)
   }
+
+  @Test
+  def testPreferredReplicaImbalanceMetricOnConcurrentTopicDeletion(): Unit = {
+    val topicA = "A"
+    val topicB = "B"
+    val tpA = new TopicPartition(topicA, 0)
+    val tpB = new TopicPartition(topicB, 0)
+    context.updatePartitionFullReplicaAssignment(tpA, ReplicaAssignment(Seq(1, 2, 3)))
+    context.updatePartitionFullReplicaAssignment(tpB, ReplicaAssignment(Seq(1, 2, 3)))
+    assertEquals(0, context.preferredReplicaImbalanceCount)
+
+    context.queueTopicDeletion(Set(topicA))
+    // All partitions in topic will be marked as Offline during deletion procedure
+    context.putPartitionLeadershipInfo(tpA, LeaderIsrAndControllerEpoch(LeaderAndIsr(LeaderAndIsr.NoLeader, List(1, 2, 3)), 0))
+    assertEquals(0, context.preferredReplicaImbalanceCount)
+
+    // Initiate topicB's topic deletion before topicA's deletion completes.
+    // Since topicA's delete-topic ZK node still exists, context.queueTopicDeletion will be called with Set(topicA, topicB)
+    context.queueTopicDeletion(Set(topicA, topicB))
+    assertEquals(0, context.preferredReplicaImbalanceCount)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/controller/ControllerEventManagerTest.scala b/core/src/test/scala/unit/kafka/controller/ControllerEventManagerTest.scala
index 26bbf94e90141..97d046f5ea7a9 100644
--- a/core/src/test/scala/unit/kafka/controller/ControllerEventManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/ControllerEventManagerTest.scala
@@ -22,12 +22,12 @@ import java.util.concurrent.atomic.AtomicInteger
 
 import com.yammer.metrics.core.{Histogram, MetricName, Timer}
 import kafka.controller
-import kafka.metrics.KafkaYammerMetrics
 import kafka.utils.TestUtils
 import org.apache.kafka.common.message.UpdateMetadataResponseData
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.UpdateMetadataResponse
 import org.apache.kafka.common.utils.MockTime
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue, fail}
 import org.junit.jupiter.api.{AfterEach, Test}
 
diff --git a/core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala b/core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
index 300db0047b441..532ff1a946e9e 100644
--- a/core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/ControllerIntegrationTest.scala
@@ -19,21 +19,28 @@ package kafka.controller
 
 import java.util.Properties
 import java.util.concurrent.{CompletableFuture, CountDownLatch, LinkedBlockingQueue, TimeUnit}
+import java.util.stream.{Stream => JStream}
 import com.yammer.metrics.core.Timer
-import kafka.api.{ApiVersion, KAFKA_2_6_IV0, KAFKA_2_7_IV0, LeaderAndIsr}
-import kafka.controller.KafkaController.AlterIsrCallback
-import kafka.metrics.KafkaYammerMetrics
+import kafka.api.LeaderAndIsr
 import kafka.server.{KafkaConfig, KafkaServer, QuorumTestHarness}
 import kafka.utils.{LogCaptureAppender, TestUtils}
 import kafka.zk.{FeatureZNodeStatus, _}
 import org.apache.kafka.common.errors.{ControllerMovedException, StaleBrokerEpochException}
-import org.apache.kafka.common.feature.Features
+import org.apache.kafka.common.message.{AlterPartitionRequestData, AlterPartitionResponseData}
 import org.apache.kafka.common.metrics.KafkaMetric
+import org.apache.kafka.common.protocol.ApiKeys
 import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.utils.annotation.ApiKeyVersionsSource
 import org.apache.kafka.common.{ElectionType, TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_6_IV0, IBP_2_7_IV0, IBP_3_2_IV0}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.log4j.Level
 import org.junit.jupiter.api.Assertions.{assertEquals, assertNotEquals, assertTrue}
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.{Arguments, MethodSource}
 import org.mockito.Mockito.{doAnswer, spy, verify}
 import org.mockito.invocation.InvocationOnMock
 
@@ -41,6 +48,16 @@ import scala.collection.{Map, Seq, mutable}
 import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success, Try}
 
+object ControllerIntegrationTest {
+  def testAlterPartitionSource(): JStream[Arguments] = {
+    Seq(MetadataVersion.IBP_2_7_IV0, MetadataVersion.latest).asJava.stream.flatMap { metadataVersion =>
+      ApiKeys.ALTER_PARTITION.allVersions.stream.map { alterPartitionVersion =>
+        Arguments.of(metadataVersion, alterPartitionVersion)
+      }
+    }
+  }
+}
+
 class ControllerIntegrationTest extends QuorumTestHarness {
   var servers = Seq.empty[KafkaServer]
   val firstControllerEpoch = KafkaController.InitialControllerEpoch + 1
@@ -123,7 +140,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.waitUntilBrokerMetadataIsPropagated(servers)
     val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     // Need to make sure the broker we shutdown and startup are not the controller. Otherwise we will send out
-    // full UpdateMetadataReuqest to all brokers during controller failover.
+    // full UpdateMetadataRequest to all brokers during controller failover.
     val testBroker = servers.filter(e => e.config.brokerId != controllerId).head
     val remainingBrokers = servers.filter(_.config.brokerId != testBroker.config.brokerId)
     val topic = "topic1"
@@ -255,7 +272,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(0))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
   }
 
@@ -269,7 +286,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(otherBrokerId, controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers.take(1))
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
   }
 
@@ -284,7 +301,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
       tp1 -> ReplicaAssignment(Seq(0), Seq(), Seq()))
     TestUtils.createTopic(zkClient, tp0.topic, partitionReplicaAssignment = assignment, servers = servers)
     zkClient.setTopicAssignment(tp0.topic, Some(Uuid.randomUuid()), expandedAssignment, firstControllerEpochZkVersion)
-    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic partition expansion")
     TestUtils.waitForPartitionMetadata(servers, tp1.topic, tp1.partition)
   }
@@ -304,7 +321,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers(otherBrokerId).shutdown()
     servers(otherBrokerId).awaitShutdown()
     zkClient.setTopicAssignment(tp0.topic, Some(Uuid.randomUuid()), expandedAssignment, firstControllerEpochZkVersion)
-    waitForPartitionState(tp1, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp1, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic partition expansion")
     TestUtils.waitForPartitionMetadata(Seq(servers(controllerId)), tp1.topic, tp1.partition)
   }
@@ -323,7 +340,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val reassignment = Map(tp -> ReplicaAssignment(Seq(otherBrokerId), List(), List()))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
     zkClient.createPartitionReassignment(reassignment.map { case (k, v) => k -> v.replicas })
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch + 3,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch + 3,
       "failed to get expected partition state after partition reassignment")
     TestUtils.waitUntilTrue(() =>  zkClient.getFullReplicaAssignmentForTopics(Set(tp.topic)) == reassignment,
       "failed to get updated partition assignment on topic znode after partition reassignment")
@@ -362,7 +379,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val reassignment = Map(tp -> ReplicaAssignment(Seq(otherBrokerId), List(), List()))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
     zkClient.createPartitionReassignment(reassignment.map { case (k, v) => k -> v.replicas })
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch + 3,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch + 3,
       "with an offline log directory on the target broker, the partition reassignment stalls")
     TestUtils.waitUntilTrue(() =>  zkClient.getFullReplicaAssignmentForTopics(Set(tp.topic)) == reassignment,
       "failed to get updated partition assignment on topic znode after partition reassignment")
@@ -387,7 +404,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers(otherBrokerId).awaitShutdown()
     val controller = getController()
     zkClient.setOrCreatePartitionReassignment(reassignment, controller.kafkaController.controllerContext.epochZkVersion)
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch + 1,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch + 1,
       "failed to get expected partition state during partition reassignment with offline replica")
     TestUtils.waitUntilTrue(() => zkClient.reassignPartitionsInProgress,
       "partition reassignment path should remain while reassignment in progress")
@@ -405,10 +422,10 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers(otherBrokerId).shutdown()
     servers(otherBrokerId).awaitShutdown()
     zkClient.createPartitionReassignment(reassignment.map { case (k, v) => k -> v.replicas })
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch + 1,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch + 1,
       "failed to get expected partition state during partition reassignment with offline replica")
     servers(otherBrokerId).startup()
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch + 4,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch + 4,
       "failed to get expected partition state after partition reassignment")
     TestUtils.waitUntilTrue(() => zkClient.getFullReplicaAssignmentForTopics(Set(tp.topic)) == reassignment,
       "failed to get updated partition assignment on topic znode after partition reassignment")
@@ -424,7 +441,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(otherBroker.config.brokerId, controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.initialLeaderEpoch)
+    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.InitialLeaderEpoch)
   }
 
   @Test
@@ -435,8 +452,8 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(otherBroker.config.brokerId, controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.initialLeaderEpoch)
-    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.initialLeaderEpoch + 2)
+    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.InitialLeaderEpoch)
+    preferredReplicaLeaderElection(controllerId, otherBroker, tp, assignment(tp.partition).toSet, LeaderAndIsr.InitialLeaderEpoch + 2)
   }
 
   @Test
@@ -452,7 +469,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     zkClient.createPreferredReplicaElection(Set(tp))
     TestUtils.waitUntilTrue(() => !zkClient.pathExists(PreferredReplicaElectionZNode.path),
       "failed to remove preferred replica leader election path after giving up")
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch + 1,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch + 1,
       "failed to get expected partition state upon broker shutdown")
   }
 
@@ -466,10 +483,10 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
     servers(otherBrokerId).shutdown()
     servers(otherBrokerId).awaitShutdown()
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch + 1,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch + 1,
       "failed to get expected partition state upon broker shutdown")
     servers(otherBrokerId).startup()
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch + 2,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch + 2,
       "failed to get expected partition state upon broker startup")
   }
 
@@ -481,14 +498,14 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(otherBrokerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     servers(otherBrokerId).shutdown()
     servers(otherBrokerId).awaitShutdown()
     TestUtils.waitUntilTrue(() => {
       val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp))
       leaderIsrAndControllerEpochMap.contains(tp) &&
-        isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, LeaderAndIsr.NoLeader, LeaderAndIsr.initialLeaderEpoch + 1) &&
+        isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, LeaderAndIsr.NoLeader, LeaderAndIsr.InitialLeaderEpoch + 1) &&
         leaderIsrAndControllerEpochMap(tp).leaderAndIsr.isr == List(otherBrokerId)
     }, "failed to get expected partition state after entire isr went offline")
   }
@@ -501,14 +518,14 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(otherBrokerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, otherBrokerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     servers(otherBrokerId).shutdown()
     servers(otherBrokerId).awaitShutdown()
     TestUtils.waitUntilTrue(() => {
       val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp))
       leaderIsrAndControllerEpochMap.contains(tp) &&
-        isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, LeaderAndIsr.NoLeader, LeaderAndIsr.initialLeaderEpoch + 1) &&
+        isExpectedPartitionState(leaderIsrAndControllerEpochMap(tp), firstControllerEpoch, LeaderAndIsr.NoLeader, LeaderAndIsr.InitialLeaderEpoch + 1) &&
         leaderIsrAndControllerEpochMap(tp).leaderAndIsr.isr == List(otherBrokerId)
     }, "failed to get expected partition state after entire isr went offline")
   }
@@ -524,7 +541,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     // create the topic
     TestUtils.createTopic(zkClient, topic, partitionReplicaAssignment = expectedReplicaAssignment, servers = servers)
 
-    val controllerId = zkClient.getControllerId.get
+    val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     val controller = servers.find(p => p.config.brokerId == controllerId).get.kafkaController
     val resultQueue = new LinkedBlockingQueue[Try[collection.Set[TopicPartition]]]()
     val controlledShutdownCallback = (controlledShutdownResult: Try[collection.Set[TopicPartition]]) => resultQueue.put(controlledShutdownResult)
@@ -628,32 +645,32 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsEnabledWithNonExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Option.empty, KAFKA_2_7_IV0)
+    testControllerFeatureZNodeSetup(Option.empty, IBP_2_7_IV0)
   }
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsEnabledWithDisabledExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Some(new FeatureZNode(FeatureZNodeStatus.Disabled, Features.emptyFinalizedFeatures())), KAFKA_2_7_IV0)
+    testControllerFeatureZNodeSetup(Some(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, Map.empty[String, Short])), IBP_2_7_IV0)
   }
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsEnabledWithEnabledExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Some(new FeatureZNode(FeatureZNodeStatus.Enabled, Features.emptyFinalizedFeatures())), KAFKA_2_7_IV0)
+    testControllerFeatureZNodeSetup(Some(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, Map.empty[String, Short])), IBP_2_7_IV0)
   }
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsDisabledWithNonExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Option.empty, KAFKA_2_6_IV0)
+    testControllerFeatureZNodeSetup(Option.empty, IBP_2_6_IV0)
   }
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsDisabledWithDisabledExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Some(new FeatureZNode(FeatureZNodeStatus.Disabled, Features.emptyFinalizedFeatures())), KAFKA_2_6_IV0)
+    testControllerFeatureZNodeSetup(Some(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, Map.empty[String, Short])), IBP_2_6_IV0)
   }
 
   @Test
   def testControllerFeatureZNodeSetupWhenFeatureVersioningIsDisabledWithEnabledExistingFeatureZNode(): Unit = {
-    testControllerFeatureZNodeSetup(Some(new FeatureZNode(FeatureZNodeStatus.Enabled, Features.emptyFinalizedFeatures())), KAFKA_2_6_IV0)
+    testControllerFeatureZNodeSetup(Some(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, Map.empty[String, Short])), IBP_2_6_IV0)
   }
 
   @Test
@@ -667,7 +684,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val assignment = Map(tp.partition -> Seq(0, 1))
 
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
 
     // Wait until the event thread is idle
@@ -780,7 +797,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
   }
 
   private def testControllerFeatureZNodeSetup(initialZNode: Option[FeatureZNode],
-                                              interBrokerProtocolVersion: ApiVersion): Unit = {
+                                              interBrokerProtocolVersion: MetadataVersion): Unit = {
     val versionBeforeOpt = initialZNode match {
       case Some(node) =>
         zkClient.createFeatureZNode(node)
@@ -807,8 +824,8 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
     val (mayBeFeatureZNodeBytes, versionAfter) = zkClient.getDataAndVersion(FeatureZNode.path)
     val newZNode = FeatureZNode.decode(mayBeFeatureZNodeBytes.get)
-    if (interBrokerProtocolVersion >= KAFKA_2_7_IV0) {
-      val emptyZNode = new FeatureZNode(FeatureZNodeStatus.Enabled, Features.emptyFinalizedFeatures)
+    if (interBrokerProtocolVersion.isAtLeast(IBP_2_7_IV0)) {
+      val emptyZNode = FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, Map.empty[String, Short])
       initialZNode match {
         case Some(node) => {
           node.status match {
@@ -822,10 +839,10 @@ class ControllerIntegrationTest extends QuorumTestHarness {
         }
         case None =>
           assertEquals(0, versionAfter)
-          assertEquals(new FeatureZNode(FeatureZNodeStatus.Enabled, Features.emptyFinalizedFeatures), newZNode)
+          assertEquals(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, Map.empty[String, Short]), newZNode)
       }
     } else {
-      val emptyZNode = new FeatureZNode(FeatureZNodeStatus.Disabled, Features.emptyFinalizedFeatures)
+      val emptyZNode = FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, Map.empty[String, Short])
       initialZNode match {
         case Some(node) => {
           node.status match {
@@ -839,13 +856,132 @@ class ControllerIntegrationTest extends QuorumTestHarness {
         }
         case None =>
           assertEquals(0, versionAfter)
-          assertEquals(new FeatureZNode(FeatureZNodeStatus.Disabled, Features.emptyFinalizedFeatures), newZNode)
+          assertEquals(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, Map.empty[String, Short]), newZNode)
       }
     }
   }
 
+  @ParameterizedTest
+  @MethodSource(Array("testAlterPartitionSource"))
+  def testAlterPartition(metadataVersion: MetadataVersion, alterPartitionVersion: Short): Unit = {
+    if (!metadataVersion.isTopicIdsSupported && alterPartitionVersion > 1) {
+      // This combination is not valid. We cannot use alter partition version > 1
+      // if the broker is on an IBP < 2.8 because topics don't have id in this case.
+      return
+    }
+
+    servers = makeServers(1, interBrokerProtocolVersion = Some(metadataVersion))
+
+    val controllerId = TestUtils.waitUntilControllerElected(zkClient)
+    val tp = new TopicPartition("t", 0)
+    val assignment = Map(tp.partition -> Seq(controllerId))
+    TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
+
+    val controller = getController().kafkaController
+    val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp))
+    val newLeaderAndIsr = leaderIsrAndControllerEpochMap(tp).leaderAndIsr
+    val topicId = controller.controllerContext.topicIds.getOrElse(tp.topic, Uuid.ZERO_UUID)
+    val brokerId = controllerId
+    val brokerEpoch = controller.controllerContext.liveBrokerIdAndEpochs(controllerId)
+
+    // The caller of the AlterPartition API can only use topics ids iff 1) the controller is
+    // on IBP >= 2.8 and 2) the AlterPartition version 2 and above is used.
+    val canCallerUseTopicIds = metadataVersion.isTopicIdsSupported && alterPartitionVersion > 1
+
+    val alterPartitionRequest = new AlterPartitionRequestData()
+      .setBrokerId(brokerId)
+      .setBrokerEpoch(brokerEpoch)
+      .setTopics(Seq(new AlterPartitionRequestData.TopicData()
+        .setTopicName(if (!canCallerUseTopicIds) tp.topic else "")
+        .setTopicId(if (canCallerUseTopicIds) topicId else Uuid.ZERO_UUID)
+        .setPartitions(Seq(new AlterPartitionRequestData.PartitionData()
+          .setPartitionIndex(tp.partition)
+          .setLeaderEpoch(newLeaderAndIsr.leaderEpoch)
+          .setPartitionEpoch(newLeaderAndIsr.partitionEpoch)
+          .setNewIsr(newLeaderAndIsr.isr.map(Int.box).asJava)
+          .setLeaderRecoveryState(newLeaderAndIsr.leaderRecoveryState.value)
+        ).asJava)
+      ).asJava)
+
+    val future = alterPartitionFuture(alterPartitionRequest, alterPartitionVersion)
+
+    val expectedAlterPartitionResponse = new AlterPartitionResponseData()
+      .setTopics(Seq(new AlterPartitionResponseData.TopicData()
+        .setTopicName(if (!canCallerUseTopicIds) tp.topic else "")
+        .setTopicId(if (canCallerUseTopicIds) topicId else Uuid.ZERO_UUID)
+        .setPartitions(Seq(new AlterPartitionResponseData.PartitionData()
+          .setPartitionIndex(tp.partition)
+          .setLeaderId(brokerId)
+          .setLeaderEpoch(newLeaderAndIsr.leaderEpoch)
+          .setPartitionEpoch(newLeaderAndIsr.partitionEpoch)
+          .setIsr(newLeaderAndIsr.isr.map(Int.box).asJava)
+          .setLeaderRecoveryState(newLeaderAndIsr.leaderRecoveryState.value)
+        ).asJava)
+      ).asJava)
+
+    assertEquals(expectedAlterPartitionResponse, future.get(10, TimeUnit.SECONDS))
+  }
+
   @Test
-  def testIdempotentAlterIsr(): Unit = {
+  def testAlterPartitionVersion2KeepWorkingWhenControllerDowngradeToPre28IBP(): Unit = {
+    // When the controller downgrades from IBP >= 2.8 to IBP < 2.8, it does not assign
+    // topic ids anymore. However, the already assigned topic ids are kept. This means
+    // that using AlterPartition version 2 should still work assuming that it only
+    // contains topic with topics ids.
+    servers = makeServers(1, interBrokerProtocolVersion = Some(MetadataVersion.latest))
+
+    val controllerId = TestUtils.waitUntilControllerElected(zkClient)
+    val tp = new TopicPartition("t", 0)
+    val assignment = Map(tp.partition -> Seq(controllerId))
+    TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
+
+    // Downgrade controller to IBP 2.7
+    servers(0).shutdown()
+    servers(0).awaitShutdown()
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
+    TestUtils.waitUntilControllerElected(zkClient)
+
+    val controller = getController().kafkaController
+    val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp))
+    val newLeaderAndIsr = leaderIsrAndControllerEpochMap(tp).leaderAndIsr
+    val topicId = controller.controllerContext.topicIds.getOrElse(tp.topic, Uuid.ZERO_UUID)
+    val brokerId = controllerId
+    val brokerEpoch = controller.controllerContext.liveBrokerIdAndEpochs(controllerId)
+
+    val alterPartitionRequest = new AlterPartitionRequestData()
+      .setBrokerId(brokerId)
+      .setBrokerEpoch(brokerEpoch)
+      .setTopics(Seq(new AlterPartitionRequestData.TopicData()
+        .setTopicId(topicId)
+        .setPartitions(Seq(new AlterPartitionRequestData.PartitionData()
+          .setPartitionIndex(tp.partition)
+          .setLeaderEpoch(newLeaderAndIsr.leaderEpoch)
+          .setPartitionEpoch(newLeaderAndIsr.partitionEpoch)
+          .setNewIsr(newLeaderAndIsr.isr.map(Int.box).asJava)
+          .setLeaderRecoveryState(newLeaderAndIsr.leaderRecoveryState.value)
+        ).asJava)
+      ).asJava)
+
+    val future = alterPartitionFuture(alterPartitionRequest, ApiKeys.ALTER_PARTITION.latestVersion)
+
+    val expectedAlterPartitionResponse = new AlterPartitionResponseData()
+      .setTopics(Seq(new AlterPartitionResponseData.TopicData()
+        .setTopicId(topicId)
+        .setPartitions(Seq(new AlterPartitionResponseData.PartitionData()
+          .setPartitionIndex(tp.partition)
+          .setLeaderId(brokerId)
+          .setLeaderEpoch(newLeaderAndIsr.leaderEpoch)
+          .setPartitionEpoch(newLeaderAndIsr.partitionEpoch)
+          .setIsr(newLeaderAndIsr.isr.map(Int.box).asJava)
+          .setLeaderRecoveryState(newLeaderAndIsr.leaderRecoveryState.value)
+        ).asJava)
+      ).asJava)
+
+    assertEquals(expectedAlterPartitionResponse, future.get(10, TimeUnit.SECONDS))
+  }
+
+  @Test
+  def testIdempotentAlterPartition(): Unit = {
     servers = makeServers(2)
     val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     val otherBroker = servers.find(_.config.brokerId != controllerId).get
@@ -853,89 +989,438 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val assignment = Map(tp.partition -> Seq(otherBroker.config.brokerId, controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
 
-    val latch = new CountDownLatch(1)
     val controller = getController().kafkaController
-
     val leaderIsrAndControllerEpochMap = zkClient.getTopicPartitionStates(Seq(tp))
-    val newLeaderAndIsr = leaderIsrAndControllerEpochMap(tp).leaderAndIsr
-
-    val callback = (result: Either[Map[TopicPartition, Either[Errors, LeaderAndIsr]], Errors]) => {
-      result match {
-        case Left(partitionResults: Map[TopicPartition, Either[Errors, LeaderAndIsr]]) =>
-          partitionResults.get(tp) match {
-            case Some(Left(error: Errors)) => throw new AssertionError(s"Should not have seen error for $tp")
-            case Some(Right(leaderAndIsr: LeaderAndIsr)) => assertEquals(leaderAndIsr, newLeaderAndIsr, "ISR should remain unchanged")
-            case None => throw new AssertionError(s"Should have seen $tp in result")
-          }
-        case Right(_: Errors) => throw new AssertionError("Should not have had top-level error here")
-      }
-      latch.countDown()
+    val oldLeaderAndIsr = leaderIsrAndControllerEpochMap(tp).leaderAndIsr
+    val newIsr = List(oldLeaderAndIsr.leader)
+    val newPartitionEpoch = oldLeaderAndIsr.partitionEpoch + 1
+    val topicId = controller.controllerContext.topicIds(tp.topic)
+    val brokerId = otherBroker.config.brokerId
+    val brokerEpoch = controller.controllerContext.liveBrokerIdAndEpochs(otherBroker.config.brokerId)
+
+    def sendAndVerifyAlterPartitionResponse(requestPartitionEpoch: Int): Unit = {
+      val alterPartitionRequest = new AlterPartitionRequestData()
+        .setBrokerId(brokerId)
+        .setBrokerEpoch(brokerEpoch)
+        .setTopics(Seq(new AlterPartitionRequestData.TopicData()
+          .setTopicId(topicId)
+          .setPartitions(Seq(new AlterPartitionRequestData.PartitionData()
+            .setPartitionIndex(tp.partition)
+            .setLeaderEpoch(oldLeaderAndIsr.leaderEpoch)
+            .setPartitionEpoch(requestPartitionEpoch)
+            .setNewIsr(newIsr.map(Int.box).asJava)
+            .setLeaderRecoveryState(oldLeaderAndIsr.leaderRecoveryState.value)
+          ).asJava)
+        ).asJava)
+
+    val future = alterPartitionFuture(alterPartitionRequest, AlterPartitionRequestData.HIGHEST_SUPPORTED_VERSION)
+
+      // When re-sending an ISR update, we should not get and error or any ISR changes
+      val expectedAlterPartitionResponse = new AlterPartitionResponseData()
+        .setTopics(Seq(new AlterPartitionResponseData.TopicData()
+          .setTopicId(topicId)
+          .setPartitions(Seq(new AlterPartitionResponseData.PartitionData()
+            .setPartitionIndex(tp.partition)
+            .setLeaderId(brokerId)
+            .setLeaderEpoch(oldLeaderAndIsr.leaderEpoch)
+            .setPartitionEpoch(newPartitionEpoch)
+            .setIsr(newIsr.map(Int.box).asJava)
+            .setLeaderRecoveryState(oldLeaderAndIsr.leaderRecoveryState.value)
+          ).asJava)
+        ).asJava)
+      assertEquals(expectedAlterPartitionResponse, future.get(10, TimeUnit.SECONDS))
     }
 
-    val brokerEpoch = controller.controllerContext.liveBrokerIdAndEpochs.get(otherBroker.config.brokerId).get
-    // When re-sending the current ISR, we should not get and error or any ISR changes
-    controller.eventManager.put(AlterIsrReceived(otherBroker.config.brokerId, brokerEpoch, Map(tp -> newLeaderAndIsr), callback))
-    latch.await()
+    // send a request, expect the partition epoch to be incremented
+    sendAndVerifyAlterPartitionResponse(oldLeaderAndIsr.partitionEpoch)
+
+    // re-send the same request with various partition epochs (less/equal/greater than the current
+    // epoch), expect it to succeed while the partition epoch remains the same
+    sendAndVerifyAlterPartitionResponse(oldLeaderAndIsr.partitionEpoch)
+    sendAndVerifyAlterPartitionResponse(newPartitionEpoch)
   }
 
-  @Test
-  def testAlterIsrErrors(): Unit = {
-    servers = makeServers(1)
+  @ParameterizedTest
+  @ApiKeyVersionsSource(apiKey = ApiKeys.ALTER_PARTITION)
+  def testShutdownBrokerNotAddedToIsr(alterPartitionVersion: Short): Unit = {
+    servers = makeServers(2)
     val controllerId = TestUtils.waitUntilControllerElected(zkClient)
+    val otherBroker = servers.find(_.config.brokerId != controllerId).get
+    val brokerId = otherBroker.config.brokerId
     val tp = new TopicPartition("t", 0)
-    val assignment = Map(tp.partition -> Seq(controllerId))
+    val assignment = Map(tp.partition -> Seq(controllerId, brokerId))
+    val fullIsr = List(controllerId, brokerId)
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
+
+    // Shut down follower.
+    servers(brokerId).shutdown()
+    servers(brokerId).awaitShutdown()
+
     val controller = getController().kafkaController
-    var future = captureAlterIsrError(controllerId, controller.brokerEpoch - 1,
-      Map(tp -> LeaderAndIsr(controllerId, List(controllerId))))
-    var capturedError = future.get(5, TimeUnit.SECONDS)
-    assertEquals(Errors.STALE_BROKER_EPOCH, capturedError)
-
-    future = captureAlterIsrError(99, controller.brokerEpoch,
-      Map(tp -> LeaderAndIsr(controllerId, List(controllerId))))
-    capturedError = future.get(5, TimeUnit.SECONDS)
-    assertEquals(Errors.STALE_BROKER_EPOCH, capturedError)
-
-    val unknownTopicPartition = new TopicPartition("unknown", 99)
-    future = captureAlterIsrPartitionError(controllerId, controller.brokerEpoch,
-      Map(unknownTopicPartition -> LeaderAndIsr(controllerId, List(controllerId))), unknownTopicPartition)
-    capturedError = future.get(5, TimeUnit.SECONDS)
-    assertEquals(Errors.UNKNOWN_TOPIC_OR_PARTITION, capturedError)
-
-    future = captureAlterIsrPartitionError(controllerId, controller.brokerEpoch,
-      Map(tp -> LeaderAndIsr(controllerId, 1, List(controllerId), 99)), tp)
-    capturedError = future.get(5, TimeUnit.SECONDS)
-    assertEquals(Errors.INVALID_UPDATE_VERSION, capturedError)
-  }
-
-  def captureAlterIsrError(brokerId: Int, brokerEpoch: Long, isrsToAlter: Map[TopicPartition, LeaderAndIsr]): CompletableFuture[Errors] = {
-    val future = new CompletableFuture[Errors]()
+    val leaderIsrAndControllerEpochMap = controller.controllerContext.partitionsLeadershipInfo
+    val leaderAndIsr = leaderIsrAndControllerEpochMap(tp).leaderAndIsr
+    val topicId = controller.controllerContext.topicIds(tp.topic)
+    val controllerEpoch = controller.controllerContext.liveBrokerIdAndEpochs(controllerId)
+
+    // We expect only the controller (online broker) to be in ISR
+    assertEquals(List(controllerId), leaderAndIsr.isr)
+
+    val requestTopic = new AlterPartitionRequestData.TopicData()
+      .setPartitions(Seq(new AlterPartitionRequestData.PartitionData()
+        .setPartitionIndex(tp.partition)
+        .setLeaderEpoch(leaderAndIsr.leaderEpoch)
+        .setPartitionEpoch(leaderAndIsr.partitionEpoch)
+        .setNewIsr(fullIsr.map(Int.box).asJava)
+        .setLeaderRecoveryState(leaderAndIsr.leaderRecoveryState.value)).asJava)
+    if (alterPartitionVersion > 1) requestTopic.setTopicId(topicId) else requestTopic.setTopicName(tp.topic)
+
+    // Try to update ISR to contain the offline broker.
+    val alterPartitionRequest = new AlterPartitionRequestData()
+      .setBrokerId(controllerId)
+      .setBrokerEpoch(controllerEpoch)
+      .setTopics(Seq(requestTopic).asJava)
+
+    val future = alterPartitionFuture(alterPartitionRequest, alterPartitionVersion)
+
+    val expectedError = if (alterPartitionVersion > 1) Errors.INELIGIBLE_REPLICA else Errors.OPERATION_NOT_ATTEMPTED
+    val expectedResponseTopic = new AlterPartitionResponseData.TopicData()
+      .setPartitions(Seq(new AlterPartitionResponseData.PartitionData()
+        .setPartitionIndex(tp.partition)
+        .setErrorCode(expectedError.code())
+        .setLeaderRecoveryState(leaderAndIsr.leaderRecoveryState.value)
+      ).asJava)
+    if (alterPartitionVersion > 1) expectedResponseTopic.setTopicId(topicId) else expectedResponseTopic.setTopicName(tp.topic)
+
+    // We expect an ineligble replica error response for the partition.
+    val expectedAlterPartitionResponse = new AlterPartitionResponseData()
+      .setTopics(Seq(expectedResponseTopic).asJava)
+
+    val newLeaderIsrAndControllerEpochMap = controller.controllerContext.partitionsLeadershipInfo
+    val newLeaderAndIsr = newLeaderIsrAndControllerEpochMap(tp).leaderAndIsr
+    assertEquals(expectedAlterPartitionResponse, future.get(10, TimeUnit.SECONDS))
+    assertEquals(List(controllerId), newLeaderAndIsr.isr)
+
+    // Bring replica back online.
+    servers(brokerId).startup()
+
+    // Wait for broker to rejoin ISR.
+    TestUtils.waitUntilTrue(() => fullIsr == zkClient.getTopicPartitionState(tp).get.leaderAndIsr.isr, "Replica did not rejoin ISR.")
+  }
+
+  @Test
+  def testAlterPartitionErrors(): Unit = {
+    servers = makeServers(2)
+    val controllerId = TestUtils.waitUntilControllerElected(zkClient)
+    val tp = new TopicPartition("t", 0)
+    val replicas = controllerId :: servers.map(_.config.nodeId).filter(_ != controllerId).take(1).toList
+    val assignment = Map(tp.partition -> replicas)
+
+    TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
     val controller = getController().kafkaController
-    val callback: AlterIsrCallback = {
-      case Left(_: Map[TopicPartition, Either[Errors, LeaderAndIsr]]) =>
-        future.completeExceptionally(new AssertionError(s"Should have seen top-level error"))
-      case Right(error: Errors) =>
-        future.complete(error)
+    val partitionState = controller.controllerContext.partitionLeadershipInfo(tp).get
+    val leaderId = partitionState.leaderAndIsr.leader
+    val leaderBrokerEpoch = servers(leaderId).kafkaController.brokerEpoch
+    val leaderEpoch = partitionState.leaderAndIsr.leaderEpoch
+    val partitionEpoch = partitionState.leaderAndIsr.partitionEpoch
+    val topicId = controller.controllerContext.topicIds.get(tp.topic)
+
+    def assertAlterPartition(
+      topLevelError: Errors = Errors.NONE,
+      partitionError: Errors = Errors.NONE,
+      topicPartition: TopicPartition = tp,
+      topicIdOpt: Option[Uuid] = topicId,
+      leaderId: Int = leaderId,
+      brokerEpoch: Long = leaderBrokerEpoch,
+      leaderEpoch: Int = leaderEpoch,
+      partitionEpoch: Int = partitionEpoch,
+      isr: Set[Int] = replicas.toSet,
+      leaderRecoveryState: Byte = LeaderRecoveryState.RECOVERED.value
+    ): Unit = {
+      assertAlterPartitionError(
+        topicPartition = topicPartition,
+        topicIdOpt = topicIdOpt,
+        leaderId = leaderId,
+        brokerEpoch = brokerEpoch,
+        leaderEpoch = leaderEpoch,
+        partitionEpoch = partitionEpoch,
+        isr = isr,
+        leaderRecoveryState = leaderRecoveryState,
+        topLevelError = topLevelError,
+        partitionError = partitionError
+      )
     }
-    controller.eventManager.put(AlterIsrReceived(brokerId, brokerEpoch, isrsToAlter, callback))
-    future
+
+    assertAlterPartition(
+      topLevelError = Errors.STALE_BROKER_EPOCH,
+      brokerEpoch = leaderBrokerEpoch - 1
+    )
+
+    assertAlterPartition(
+      topLevelError = Errors.STALE_BROKER_EPOCH,
+      leaderId = 99,
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.UNKNOWN_TOPIC_ID,
+      topicIdOpt = Some(Uuid.randomUuid())
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.UNKNOWN_TOPIC_OR_PARTITION,
+      topicPartition = new TopicPartition("unknown", 0),
+      topicIdOpt = None
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.UNKNOWN_TOPIC_OR_PARTITION,
+      topicPartition = new TopicPartition(tp.topic, 1),
+      topicIdOpt = None
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_UPDATE_VERSION,
+      isr = Set(leaderId),
+      partitionEpoch = partitionEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      partitionEpoch = partitionEpoch + 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.FENCED_LEADER_EPOCH,
+      leaderEpoch = leaderEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      leaderEpoch = leaderEpoch + 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_REQUEST,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_REQUEST,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value,
+      isr = Set(controllerId)
+    )
+
+    // Version/epoch errors take precedence over other validations since
+    // the leader may be working with outdated state.
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_UPDATE_VERSION,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value,
+      partitionEpoch = partitionEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value,
+      partitionEpoch = partitionEpoch + 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.FENCED_LEADER_EPOCH,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value,
+      leaderEpoch = leaderEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value,
+      leaderEpoch = leaderEpoch + 1
+    )
+
+    // Validate that unexpected exceptions are handled correctly.
+    assertAlterPartition(
+      topLevelError = Errors.UNKNOWN_SERVER_ERROR,
+      leaderRecoveryState = 25, // Invalid recovery state.
+    )
   }
 
-  def captureAlterIsrPartitionError(brokerId: Int, brokerEpoch: Long, isrsToAlter: Map[TopicPartition, LeaderAndIsr], tp: TopicPartition): CompletableFuture[Errors] = {
-    val future = new CompletableFuture[Errors]()
+  @Test
+  def testAlterPartitionErrorsAfterUncleanElection(): Unit = {
+    // - Start 3 brokers with unclean election enabled
+    // - Create a topic with two non-controller replicas: A and B
+    // - Shutdown A to bring ISR to [B]
+    // - Shutdown B to make partition offline
+    // - Restart A to force unclean election with ISR [A]
+    // - Verify AlterPartition handling in this state
+
+    servers = makeServers(numConfigs = 3, uncleanLeaderElectionEnable = true)
+    val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     val controller = getController().kafkaController
-    val callback: AlterIsrCallback = {
-      case Left(partitionResults: Map[TopicPartition, Either[Errors, LeaderAndIsr]]) =>
-        partitionResults.get(tp) match {
-          case Some(Left(error: Errors)) => future.complete(error)
-          case Some(Right(_: LeaderAndIsr)) => future.completeExceptionally(new AssertionError(s"Should have seen an error for $tp in result"))
-          case None => future.completeExceptionally(new AssertionError(s"Should have seen $tp in result"))
-        }
-      case Right(_: Errors) =>
-        future.completeExceptionally(new AssertionError(s"Should not seen top-level error"))
+
+    val tp = new TopicPartition("t", 0)
+    val replicas = servers.map(_.config.nodeId).filter(_ != controllerId).take(2).toList
+    val assignment = Map(tp.partition -> replicas)
+
+    val replica1 :: replica2 :: Nil = replicas
+
+    TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
+    val topicIdOpt = controller.controllerContext.topicIds.get(tp.topic)
+
+    servers(replica1).shutdown()
+    servers(replica1).awaitShutdown()
+
+    val partitionStateAfterFirstShutdown = controller.controllerContext.partitionLeadershipInfo(tp).get
+    assertEquals(replica2, partitionStateAfterFirstShutdown.leaderAndIsr.leader)
+    assertEquals(Set(replica2), partitionStateAfterFirstShutdown.leaderAndIsr.isr.toSet)
+
+    servers(replica2).shutdown()
+    servers(replica2).awaitShutdown()
+
+    val partitionStateAfterSecondShutdown = controller.controllerContext.partitionLeadershipInfo(tp).get
+    assertEquals(-1, partitionStateAfterSecondShutdown.leaderAndIsr.leader)
+    assertEquals(Set(replica2), partitionStateAfterSecondShutdown.leaderAndIsr.isr.toSet)
+
+    servers(replica1).startup()
+    TestUtils.waitUntilLeaderIsKnown(servers, tp)
+
+    val partitionStateAfterRestart = controller.controllerContext.partitionLeadershipInfo(tp).get
+    assertEquals(replica1, partitionStateAfterRestart.leaderAndIsr.leader)
+    assertEquals(Set(replica1), partitionStateAfterRestart.leaderAndIsr.isr.toSet)
+    assertEquals(LeaderRecoveryState.RECOVERING, partitionStateAfterRestart.leaderAndIsr.leaderRecoveryState)
+
+    val leaderId = replica1
+    val leaderBrokerEpoch = servers(replica1).kafkaController.brokerEpoch
+    val leaderEpoch = partitionStateAfterRestart.leaderAndIsr.leaderEpoch
+    val partitionEpoch = partitionStateAfterRestart.leaderAndIsr.partitionEpoch
+
+    def assertAlterPartition(
+      topLevelError: Errors = Errors.NONE,
+      partitionError: Errors = Errors.NONE,
+      leaderId: Int = leaderId,
+      brokerEpoch: Long = leaderBrokerEpoch,
+      leaderEpoch: Int = leaderEpoch,
+      partitionEpoch: Int = partitionEpoch,
+      leaderRecoveryState: Byte = LeaderRecoveryState.RECOVERED.value
+    ): Unit = {
+      assertAlterPartitionError(
+        topicPartition = tp,
+        topicIdOpt = topicIdOpt,
+        leaderId = leaderId,
+        brokerEpoch = brokerEpoch,
+        leaderEpoch = leaderEpoch,
+        partitionEpoch = partitionEpoch,
+        isr = replicas.toSet,
+        leaderRecoveryState = leaderRecoveryState,
+        topLevelError = topLevelError,
+        partitionError = partitionError
+      )
     }
-    controller.eventManager.put(AlterIsrReceived(brokerId, brokerEpoch, isrsToAlter, callback))
-    future
+
+    assertAlterPartition(
+      topLevelError = Errors.STALE_BROKER_EPOCH,
+      brokerEpoch = leaderBrokerEpoch - 1
+    )
+
+    assertAlterPartition(
+      topLevelError = Errors.STALE_BROKER_EPOCH,
+      leaderId = 99
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_UPDATE_VERSION,
+      partitionEpoch = partitionEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      partitionEpoch = partitionEpoch + 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.FENCED_LEADER_EPOCH,
+      leaderEpoch = leaderEpoch - 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      leaderEpoch = leaderEpoch + 1
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_REQUEST,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+
+    // Version/epoch errors take precedence over other validations since
+    // the leader may be working with outdated state.
+
+    assertAlterPartition(
+      partitionError = Errors.INVALID_UPDATE_VERSION,
+      partitionEpoch = partitionEpoch - 1,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      partitionEpoch = partitionEpoch + 1,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.FENCED_LEADER_EPOCH,
+      leaderEpoch = leaderEpoch - 1,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+
+    assertAlterPartition(
+      partitionError = Errors.NOT_CONTROLLER,
+      leaderEpoch = leaderEpoch + 1,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERING.value
+    )
+  }
+
+  def assertAlterPartitionError(
+    topicPartition: TopicPartition,
+    topicIdOpt: Option[Uuid],
+    leaderId: Int,
+    brokerEpoch: Long,
+    leaderEpoch: Int,
+    partitionEpoch: Int,
+    isr: Set[Int],
+    leaderRecoveryState: Byte,
+    topLevelError: Errors,
+    partitionError: Errors,
+  ): Unit = {
+    val topicName = if (topicIdOpt.isEmpty) topicPartition.topic else ""
+    val topicId = topicIdOpt.getOrElse(Uuid.ZERO_UUID)
+
+    val alterPartitionRequest = new AlterPartitionRequestData()
+      .setBrokerId(leaderId)
+      .setBrokerEpoch(brokerEpoch)
+      .setTopics(Seq(new AlterPartitionRequestData.TopicData()
+        .setTopicId(topicId)
+        .setTopicName(topicName)
+        .setPartitions(Seq(new AlterPartitionRequestData.PartitionData()
+          .setPartitionIndex(topicPartition.partition)
+          .setLeaderEpoch(leaderEpoch)
+          .setPartitionEpoch(partitionEpoch)
+          .setNewIsr(isr.toList.map(Int.box).asJava)
+          .setLeaderRecoveryState(leaderRecoveryState)).asJava)).asJava)
+
+    val future = alterPartitionFuture(alterPartitionRequest, if (topicIdOpt.isDefined) AlterPartitionRequestData.HIGHEST_SUPPORTED_VERSION else 1)
+
+    val expectedAlterPartitionResponse = if (topLevelError != Errors.NONE) {
+      new AlterPartitionResponseData().setErrorCode(topLevelError.code)
+    } else {
+      new AlterPartitionResponseData()
+        .setTopics(Seq(new AlterPartitionResponseData.TopicData()
+          .setTopicId(topicId)
+          .setTopicName(topicName)
+          .setPartitions(Seq(new AlterPartitionResponseData.PartitionData()
+            .setPartitionIndex(topicPartition.partition)
+            .setErrorCode(partitionError.code)).asJava)).asJava)
+    }
+
+    assertEquals(expectedAlterPartitionResponse, future.get(10, TimeUnit.SECONDS))
   }
 
   @Test
@@ -952,7 +1437,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.createTopic(zkClient, tp1.topic(), assignment1, servers)
 
     // Test that the first topic has its ID added correctly
-    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     assertNotEquals(None, controller.controllerContext.topicIds.get("t1"))
     val topicId1 = controller.controllerContext.topicIds("t1")
@@ -963,7 +1448,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.createTopic(zkClient, tp2.topic(), assignment2, servers)
 
     // Test that the second topic has its ID added correctly
-    waitForPartitionState(tp2, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp2, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     assertNotEquals(None, controller.controllerContext.topicIds.get("t2"))
     val topicId2 = controller.controllerContext.topicIds("t2")
@@ -976,7 +1461,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
   @Test
   def testTopicIdsAreNotAdded(): Unit = {
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
     TestUtils.waitUntilControllerElected(zkClient)
     val controller = getController().kafkaController
     val tp1 = new TopicPartition("t1", 0)
@@ -988,7 +1473,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.createTopic(zkClient, tp1.topic(), assignment1, servers)
 
     // Test that the first topic has no topic ID added.
-    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp1, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     assertEquals(None, controller.controllerContext.topicIds.get("t1"))
 
@@ -997,7 +1482,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     TestUtils.createTopic(zkClient, tp2.topic(), assignment2, servers)
 
     // Test that the second topic has no topic ID added.
-    waitForPartitionState(tp2, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp2, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     assertEquals(None, controller.controllerContext.topicIds.get("t2"))
 
@@ -1014,15 +1499,15 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
     servers = makeServers(1)
     adminZkClient.createTopic(tp.topic, 1, 1)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
-    val topicIdAfterCreate = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
+    val (topicIdAfterCreate, _) = TestUtils.computeUntilTrue(zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))(_.nonEmpty)
     assertTrue(topicIdAfterCreate.isDefined)
     assertEquals(topicIdAfterCreate, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
       "correct topic ID cannot be found in the controller context")
 
     adminZkClient.addPartitions(tp.topic, assignment, adminZkClient.getBrokerMetadatas(), 2)
-    val topicIdAfterAddition = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
+    val (topicIdAfterAddition, _) = TestUtils.computeUntilTrue(zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))(_.nonEmpty)
     assertEquals(topicIdAfterCreate, topicIdAfterAddition)
     assertEquals(topicIdAfterCreate, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
       "topic ID changed after partition additions")
@@ -1038,17 +1523,17 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val assignment = Map(tp.partition -> ReplicaAssignment(Seq(0), List(), List()))
     val adminZkClient = new AdminZkClient(zkClient)
 
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
     adminZkClient.createTopic(tp.topic, 1, 1)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
-    val topicIdAfterCreate = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
+    val (topicIdAfterCreate, _) = TestUtils.computeUntilTrue(zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))(_.nonEmpty)
     assertEquals(None, topicIdAfterCreate)
     assertEquals(topicIdAfterCreate, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
       "incorrect topic ID can be found in the controller context")
 
     adminZkClient.addPartitions(tp.topic, assignment, adminZkClient.getBrokerMetadatas(), 2)
-    val topicIdAfterAddition = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
+    val (topicIdAfterAddition, _) = TestUtils.computeUntilTrue(zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))(_.nonEmpty)
     assertEquals(topicIdAfterCreate, topicIdAfterAddition)
     assertEquals(topicIdAfterCreate, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
       "topic ID changed after partition additions")
@@ -1066,7 +1551,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     val topicId = controller.controllerContext.topicIds.get("t").get
 
@@ -1079,13 +1564,13 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
   @Test
   def testNoTopicIdPersistsThroughControllerReelection(): Unit = {
-    servers = makeServers(2, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(2, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
     val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     val controller = getController().kafkaController
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     val emptyTopicId = controller.controllerContext.topicIds.get("t")
     assertEquals(None, emptyTopicId)
@@ -1105,7 +1590,7 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     val topicId = controller.controllerContext.topicIds.get("t").get
 
@@ -1119,13 +1604,13 @@ class ControllerIntegrationTest extends QuorumTestHarness {
 
   @Test
   def testTopicIdCreatedOnUpgrade(): Unit = {
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
     val controllerId = TestUtils.waitUntilControllerElected(zkClient)
     val controller = getController().kafkaController
     val tp = new TopicPartition("t", 0)
     val assignment = Map(tp.partition -> Seq(controllerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, controllerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     assertEquals(None, zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))
     assertEquals(None, controller.controllerContext.topicIds.get(tp.topic))
@@ -1156,20 +1641,20 @@ class ControllerIntegrationTest extends QuorumTestHarness {
   @Test
   def testTopicIdCreatedOnUpgradeMultiBrokerScenario(): Unit = {
     // Simulate an upgrade scenario where the controller is still on a pre-topic ID IBP, but the other two brokers are upgraded.
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(1, interBrokerProtocolVersion = Some(MetadataVersion.IBP_2_7_IV0))
     servers = servers ++ makeServers(3, startingIdNumber = 1)
     val originalControllerId = TestUtils.waitUntilControllerElected(zkClient)
     assertEquals(0, originalControllerId)
     val controller = getController().kafkaController
-    assertEquals(KAFKA_2_7_IV0, servers(originalControllerId).config.interBrokerProtocolVersion)
+    assertEquals(IBP_2_7_IV0, servers(originalControllerId).config.interBrokerProtocolVersion)
     val remainingBrokers = servers.filter(_.config.brokerId != originalControllerId)
     val tp = new TopicPartition("t", 0)
     // Only the remaining brokers will have the replicas for the partition
     val assignment = Map(tp.partition -> remainingBrokers.map(_.config.brokerId))
     TestUtils.createTopic(zkClient, tp.topic, partitionReplicaAssignment = assignment, servers = servers)
-    waitForPartitionState(tp, firstControllerEpoch, remainingBrokers(0).config.brokerId, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, remainingBrokers(0).config.brokerId, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
-    val topicIdAfterCreate = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
+    val (topicIdAfterCreate, _) = TestUtils.computeUntilTrue(zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))(_.nonEmpty)
     assertEquals(None, topicIdAfterCreate)
     val emptyTopicId = controller.controllerContext.topicIds.get("t")
     assertEquals(None, emptyTopicId)
@@ -1215,10 +1700,10 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     val adminZkClient = new AdminZkClient(zkClient)
 
     // start server with old IBP
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
     // use create topic with ZK client directly, without topic ID
     adminZkClient.createTopic(tp.topic, 1, 1)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
     val topicIdAfterCreate = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
     val id = servers.head.kafkaController.controllerContext.topicIds.get(tp.topic)
@@ -1230,41 +1715,41 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers(0).shutdown()
     servers(0).awaitShutdown()
     servers = makeServers(1)
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
-      "failed to get expected partition state upon controller restart")
-    val topicIdAfterUpgrade = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
-    assertEquals(topicIdAfterUpgrade, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
-      "expected same topic ID but it can not be found")
-    assertEquals(tp.topic(), servers.head.kafkaController.controllerContext.topicNames(topicIdAfterUpgrade.get),
-      "correct topic name expected but cannot be found in the controller context")
+
+    def awaitTopicId(): Uuid = {
+      // Wait for consistent controller context (Note that `topicIds` is updated before `topicNames`)
+      val (topicIdOpt, isDefined) = TestUtils.computeUntilTrue {
+        val topicIdOpt = servers.head.kafkaController.controllerContext.topicIds.get(tp.topic)
+        topicIdOpt.flatMap { topicId =>
+          val topicNameOpt = servers.head.kafkaController.controllerContext.topicNames.get(topicId)
+          if (topicNameOpt.contains(tp.topic)) {
+            Some(topicId)
+          } else {
+            None
+          }
+        }
+      }(_.isDefined)
+
+      assertTrue(isDefined, "Timed out waiting for a consistent topicId in controller context")
+      assertEquals(topicIdOpt, zkClient.getTopicIdsForTopics(Set(tp.topic)).get(tp.topic))
+      topicIdOpt.get
+    }
+
+    val topicId = awaitTopicId()
 
     // Downgrade back to 2.7
     servers(0).shutdown()
     servers(0).awaitShutdown()
-    servers = makeServers(1, interBrokerProtocolVersion = Some(KAFKA_2_7_IV0))
-    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.initialLeaderEpoch,
+    servers = makeServers(1, interBrokerProtocolVersion = Some(IBP_2_7_IV0))
+    waitForPartitionState(tp, firstControllerEpoch, 0, LeaderAndIsr.InitialLeaderEpoch,
       "failed to get expected partition state upon topic creation")
-    val topicIdAfterDowngrade = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
-    assertTrue(topicIdAfterDowngrade.isDefined)
-    assertEquals(topicIdAfterUpgrade, topicIdAfterDowngrade,
-      "expected same topic ID but it can not be found after downgrade")
-    assertEquals(topicIdAfterDowngrade, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
-      "expected same topic ID in controller context but it is no longer found after downgrade")
-    assertEquals(tp.topic(), servers.head.kafkaController.controllerContext.topicNames(topicIdAfterUpgrade.get),
-      "correct topic name expected but cannot be found in the controller context")
+    assertEquals(topicId, awaitTopicId())
 
     // Reassign partitions
     servers(0).kafkaController.eventManager.put(ApiPartitionReassignment(reassignment, _ => ()))
     waitForPartitionState(tp, 3, 0, 1,
       "failed to get expected partition state upon controller restart")
-    val topicIdAfterReassignment = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
-    assertTrue(topicIdAfterReassignment.isDefined)
-    assertEquals(topicIdAfterUpgrade, topicIdAfterReassignment,
-      "expected same topic ID but it can not be found after reassignment")
-    assertEquals(topicIdAfterUpgrade, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
-      "expected same topic ID in controller context but is no longer found after reassignment")
-    assertEquals(tp.topic(), servers.head.kafkaController.controllerContext.topicNames(topicIdAfterUpgrade.get),
-      "correct topic name expected but cannot be found in the controller context")
+    assertEquals(topicId, awaitTopicId())
 
     // Upgrade back to 2.8
     servers(0).shutdown()
@@ -1272,18 +1757,13 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers = makeServers(1)
     waitForPartitionState(tp, 3, 0, 1,
       "failed to get expected partition state upon controller restart")
-    val topicIdAfterReUpgrade = zkClient.getTopicIdsForTopics(Set(tp.topic())).get(tp.topic())
-    assertEquals(topicIdAfterUpgrade, topicIdAfterReUpgrade,
-      "expected same topic ID but it can not be found after re-upgrade")
-    assertEquals(topicIdAfterReUpgrade, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic),
-      "topic ID can not be found in controller context after re-upgrading IBP")
-    assertEquals(tp.topic(), servers.head.kafkaController.controllerContext.topicNames(topicIdAfterReUpgrade.get),
-      "correct topic name expected but cannot be found in the controller context")
+    assertEquals(topicId, awaitTopicId())
 
     adminZkClient.deleteTopic(tp.topic)
-    TestUtils.waitUntilTrue(() => servers.head.kafkaController.controllerContext.topicIds.get(tp.topic).isEmpty,
-      "topic ID for topic should have been removed from controller context after deletion")
-    assertTrue(servers.head.kafkaController.controllerContext.topicNames.get(topicIdAfterUpgrade.get).isEmpty)
+    // Verify removal from controller context (Note that `topicIds` is updated before `topicNames`)
+    TestUtils.waitUntilTrue(() => !servers.head.kafkaController.controllerContext.topicNames.contains(topicId),
+      "Timed out waiting for removal of topicId from controller context")
+    assertEquals(None, servers.head.kafkaController.controllerContext.topicIds.get(tp.topic))
   }
 
   private def testControllerMove(fun: () => Unit): Unit = {
@@ -1374,9 +1854,9 @@ class ControllerIntegrationTest extends QuorumTestHarness {
                           listeners : Option[String] = None,
                           listenerSecurityProtocolMap : Option[String] = None,
                           controlPlaneListenerName : Option[String] = None,
-                          interBrokerProtocolVersion: Option[ApiVersion] = None,
+                          interBrokerProtocolVersion: Option[MetadataVersion] = None,
                           logDirCount: Int = 1,
-                          startingIdNumber: Int = 0) = {
+                          startingIdNumber: Int = 0): Seq[KafkaServer] = {
     val configs = TestUtils.createBrokerConfigs(numConfigs, zkConnect, enableControlledShutdown = enableControlledShutdown, logDirCount = logDirCount, startingIdNumber = startingIdNumber)
     configs.foreach { config =>
       config.setProperty(KafkaConfig.AutoLeaderRebalanceEnableProp, autoLeaderRebalanceEnable.toString)
@@ -1401,4 +1881,15 @@ class ControllerIntegrationTest extends QuorumTestHarness {
     servers.filter(s => s.config.brokerId == controllerId).head
   }
 
+  private def alterPartitionFuture(alterPartitionRequest: AlterPartitionRequestData,
+                                   alterPartitionVersion: Short): CompletableFuture[AlterPartitionResponseData] = {
+    val future = new CompletableFuture[AlterPartitionResponseData]()
+    getController().kafkaController.eventManager.put(AlterPartitionReceived(
+      alterPartitionRequest,
+      alterPartitionVersion,
+      future.complete
+    ))
+    future
+  }
+
 }
diff --git a/core/src/test/scala/unit/kafka/controller/MockPartitionStateMachine.scala b/core/src/test/scala/unit/kafka/controller/MockPartitionStateMachine.scala
index b9a4d04198da0..9bc6e3cd6344d 100644
--- a/core/src/test/scala/unit/kafka/controller/MockPartitionStateMachine.scala
+++ b/core/src/test/scala/unit/kafka/controller/MockPartitionStateMachine.scala
@@ -23,9 +23,11 @@ import org.apache.kafka.common.TopicPartition
 
 import scala.collection.{Seq, mutable}
 
-class MockPartitionStateMachine(controllerContext: ControllerContext,
-                                uncleanLeaderElectionEnabled: Boolean)
-  extends PartitionStateMachine(controllerContext) {
+class MockPartitionStateMachine(
+  controllerContext: ControllerContext,
+  uncleanLeaderElectionEnabled: Boolean,
+  isLeaderRecoverySupported: Boolean
+) extends PartitionStateMachine(controllerContext) {
 
   var stateChangesByTargetState = mutable.Map.empty[PartitionState, Int].withDefaultValue(0)
 
@@ -101,7 +103,11 @@ class MockPartitionStateMachine(controllerContext: ControllerContext,
         val partitionsWithUncleanLeaderElectionState = validLeaderAndIsrs.map { case (partition, leaderAndIsr) =>
           (partition, Some(leaderAndIsr), isUnclean || uncleanLeaderElectionEnabled)
         }
-        leaderForOffline(controllerContext, partitionsWithUncleanLeaderElectionState)
+        leaderForOffline(
+          controllerContext,
+          isLeaderRecoverySupported,
+          partitionsWithUncleanLeaderElectionState
+        )
       case ReassignPartitionLeaderElectionStrategy =>
         leaderForReassign(controllerContext, validLeaderAndIsrs)
       case PreferredReplicaPartitionLeaderElectionStrategy =>
diff --git a/core/src/test/scala/unit/kafka/controller/PartitionStateMachineTest.scala b/core/src/test/scala/unit/kafka/controller/PartitionStateMachineTest.scala
index 174b9f190e8d9..9f11d42e697c6 100644
--- a/core/src/test/scala/unit/kafka/controller/PartitionStateMachineTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/PartitionStateMachineTest.scala
@@ -24,10 +24,13 @@ import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
 import kafka.zk.{KafkaZkClient, TopicPartitionStateZNode}
 import kafka.zookeeper._
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.server.common.MetadataVersion.{IBP_3_1_IV0, IBP_3_2_IV0}
 import org.apache.zookeeper.KeeperException.Code
 import org.apache.zookeeper.data.Stat
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{BeforeEach, Test}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import org.mockito.ArgumentMatchers.{any, anyInt}
 import org.mockito.Mockito.{mock, verify, when}
 
@@ -167,7 +170,7 @@ class PartitionStateMachineTest {
         TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch), stat, ResponseMetadata(0, 0))))
 
     val leaderAndIsrAfterElection = leaderAndIsr.newLeader(brokerId)
-    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
+    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withPartitionEpoch(2)
     when(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
       .thenReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
 
@@ -202,7 +205,7 @@ class PartitionStateMachineTest {
       .thenReturn(Seq(GetDataResponse(Code.OK, null, Some(partition),
         TopicPartitionStateZNode.encode(leaderIsrAndControllerEpoch), stat, ResponseMetadata(0, 0))))
     val leaderAndIsrAfterElection = leaderAndIsr.newLeaderAndIsr(otherBrokerId, List(otherBrokerId))
-    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
+    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withPartitionEpoch(2)
     when(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
       .thenReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
 
@@ -256,7 +259,7 @@ class PartitionStateMachineTest {
     when(mockZkClient.getLogConfigs(Set.empty, config.originals()))
       .thenReturn((Map(partition.topic -> LogConfig()), Map.empty[String, Exception]))
     val leaderAndIsrAfterElection = leaderAndIsr.newLeader(brokerId)
-    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
+    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withPartitionEpoch(2)
     when(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
       .thenReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
 
@@ -275,8 +278,11 @@ class PartitionStateMachineTest {
     assertEquals(OnlinePartition, partitionState(partition))
   }
 
-  @Test
-  def testOfflinePartitionToUncleanOnlinePartitionTransition(): Unit = {
+  @ParameterizedTest
+  @ValueSource(booleans = Array(true, false))
+  def testOfflinePartitionToUncleanOnlinePartitionTransition(
+    isLeaderRecoverySupported: Boolean
+  ): Unit = {
     /* Starting scenario: Leader: X, Isr: [X], Replicas: [X, Y], LiveBrokers: [Y]
      * Ending scenario: Leader: Y, Isr: [Y], Replicas: [X, Y], LiverBrokers: [Y]
      *
@@ -284,6 +290,22 @@ class PartitionStateMachineTest {
      * election on the offline partition results on the first live broker getting
      * elected.
      */
+
+
+    val partitionStateMachine = {
+      val apiVersion = if (isLeaderRecoverySupported) IBP_3_2_IV0 else IBP_3_1_IV0
+      val properties = TestUtils.createBrokerConfig(brokerId, "zkConnect")
+
+      properties.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, apiVersion.toString)
+
+      new ZkPartitionStateMachine(
+        KafkaConfig.fromProps(properties),
+        new StateChangeLogger(brokerId, true, None),
+        controllerContext,
+        mockZkClient,
+        mockControllerBrokerRequestBatch
+      )
+    }
     val leaderBrokerId = brokerId + 1
     controllerContext.setLiveBrokers(Map(TestUtils.createBrokerAndEpoch(brokerId, "host", 0)))
     controllerContext.updatePartitionFullReplicaAssignment(
@@ -309,8 +331,12 @@ class PartitionStateMachineTest {
       )
     )
 
-    val leaderAndIsrAfterElection = leaderAndIsr.newLeaderAndIsr(brokerId, List(brokerId))
-    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withZkVersion(2)
+    val leaderAndIsrAfterElection = if (isLeaderRecoverySupported) {
+      leaderAndIsr.newRecoveringLeaderAndIsr(brokerId, List(brokerId))
+    } else {
+      leaderAndIsr.newLeaderAndIsr(brokerId, List(brokerId))
+    }
+    val updatedLeaderAndIsr = leaderAndIsrAfterElection.withPartitionEpoch(2)
     when(mockZkClient.updateLeaderAndIsr(Map(partition -> leaderAndIsrAfterElection), controllerEpoch, controllerContext.epochZkVersion))
       .thenReturn(UpdateLeaderAndIsrResult(Map(partition -> Right(updatedLeaderAndIsr)), Seq.empty))
 
@@ -484,7 +510,11 @@ class PartitionStateMachineTest {
       controllerContext.updatePartitionFullReplicaAssignment(partition, ReplicaAssignment(Seq(brokerId)))
     }
 
-    val partitionStateMachine = new MockPartitionStateMachine(controllerContext, uncleanLeaderElectionEnabled = false)
+    val partitionStateMachine = new MockPartitionStateMachine(
+      controllerContext,
+      uncleanLeaderElectionEnabled = false,
+      isLeaderRecoverySupported = true
+    )
     val replicaStateMachine = new MockReplicaStateMachine(controllerContext)
     val deletionClient = mock(classOf[DeletionClient])
     val topicDeletionManager = new TopicDeletionManager(config, controllerContext,
diff --git a/core/src/test/scala/unit/kafka/controller/ReplicaStateMachineTest.scala b/core/src/test/scala/unit/kafka/controller/ReplicaStateMachineTest.scala
index ecb25ea898ea2..34187b138427f 100644
--- a/core/src/test/scala/unit/kafka/controller/ReplicaStateMachineTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/ReplicaStateMachineTest.scala
@@ -213,7 +213,7 @@ class ReplicaStateMachineTest {
 
     val stat = new Stat(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
     val adjustedLeaderAndIsr = leaderAndIsr.newLeaderAndIsr(LeaderAndIsr.NoLeader, List(otherBrokerId))
-    val updatedLeaderAndIsr = adjustedLeaderAndIsr.withZkVersion(adjustedLeaderAndIsr .zkVersion + 1)
+    val updatedLeaderAndIsr = adjustedLeaderAndIsr.withPartitionEpoch(adjustedLeaderAndIsr.partitionEpoch + 1)
     val updatedLeaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(updatedLeaderAndIsr, controllerEpoch)
     when(mockZkClient.getTopicPartitionStatesRaw(partitions)).thenReturn(
       Seq(GetDataResponse(Code.OK, null, Some(partition),
diff --git a/core/src/test/scala/unit/kafka/controller/TopicDeletionManagerTest.scala b/core/src/test/scala/unit/kafka/controller/TopicDeletionManagerTest.scala
index ec2339dea1e40..a2ee8bfbd8006 100644
--- a/core/src/test/scala/unit/kafka/controller/TopicDeletionManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/controller/TopicDeletionManagerTest.scala
@@ -43,7 +43,11 @@ class TopicDeletionManagerTest {
     val replicaStateMachine = new MockReplicaStateMachine(controllerContext)
     replicaStateMachine.startup()
 
-    val partitionStateMachine = new MockPartitionStateMachine(controllerContext, uncleanLeaderElectionEnabled = false)
+    val partitionStateMachine = new MockPartitionStateMachine(
+      controllerContext,
+      uncleanLeaderElectionEnabled = false,
+      isLeaderRecoverySupported = true
+    )
     partitionStateMachine.startup()
 
     val deletionManager = new TopicDeletionManager(config, controllerContext, replicaStateMachine,
@@ -66,7 +70,11 @@ class TopicDeletionManagerTest {
     val replicaStateMachine = new MockReplicaStateMachine(controllerContext)
     replicaStateMachine.startup()
 
-    val partitionStateMachine = new MockPartitionStateMachine(controllerContext, uncleanLeaderElectionEnabled = false)
+    val partitionStateMachine = new MockPartitionStateMachine(
+      controllerContext,
+      uncleanLeaderElectionEnabled = false,
+      isLeaderRecoverySupported = true
+    )
     partitionStateMachine.startup()
 
     val deletionManager = new TopicDeletionManager(config, controllerContext, replicaStateMachine,
@@ -126,7 +134,11 @@ class TopicDeletionManagerTest {
     val replicaStateMachine = new MockReplicaStateMachine(controllerContext)
     replicaStateMachine.startup()
 
-    val partitionStateMachine = new MockPartitionStateMachine(controllerContext, uncleanLeaderElectionEnabled = false)
+    val partitionStateMachine = new MockPartitionStateMachine(
+      controllerContext,
+      uncleanLeaderElectionEnabled = false,
+      isLeaderRecoverySupported = true
+    )
     partitionStateMachine.startup()
 
     val deletionManager = new TopicDeletionManager(config, controllerContext, replicaStateMachine,
@@ -194,7 +206,11 @@ class TopicDeletionManagerTest {
     val replicaStateMachine = new MockReplicaStateMachine(controllerContext)
     replicaStateMachine.startup()
 
-    val partitionStateMachine = new MockPartitionStateMachine(controllerContext, uncleanLeaderElectionEnabled = false)
+    val partitionStateMachine = new MockPartitionStateMachine(
+      controllerContext,
+      uncleanLeaderElectionEnabled = false,
+      isLeaderRecoverySupported = true
+    )
     partitionStateMachine.startup()
 
     val deletionManager = new TopicDeletionManager(config, controllerContext, replicaStateMachine,
diff --git a/core/src/test/scala/unit/kafka/coordinator/group/GroupMetadataManagerTest.scala b/core/src/test/scala/unit/kafka/coordinator/group/GroupMetadataManagerTest.scala
index 7132ca79d372a..688d6e83b0dc9 100644
--- a/core/src/test/scala/unit/kafka/coordinator/group/GroupMetadataManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/coordinator/group/GroupMetadataManagerTest.scala
@@ -21,14 +21,12 @@ import java.lang.management.ManagementFactory
 import java.nio.ByteBuffer
 import java.util.concurrent.locks.ReentrantLock
 import java.util.{Collections, Optional}
-import com.yammer.metrics.core.Gauge
 
+import com.yammer.metrics.core.Gauge
 import javax.management.ObjectName
-import kafka.api._
 import kafka.cluster.Partition
 import kafka.common.OffsetAndMetadata
 import kafka.log.{AppendOrigin, LogAppendInfo, UnifiedLog}
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server.{FetchDataInfo, FetchLogEnd, HostedPartition, KafkaConfig, LogOffsetMetadata, ReplicaManager, RequestLocal}
 import kafka.utils.{KafkaScheduler, MockTime, TestUtils}
 import org.apache.kafka.clients.consumer.ConsumerPartitionAssignor
@@ -42,6 +40,9 @@ import org.apache.kafka.common.record._
 import org.apache.kafka.common.requests.OffsetFetchResponse
 import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion._
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.mockito.{ArgumentCaptor, ArgumentMatchers}
@@ -91,7 +92,7 @@ class GroupMetadataManagerTest {
     metrics = new kMetrics()
     time = new MockTime
     replicaManager = mock(classOf[ReplicaManager])
-    groupMetadataManager = new GroupMetadataManager(0, ApiVersion.latestVersion, offsetConfig, replicaManager,
+    groupMetadataManager = new GroupMetadataManager(0, MetadataVersion.latest, offsetConfig, replicaManager,
       time, metrics)
     groupMetadataManager.startup(() => numOffsetsPartitions, false)
     partition = mock(classOf[Partition])
@@ -106,7 +107,7 @@ class GroupMetadataManagerTest {
   def testLogInfoFromCleanupGroupMetadata(): Unit = {
     var expiredOffsets: Int = 0
     var infoCount = 0
-    val gmm = new GroupMetadataManager(0, ApiVersion.latestVersion, offsetConfig, replicaManager, time, metrics) {
+    val gmm = new GroupMetadataManager(0, MetadataVersion.latest, offsetConfig, replicaManager, time, metrics) {
       override def cleanupGroupMetadata(groups: Iterable[GroupMetadata], requestLocal: RequestLocal,
                                         selector: GroupMetadata => Map[TopicPartition, OffsetAndMetadata]): Int = expiredOffsets
 
@@ -1054,17 +1055,17 @@ class GroupMetadataManagerTest {
     val protocol = "range"
     val memberId = "memberId"
 
-    for (apiVersion <- ApiVersion.allVersions) {
-      val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, apiVersion = apiVersion)
+    for (metadataVersion <- MetadataVersion.VERSIONS) {
+      val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, metadataVersion = metadataVersion)
 
       val deserializedGroupMetadata = GroupMetadataManager.readGroupMessageValue(groupId, groupMetadataRecord.value(), time)
       // GROUP_METADATA_VALUE_SCHEMA_V2 or higher should correctly set the currentStateTimestamp
-      if (apiVersion >= KAFKA_2_1_IV0)
+      if (metadataVersion.isAtLeast(IBP_2_1_IV0))
         assertEquals(Some(time.milliseconds()), deserializedGroupMetadata.currentStateTimestamp,
-          s"the apiVersion $apiVersion doesn't set the currentStateTimestamp correctly.")
+          s"the metadataVersion $metadataVersion doesn't set the currentStateTimestamp correctly.")
       else
         assertTrue(deserializedGroupMetadata.currentStateTimestamp.isEmpty,
-          s"the apiVersion $apiVersion should not set the currentStateTimestamp.")
+          s"the metadataVersion $metadataVersion should not set the currentStateTimestamp.")
     }
   }
 
@@ -1073,10 +1074,10 @@ class GroupMetadataManagerTest {
     val generation = 1
     val protocol = "range"
     val memberId = "memberId"
-    val oldApiVersions = Array(KAFKA_0_9_0, KAFKA_0_10_1_IV0, KAFKA_2_1_IV0)
+    val oldMetadataVersions = Array(IBP_0_9_0, IBP_0_10_1_IV0, IBP_2_1_IV0)
 
-    for (apiVersion <- oldApiVersions) {
-      val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, apiVersion = apiVersion)
+    for (metadataVersion <- oldMetadataVersions) {
+      val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, metadataVersion = metadataVersion)
 
       val deserializedGroupMetadata = GroupMetadataManager.readGroupMessageValue(groupId, groupMetadataRecord.value(), time)
       assertEquals(groupId, deserializedGroupMetadata.groupId)
@@ -2181,10 +2182,10 @@ class GroupMetadataManagerTest {
       new TopicPartition("bar", 0) -> 8992L
     )
 
-    val apiVersion = KAFKA_1_1_IV0
-    val offsetCommitRecords = createCommittedOffsetRecords(committedOffsets, apiVersion = apiVersion, retentionTimeOpt = Some(100))
+    val metadataVersion = IBP_1_1_IV0
+    val offsetCommitRecords = createCommittedOffsetRecords(committedOffsets, metadataVersion = metadataVersion, retentionTimeOpt = Some(100))
     val memberId = "98098230493"
-    val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, apiVersion = apiVersion)
+    val groupMetadataRecord = buildStableGroupRecordWithMember(generation, protocolType, protocol, memberId, metadataVersion = metadataVersion)
     val records = MemoryRecords.withRecords(startOffset, CompressionType.NONE,
       (offsetCommitRecords ++ Seq(groupMetadataRecord)).toArray: _*)
 
@@ -2255,8 +2256,8 @@ class GroupMetadataManagerTest {
       commitTimestamp = time.milliseconds(),
       expireTimestamp = None)
 
-    def verifySerde(apiVersion: ApiVersion, expectedOffsetCommitValueVersion: Int): Unit = {
-      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, apiVersion)
+    def verifySerde(metadataVersion: MetadataVersion, expectedOffsetCommitValueVersion: Int): Unit = {
+      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, metadataVersion)
       val buffer = ByteBuffer.wrap(bytes)
 
       assertEquals(expectedOffsetCommitValueVersion, buffer.getShort(0).toInt)
@@ -2275,10 +2276,10 @@ class GroupMetadataManagerTest {
       assertEquals(expectedLeaderEpoch, deserializedOffsetAndMetadata.leaderEpoch)
     }
 
-    for (version <- ApiVersion.allVersions) {
+    for (version <- MetadataVersion.VERSIONS) {
       val expectedSchemaVersion = version match {
-        case v if v < KAFKA_2_1_IV0 => 1
-        case v if v < KAFKA_2_1_IV1 => 2
+        case v if v.isLessThan(IBP_2_1_IV0) => 1
+        case v if v.isLessThan(IBP_2_1_IV1) => 2
         case _ => 3
       }
       verifySerde(version, expectedSchemaVersion)
@@ -2297,8 +2298,8 @@ class GroupMetadataManagerTest {
       commitTimestamp = time.milliseconds(),
       expireTimestamp = Some(time.milliseconds() + 1000))
 
-    def verifySerde(apiVersion: ApiVersion): Unit = {
-      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, apiVersion)
+    def verifySerde(metadataVersion: MetadataVersion): Unit = {
+      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, metadataVersion)
       val buffer = ByteBuffer.wrap(bytes)
       assertEquals(1, buffer.getShort(0).toInt)
 
@@ -2306,7 +2307,7 @@ class GroupMetadataManagerTest {
       assertEquals(offsetAndMetadata, deserializedOffsetAndMetadata)
     }
 
-    for (version <- ApiVersion.allVersions)
+    for (version <- MetadataVersion.VERSIONS)
       verifySerde(version)
   }
 
@@ -2319,13 +2320,13 @@ class GroupMetadataManagerTest {
       commitTimestamp = time.milliseconds(),
       expireTimestamp = None)
 
-    def verifySerde(apiVersion: ApiVersion): Unit = {
-      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, apiVersion)
+    def verifySerde(metadataVersion: MetadataVersion): Unit = {
+      val bytes = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, metadataVersion)
       val buffer = ByteBuffer.wrap(bytes)
       val version = buffer.getShort(0).toInt
-      if (apiVersion < KAFKA_2_1_IV0)
+      if (metadataVersion.isLessThan(IBP_2_1_IV0))
         assertEquals(1, version)
-      else if (apiVersion < KAFKA_2_1_IV1)
+      else if (metadataVersion.isLessThan(IBP_2_1_IV1))
         assertEquals(2, version)
       else
         assertEquals(3, version)
@@ -2334,7 +2335,7 @@ class GroupMetadataManagerTest {
       assertEquals(offsetAndMetadata, deserializedOffsetAndMetadata)
     }
 
-    for (version <- ApiVersion.allVersions)
+    for (version <- MetadataVersion.VERSIONS)
       verifySerde(version)
   }
 
@@ -2397,7 +2398,7 @@ class GroupMetadataManagerTest {
     val offsetCommitRecord = TestUtils.records(Seq(
       new SimpleRecord(
         GroupMetadataManager.offsetCommitKey(groupId, topicPartition),
-        GroupMetadataManager.offsetCommitValue(OffsetAndMetadata(35L, "", time.milliseconds()), ApiVersion.latestVersion)
+        GroupMetadataManager.offsetCommitValue(OffsetAndMetadata(35L, "", time.milliseconds()), MetadataVersion.latest)
       )
     )).records.asScala.head
     val (keyStringOpt, valueStringOpt) = GroupMetadataManager.formatRecordKeyAndValue(offsetCommitRecord)
@@ -2487,20 +2488,20 @@ class GroupMetadataManagerTest {
                                                protocol: String,
                                                memberId: String,
                                                assignmentBytes: Array[Byte] = Array.emptyByteArray,
-                                               apiVersion: ApiVersion = ApiVersion.latestVersion): SimpleRecord = {
+                                               metadataVersion: MetadataVersion = MetadataVersion.latest): SimpleRecord = {
     val memberProtocols = List((protocol, Array.emptyByteArray))
     val member = new MemberMetadata(memberId, Some(groupInstanceId), "clientId", "clientHost", 30000, 10000, protocolType, memberProtocols)
     val group = GroupMetadata.loadGroup(groupId, Stable, generation, protocolType, protocol, memberId,
-      if (apiVersion >= KAFKA_2_1_IV0) Some(time.milliseconds()) else None, Seq(member), time)
+      if (metadataVersion.isAtLeast(IBP_2_1_IV0)) Some(time.milliseconds()) else None, Seq(member), time)
     val groupMetadataKey = GroupMetadataManager.groupMetadataKey(groupId)
-    val groupMetadataValue = GroupMetadataManager.groupMetadataValue(group, Map(memberId -> assignmentBytes), apiVersion)
+    val groupMetadataValue = GroupMetadataManager.groupMetadataValue(group, Map(memberId -> assignmentBytes), metadataVersion)
     new SimpleRecord(groupMetadataKey, groupMetadataValue)
   }
 
   private def buildEmptyGroupRecord(generation: Int, protocolType: String): SimpleRecord = {
     val group = GroupMetadata.loadGroup(groupId, Empty, generation, protocolType, null, null, None, Seq.empty, time)
     val groupMetadataKey = GroupMetadataManager.groupMetadataKey(groupId)
-    val groupMetadataValue = GroupMetadataManager.groupMetadataValue(group, Map.empty, ApiVersion.latestVersion)
+    val groupMetadataValue = GroupMetadataManager.groupMetadataValue(group, Map.empty, MetadataVersion.latest)
     new SimpleRecord(groupMetadataKey, groupMetadataValue)
   }
 
@@ -2544,7 +2545,7 @@ class GroupMetadataManagerTest {
 
   private def createCommittedOffsetRecords(committedOffsets: Map[TopicPartition, Long],
                                            groupId: String = groupId,
-                                           apiVersion: ApiVersion = ApiVersion.latestVersion,
+                                           metadataVersion: MetadataVersion = MetadataVersion.latest,
                                            retentionTimeOpt: Option[Long] = None): Seq[SimpleRecord] = {
     committedOffsets.map { case (topicPartition, offset) =>
       val commitTimestamp = time.milliseconds()
@@ -2556,7 +2557,7 @@ class GroupMetadataManagerTest {
           OffsetAndMetadata(offset, "", commitTimestamp)
       }
       val offsetCommitKey = GroupMetadataManager.offsetCommitKey(groupId, topicPartition)
-      val offsetCommitValue = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, apiVersion)
+      val offsetCommitValue = GroupMetadataManager.offsetCommitValue(offsetAndMetadata, metadataVersion)
       new SimpleRecord(offsetCommitKey, offsetCommitValue)
     }.toSeq
   }
diff --git a/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionMarkerChannelManagerTest.scala b/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionMarkerChannelManagerTest.scala
index 20dbddc9546eb..a159809905550 100644
--- a/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionMarkerChannelManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionMarkerChannelManagerTest.scala
@@ -22,7 +22,6 @@ import java.util.Collections
 import java.util.concurrent.{Callable, Executors, Future}
 
 import kafka.common.RequestAndCompletionHandler
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server.{KafkaConfig, MetadataCache}
 import kafka.utils.TestUtils
 import org.apache.kafka.clients.{ClientResponse, NetworkClient}
@@ -31,6 +30,7 @@ import org.apache.kafka.common.record.RecordBatch
 import org.apache.kafka.common.requests.{RequestHeader, TransactionResult, WriteTxnMarkersRequest, WriteTxnMarkersResponse}
 import org.apache.kafka.common.utils.MockTime
 import org.apache.kafka.common.{Node, TopicPartition}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 import org.mockito.ArgumentMatchers.any
diff --git a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala
index b502d4863f3a1..43226507808cd 100755
--- a/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala
+++ b/core/src/test/scala/unit/kafka/integration/KafkaServerTestHarness.scala
@@ -20,7 +20,6 @@ package kafka.integration
 import java.io.File
 import java.util
 import java.util.Arrays
-
 import kafka.server.QuorumTestHarness
 import kafka.server._
 import kafka.utils.TestUtils
@@ -30,12 +29,14 @@ import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
 import scala.collection.{Seq, mutable}
 import scala.jdk.CollectionConverters._
 import java.util.Properties
-
 import kafka.utils.TestUtils.{createAdminClient, resource}
+import org.apache.kafka.common.acl.AccessControlEntry
 import org.apache.kafka.common.{KafkaException, Uuid}
 import org.apache.kafka.common.network.ListenerName
+import org.apache.kafka.common.resource.ResourcePattern
 import org.apache.kafka.common.security.scram.ScramCredential
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.controller.ControllerRequestContextUtil.ANONYMOUS_CONTEXT
 
 /**
  * A test harness that brings up some number of broker nodes
@@ -168,10 +169,11 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
     numPartitions: Int = 1,
     replicationFactor: Int = 1,
     topicConfig: Properties = new Properties,
-    listenerName: ListenerName = listenerName
+    listenerName: ListenerName = listenerName,
+    adminClientConfig: Properties = new Properties
   ): scala.collection.immutable.Map[Int, Int] = {
     if (isKRaftTest()) {
-      resource(createAdminClient(brokers, listenerName)) { admin =>
+      resource(createAdminClient(brokers, listenerName, adminClientConfig)) { admin =>
         TestUtils.createTopicWithAdmin(
           admin = admin,
           topic = topic,
@@ -237,6 +239,14 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
     }
   }
 
+  def addAndVerifyAcls(acls: Set[AccessControlEntry], resource: ResourcePattern): Unit = {
+    TestUtils.addAndVerifyAcls(brokers, acls, resource, controllerServers)
+  }
+
+  def removeAndVerifyAcls(acls: Set[AccessControlEntry], resource: ResourcePattern): Unit = {
+    TestUtils.removeAndVerifyAcls(brokers, acls, resource, controllerServers)
+  }
+
   /**
    * Pick a broker at random and kill it if it isn't already dead
    * Return the id of the broker killed
@@ -289,7 +299,7 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
   def getTopicIds(names: Seq[String]): Map[String, Uuid] = {
     val result = new util.HashMap[String, Uuid]()
     if (isKRaftTest()) {
-      val topicIdsMap = controllerServer.controller.findTopicIds(Long.MaxValue, names.asJava).get()
+      val topicIdsMap = controllerServer.controller.findTopicIds(ANONYMOUS_CONTEXT, names.asJava).get()
       names.foreach { name =>
         val response = topicIdsMap.get(name)
         result.put(name, response.result())
@@ -305,7 +315,7 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
 
   def getTopicIds(): Map[String, Uuid] = {
     if (isKRaftTest()) {
-      controllerServer.controller.findAllTopicIds(Long.MaxValue).get().asScala.toMap
+      controllerServer.controller.findAllTopicIds(ANONYMOUS_CONTEXT).get().asScala.toMap
     } else {
       getController().kafkaController.controllerContext.topicIds.toMap
     }
@@ -314,7 +324,7 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
   def getTopicNames(): Map[Uuid, String] = {
     if (isKRaftTest()) {
       val result = new util.HashMap[Uuid, String]()
-      controllerServer.controller.findAllTopicIds(Long.MaxValue).get().entrySet().forEach {
+      controllerServer.controller.findAllTopicIds(ANONYMOUS_CONTEXT).get().entrySet().forEach {
         e => result.put(e.getValue(), e.getKey())
       }
       result.asScala.toMap
@@ -351,4 +361,17 @@ abstract class KafkaServerTestHarness extends QuorumTestHarness {
       )
     }
   }
+
+  def aliveBrokers: Seq[KafkaBroker] = {
+    _brokers.filter(broker => alive(broker.config.brokerId)).toSeq
+  }
+
+  def ensureConsistentKRaftMetadata(): Unit = {
+    if (isKRaftTest()) {
+      TestUtils.ensureConsistentKRaftMetadata(
+        aliveBrokers,
+        controllerServer
+      )
+    }
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/integration/MetricsDuringTopicCreationDeletionTest.scala b/core/src/test/scala/unit/kafka/integration/MetricsDuringTopicCreationDeletionTest.scala
index e045ea9b77920..7d363d13b3cc2 100644
--- a/core/src/test/scala/unit/kafka/integration/MetricsDuringTopicCreationDeletionTest.scala
+++ b/core/src/test/scala/unit/kafka/integration/MetricsDuringTopicCreationDeletionTest.scala
@@ -25,7 +25,7 @@ import kafka.utils.{Logging, TestUtils}
 import scala.jdk.CollectionConverters._
 import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
 import com.yammer.metrics.core.Gauge
-import kafka.metrics.KafkaYammerMetrics
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 
 class MetricsDuringTopicCreationDeletionTest extends KafkaServerTestHarness with Logging {
 
diff --git a/core/src/test/scala/unit/kafka/integration/MinIsrConfigTest.scala b/core/src/test/scala/unit/kafka/integration/MinIsrConfigTest.scala
index 35f7cc4a6bbf2..3ed7e8dc57cb2 100644
--- a/core/src/test/scala/unit/kafka/integration/MinIsrConfigTest.scala
+++ b/core/src/test/scala/unit/kafka/integration/MinIsrConfigTest.scala
@@ -21,7 +21,7 @@ import java.util.Properties
 import scala.collection.Seq
 
 import kafka.server.KafkaConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.ValueSource
 
@@ -30,7 +30,7 @@ class MinIsrConfigTest extends KafkaServerTestHarness {
   overridingProps.put(KafkaConfig.MinInSyncReplicasProp, "5")
   def generateConfigs: Seq[KafkaConfig] = TestUtils.createBrokerConfigs(1, zkConnectOrNull).map(KafkaConfig.fromProps(_, overridingProps))
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDeaultKafkaConfig(quorum: String): Unit = {
     assert(brokers.head.logManager.initialDefaultConfig.minInSyncReplicas == 5)
diff --git a/core/src/test/scala/unit/kafka/log/LocalLogTest.scala b/core/src/test/scala/unit/kafka/log/LocalLogTest.scala
index 67e3a79b8557c..d92f1576cd3dc 100644
--- a/core/src/test/scala/unit/kafka/log/LocalLogTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LocalLogTest.scala
@@ -128,6 +128,16 @@ class LocalLogTest {
     assertTrue(logDir.exists)
   }
 
+  @Test
+  def testRollEmptyActiveSegment(): Unit = {
+    val oldActiveSegment = log.segments.activeSegment
+    log.roll()
+    assertEquals(1, log.segments.numberOfSegments)
+    assertNotEquals(oldActiveSegment, log.segments.activeSegment)
+    assertFalse(logDir.listFiles.isEmpty)
+    assertTrue(oldActiveSegment.hasSuffix(LocalLog.DeletedFileSuffix))
+  }
+
   @Test
   def testLogDeleteDirSuccessWhenEmptyAndFailureWhenNonEmpty(): Unit ={
     val record = new SimpleRecord(mockTime.milliseconds, "a".getBytes)
@@ -384,6 +394,24 @@ class LocalLogTest {
     assertEquals(log.segments.nonActiveLogSegmentsFrom(0L).toSeq, deletableSegments.toSeq)
   }
 
+  @Test
+  def testCreateAndDeleteSegment(): Unit = {
+    val record = new SimpleRecord(mockTime.milliseconds, "a".getBytes)
+    appendRecords(List(record))
+    val newOffset = log.segments.activeSegment.baseOffset + 1
+    val oldActiveSegment = log.segments.activeSegment
+    val newActiveSegment = log.createAndDeleteSegment(newOffset, log.segments.activeSegment, asyncDelete = true, LogTruncation(log))
+    assertEquals(1, log.segments.numberOfSegments)
+    assertEquals(newActiveSegment, log.segments.activeSegment)
+    assertNotEquals(oldActiveSegment, log.segments.activeSegment)
+    assertTrue(oldActiveSegment.hasSuffix(LocalLog.DeletedFileSuffix))
+    assertEquals(newOffset, log.segments.activeSegment.baseOffset)
+    assertEquals(0L, log.recoveryPoint)
+    assertEquals(newOffset, log.logEndOffset)
+    val fetchDataInfo = readRecords(startOffset = newOffset)
+    assertTrue(fetchDataInfo.records.records.asScala.isEmpty)
+  }
+
   @Test
   def testTruncateFullyAndStartAt(): Unit = {
     val record = new SimpleRecord(mockTime.milliseconds, "a".getBytes)
@@ -397,6 +425,7 @@ class LocalLogTest {
       appendRecords(List(record), initialOffset = offset)
     }
     assertEquals(5, log.segments.numberOfSegments)
+    assertNotEquals(10L, log.segments.activeSegment.baseOffset)
     val expected = List[LogSegment]() ++ log.segments.values
     val deleted = log.truncateFullyAndStartAt(10L)
     assertEquals(expected, deleted)
diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
index c9797439a4b12..7cfa764d379ff 100644
--- a/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogCleanerIntegrationTest.scala
@@ -20,10 +20,11 @@ package kafka.log
 import java.io.PrintWriter
 
 import com.yammer.metrics.core.{Gauge, MetricName}
-import kafka.metrics.{KafkaMetricsGroup, KafkaYammerMetrics}
+import kafka.metrics.KafkaMetricsGroup
 import kafka.utils.{MockTime, TestUtils}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.record.{CompressionType, RecordBatch}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}
 
diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerManagerTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerManagerTest.scala
index 0cdafed127f6f..fdc05c74f8613 100644
--- a/core/src/test/scala/unit/kafka/log/LogCleanerManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogCleanerManagerTest.scala
@@ -36,17 +36,17 @@ import scala.collection.mutable
   */
 class LogCleanerManagerTest extends Logging {
 
-  val tmpDir = TestUtils.tempDir()
-  val tmpDir2 = TestUtils.tempDir()
-  val logDir = TestUtils.randomPartitionLogDir(tmpDir)
-  val logDir2 = TestUtils.randomPartitionLogDir(tmpDir)
+  val tmpDir: File = TestUtils.tempDir()
+  val tmpDir2: File = TestUtils.tempDir()
+  val logDir: File = TestUtils.randomPartitionLogDir(tmpDir)
+  val logDir2: File = TestUtils.randomPartitionLogDir(tmpDir)
   val topicPartition = new TopicPartition("log", 0)
   val topicPartition2 = new TopicPartition("log2", 0)
   val logProps = new Properties()
   logProps.put(LogConfig.SegmentBytesProp, 1024: java.lang.Integer)
   logProps.put(LogConfig.SegmentIndexBytesProp, 1024: java.lang.Integer)
   logProps.put(LogConfig.CleanupPolicyProp, LogConfig.Compact)
-  val logConfig = LogConfig(logProps)
+  val logConfig: LogConfig = LogConfig(logProps)
   val time = new MockTime(1400000000000L, 1000L)  // Tue May 13 16:53:20 UTC 2014 for `currentTimeMs`
   val offset = 999
 
@@ -394,7 +394,7 @@ class LogCleanerManagerTest extends Logging {
     val cleanerManager: LogCleanerManager = createCleanerManager(log)
 
     // expect the checkpoint offset is not the expectedOffset before doing updateCheckpoints
-    assertNotEquals(offset, cleanerManager.allCleanerCheckpoints.get(topicPartition).getOrElse(0))
+    assertNotEquals(offset, cleanerManager.allCleanerCheckpoints.getOrElse(topicPartition, 0))
 
     cleanerManager.updateCheckpoints(logDir, partitionToUpdateOrAdd = Option(topicPartition, offset))
     // expect the checkpoint offset is now updated to the expected offset after doing updateCheckpoints
@@ -413,7 +413,7 @@ class LogCleanerManagerTest extends Logging {
 
     // updateCheckpoints should remove the topicPartition data in the logDir
     cleanerManager.updateCheckpoints(logDir, partitionToRemove = Option(topicPartition))
-    assertTrue(cleanerManager.allCleanerCheckpoints.get(topicPartition).isEmpty)
+    assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
   }
 
   @Test
@@ -431,7 +431,7 @@ class LogCleanerManagerTest extends Logging {
     cleanerManager.handleLogDirFailure(logDir.getAbsolutePath)
     // verify the partition data in logDir is gone, and data in logDir2 is still there
     assertEquals(offset, cleanerManager.allCleanerCheckpoints(topicPartition2))
-    assertTrue(cleanerManager.allCleanerCheckpoints.get(topicPartition).isEmpty)
+    assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
   }
 
   @Test
@@ -471,7 +471,7 @@ class LogCleanerManagerTest extends Logging {
 
     // force delete the logDir2 from checkpoints, so that the partition data should also be deleted
     cleanerManager.handleLogDirFailure(logDir2.getAbsolutePath)
-    assertTrue(cleanerManager.allCleanerCheckpoints.get(topicPartition).isEmpty)
+    assertFalse(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
   }
 
   /**
@@ -710,14 +710,16 @@ class LogCleanerManagerTest extends Logging {
     assertThrows(classOf[IllegalStateException], () => cleanerManager.doneCleaning(topicPartition, log.dir, 1))
 
     cleanerManager.setCleaningState(topicPartition, LogCleaningInProgress)
-    cleanerManager.doneCleaning(topicPartition, log.dir, 1)
+    val endOffset = 1L
+    cleanerManager.doneCleaning(topicPartition, log.dir, endOffset)
     assertTrue(cleanerManager.cleaningState(topicPartition).isEmpty)
-    assertTrue(cleanerManager.allCleanerCheckpoints.get(topicPartition).nonEmpty)
+    assertTrue(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
+    assertEquals(Some(endOffset), cleanerManager.allCleanerCheckpoints.get(topicPartition))
 
     cleanerManager.setCleaningState(topicPartition, LogCleaningAborted)
-    cleanerManager.doneCleaning(topicPartition, log.dir, 1)
+    cleanerManager.doneCleaning(topicPartition, log.dir, endOffset)
     assertEquals(LogCleaningPaused(1), cleanerManager.cleaningState(topicPartition).get)
-    assertTrue(cleanerManager.allCleanerCheckpoints.get(topicPartition).nonEmpty)
+    assertTrue(cleanerManager.allCleanerCheckpoints.contains(topicPartition))
   }
 
   @Test
@@ -755,7 +757,7 @@ class LogCleanerManagerTest extends Logging {
 
     val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time)
     assertEquals(None, filthiestLog, "Log should not be selected for cleaning")
-    assertEquals(20L, cleanerCheckpoints.get(tp).get, "Unselected log should have checkpoint offset updated")
+    assertEquals(20L, cleanerCheckpoints(tp), "Unselected log should have checkpoint offset updated")
   }
 
   /**
@@ -777,7 +779,7 @@ class LogCleanerManagerTest extends Logging {
 
     val filthiestLog = cleanerManager.grabFilthiestCompactedLog(time).get
     assertEquals(tp1, filthiestLog.topicPartition, "Dirtier log should be selected")
-    assertEquals(15L, cleanerCheckpoints.get(tp0).get, "Unselected log should have checkpoint offset updated")
+    assertEquals(15L, cleanerCheckpoints(tp0), "Unselected log should have checkpoint offset updated")
   }
 
   private def createCleanerManager(log: UnifiedLog): LogCleanerManager = {
diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerParameterizedIntegrationTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerParameterizedIntegrationTest.scala
index 9377307058d1c..4229962b5c4bd 100755
--- a/core/src/test/scala/unit/kafka/log/LogCleanerParameterizedIntegrationTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogCleanerParameterizedIntegrationTest.scala
@@ -19,13 +19,13 @@ package kafka.log
 
 import java.io.File
 import java.util.Properties
-import kafka.api.KAFKA_0_11_0_IV0
-import kafka.api.{KAFKA_0_10_0_IV1, KAFKA_0_9_0}
+
 import kafka.server.KafkaConfig
 import kafka.server.checkpoints.OffsetCheckpointFile
 import kafka.utils._
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.record._
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_9_0, IBP_0_10_0_IV1, IBP_0_11_0_IV0}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.extension.ExtensionContext
 import org.junit.jupiter.params.ParameterizedTest
@@ -151,7 +151,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
 
     val log = cleaner.logs.get(topicPartitions(0))
     val props = logConfigProperties(maxMessageSize = maxMessageSize)
-    props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_9_0.version)
+    props.put(LogConfig.MessageFormatVersionProp, IBP_0_9_0.version)
     log.updateConfig(new LogConfig(props))
 
     val appends = writeDups(numKeys = 100, numDups = 3, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V0)
@@ -173,7 +173,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
       val largeMessageOffset = appendInfo.firstOffset.map(_.messageOffset).get
 
       // also add some messages with version 1 and version 2 to check that we handle mixed format versions correctly
-      props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_11_0_IV0.version)
+      props.put(LogConfig.MessageFormatVersionProp, IBP_0_11_0_IV0.version)
       log.updateConfig(new LogConfig(props))
       val dupsV1 = writeDups(startKey = 30, numKeys = 40, numDups = 3, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V1)
       val dupsV2 = writeDups(startKey = 15, numKeys = 5, numDups = 3, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V2)
@@ -194,7 +194,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
 
     val log = cleaner.logs.get(topicPartitions(0))
     val props = logConfigProperties(maxMessageSize = maxMessageSize, segmentSize = 256)
-    props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_9_0.version)
+    props.put(LogConfig.MessageFormatVersionProp, IBP_0_9_0.version)
     log.updateConfig(new LogConfig(props))
 
     // with compression enabled, these messages will be written as a single message containing
@@ -202,7 +202,7 @@ class LogCleanerParameterizedIntegrationTest extends AbstractLogCleanerIntegrati
     var appendsV0 = writeDupsSingleMessageSet(numKeys = 2, numDups = 3, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V0)
     appendsV0 ++= writeDupsSingleMessageSet(numKeys = 2, startKey = 3, numDups = 2, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V0)
 
-    props.put(LogConfig.MessageFormatVersionProp, KAFKA_0_10_0_IV1.version)
+    props.put(LogConfig.MessageFormatVersionProp, IBP_0_10_0_IV1.version)
     log.updateConfig(new LogConfig(props))
 
     var appendsV1 = writeDupsSingleMessageSet(startKey = 4, numKeys = 2, numDups = 2, log = log, codec = codec, magicValue = RecordBatch.MAGIC_VALUE_V1)
diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala
index 30f3bd4ba6d17..949e0c59df53d 100755
--- a/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala
@@ -23,9 +23,8 @@ import java.nio.charset.StandardCharsets
 import java.nio.file.Paths
 import java.util.Properties
 import java.util.concurrent.{CountDownLatch, TimeUnit}
-
 import kafka.common._
-import kafka.server.{BrokerTopicStats, LogDirFailureChannel}
+import kafka.server.{BrokerTopicStats, KafkaConfig, LogDirFailureChannel}
 import kafka.utils._
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.errors.CorruptRecordException
@@ -270,6 +269,111 @@ class LogCleanerTest {
     assertEquals(2L, logAppendInfo.lastOffset)
   }
 
+  private def assertAllAbortedTxns(
+    expectedAbortedTxns: List[AbortedTxn],
+    log: UnifiedLog
+  ): Unit= {
+    val abortedTxns = log.collectAbortedTransactions(startOffset = 0L, upperBoundOffset = log.logEndOffset)
+    assertEquals(expectedAbortedTxns, abortedTxns)
+  }
+
+  private def assertAllTransactionsComplete(log: UnifiedLog): Unit = {
+    assertTrue(log.activeProducers.forall(_.currentTxnStartOffset() == -1))
+  }
+
+  @Test
+  def testMultiPassSegmentCleaningWithAbortedTransactions(): Unit = {
+    // Verify that the log cleaner preserves aborted transaction state (including the index)
+    // even if the cleaner cannot clean the whole segment in one pass.
+
+    val deleteRetentionMs = 50000
+    val offsetMapSlots = 4
+    val cleaner = makeCleaner(Int.MaxValue)
+    val logProps = new Properties()
+    logProps.put(LogConfig.DeleteRetentionMsProp, deleteRetentionMs.toString)
+    val log = makeLog(config = LogConfig.fromProps(logConfig.originals, logProps))
+
+    val producerEpoch = 0.toShort
+    val producerId1 = 1
+    val producerId2 = 2
+
+    val appendProducer1 = appendTransactionalAsLeader(log, producerId1, producerEpoch)
+    val appendProducer2 = appendTransactionalAsLeader(log, producerId2, producerEpoch)
+
+    def abort(producerId: Long): Unit = {
+      log.appendAsLeader(abortMarker(producerId, producerEpoch), leaderEpoch = 0, origin = AppendOrigin.Replication)
+    }
+
+    def commit(producerId: Long): Unit = {
+      log.appendAsLeader(commitMarker(producerId, producerEpoch), leaderEpoch = 0, origin = AppendOrigin.Replication)
+    }
+
+    // Append some transaction data (offset range in parenthesis)
+    appendProducer1(Seq(1, 2))  // [0, 1]
+    appendProducer2(Seq(2, 3))  // [2, 3]
+    appendProducer1(Seq(3, 4))  // [4, 5]
+    commit(producerId1)         // [6, 6]
+    commit(producerId2)         // [7, 7]
+    appendProducer1(Seq(2, 3))  // [8, 9]
+    abort(producerId1)          // [10, 10]
+    appendProducer2(Seq(4, 5))  // [11, 12]
+    appendProducer1(Seq(5, 6))  // [13, 14]
+    commit(producerId1)         // [15, 15]
+    abort(producerId2)          // [16, 16]
+    appendProducer2(Seq(6, 7))  // [17, 18]
+    commit(producerId2)         // [19, 19]
+
+    log.roll()
+    assertEquals(20L, log.logEndOffset)
+
+    val expectedAbortedTxns = List(
+      new AbortedTxn(producerId=producerId1, firstOffset=8, lastOffset=10, lastStableOffset=11),
+      new AbortedTxn(producerId=producerId2, firstOffset=11, lastOffset=16, lastStableOffset=17)
+    )
+
+    assertAllTransactionsComplete(log)
+    assertAllAbortedTxns(expectedAbortedTxns, log)
+
+    var dirtyOffset = 0L
+    def cleanSegments(): Unit = {
+      val offsetMap = new FakeOffsetMap(slots = offsetMapSlots)
+      val segments = log.logSegments(0, log.activeSegment.baseOffset).toSeq
+      val stats = new CleanerStats(time)
+      cleaner.buildOffsetMap(log, dirtyOffset, log.activeSegment.baseOffset, offsetMap, stats)
+      cleaner.cleanSegments(log, segments, offsetMap, time.milliseconds(), stats, new CleanedTransactionMetadata, Long.MaxValue)
+      dirtyOffset = offsetMap.latestOffset + 1
+    }
+
+    // On the first pass, we should see the data from the aborted transactions deleted,
+    // but the markers should remain until the deletion retention time has passed.
+    cleanSegments()
+    assertEquals(4L, dirtyOffset)
+    assertEquals(List(0, 2, 4, 6, 7, 10, 13, 15, 16, 17, 19), batchBaseOffsetsInLog(log))
+    assertEquals(List(0, 2, 3, 4, 5, 6, 7, 10, 13, 14, 15, 16, 17, 18, 19), offsetsInLog(log))
+    assertAllTransactionsComplete(log)
+    assertAllAbortedTxns(expectedAbortedTxns, log)
+
+    // On the second pass, no data from the aborted transactions remains. The markers
+    // still cannot be removed from the log due to the retention time, but we do not
+    // need to record them in the transaction index since they are empty.
+    cleanSegments()
+    assertEquals(14, dirtyOffset)
+    assertEquals(List(0, 2, 4, 6, 7, 10, 13, 15, 16, 17, 19), batchBaseOffsetsInLog(log))
+    assertEquals(List(0, 2, 4, 5, 6, 7, 10, 13, 14, 15, 16, 17, 18, 19), offsetsInLog(log))
+    assertAllTransactionsComplete(log)
+    assertAllAbortedTxns(List(), log)
+
+    // On the last pass, wait for the retention time to expire. The abort markers
+    // (offsets 10 and 16) should be deleted.
+    time.sleep(deleteRetentionMs)
+    cleanSegments()
+    assertEquals(20L, dirtyOffset)
+    assertEquals(List(0, 2, 4, 6, 7, 13, 15, 17, 19), batchBaseOffsetsInLog(log))
+    assertEquals(List(0, 2, 4, 5, 6, 7, 13, 15, 17, 18, 19), offsetsInLog(log))
+    assertAllTransactionsComplete(log)
+    assertAllAbortedTxns(List(), log)
+  }
+
   @Test
   def testBasicTransactionAwareCleaning(): Unit = {
     val cleaner = makeCleaner(Int.MaxValue)
@@ -926,6 +1030,50 @@ class LogCleanerTest {
     assertEquals(List(3, 4, 5), offsetsInLog(log))
   }
 
+
+  @Test
+  def testCleaningWithKeysConflictingWithTxnMarkerKeys(): Unit = {
+    val cleaner = makeCleaner(10)
+    val logProps = new Properties()
+    logProps.put(LogConfig.SegmentBytesProp, 1024: java.lang.Integer)
+    val log = makeLog(config = LogConfig.fromProps(logConfig.originals, logProps))
+    val leaderEpoch = 5
+    val producerEpoch = 0.toShort
+
+    // First we append one committed transaction
+    val producerId1 = 1L
+    val appendProducer = appendTransactionalAsLeader(log, producerId1, producerEpoch, leaderEpoch)
+    appendProducer(Seq(1))
+    log.appendAsLeader(commitMarker(producerId1, producerEpoch), leaderEpoch, origin = AppendOrigin.Coordinator)
+
+    // Now we append one transaction with a key which conflicts with the COMMIT marker appended above
+    def commitRecordKey(): ByteBuffer = {
+      val keySize = ControlRecordType.COMMIT.recordKey().sizeOf()
+      val key = ByteBuffer.allocate(keySize)
+      ControlRecordType.COMMIT.recordKey().writeTo(key)
+      key.flip()
+      key
+    }
+
+    val producerId2 = 2L
+    val records = MemoryRecords.withTransactionalRecords(
+      CompressionType.NONE,
+      producerId2,
+      producerEpoch,
+      0,
+      new SimpleRecord(time.milliseconds(), commitRecordKey(), ByteBuffer.wrap("foo".getBytes))
+    )
+    log.appendAsLeader(records, leaderEpoch, origin = AppendOrigin.Client)
+    log.appendAsLeader(commitMarker(producerId2, producerEpoch), leaderEpoch, origin = AppendOrigin.Coordinator)
+    log.roll()
+    assertEquals(List(0, 1, 2, 3), offsetsInLog(log))
+
+    // After cleaning, the marker should not be removed
+    cleaner.clean(LogToClean(new TopicPartition("test", 0), log, 0L, log.activeSegment.baseOffset))
+    assertEquals(List(0, 1, 2, 3), lastOffsetsPerBatchInLog(log))
+    assertEquals(List(0, 1, 2, 3), offsetsInLog(log))
+  }
+
   @Test
   def testPartialSegmentClean(): Unit = {
     // because loadFactor is 0.75, this means we can fit 1 message in the map
@@ -1080,6 +1228,11 @@ class LogCleanerTest {
     assertEquals(numInvalidMessages, stats.invalidMessagesRead, "Cleaner should have seen %d invalid messages.")
   }
 
+  private def batchBaseOffsetsInLog(log: UnifiedLog): Iterable[Long] = {
+    for (segment <- log.logSegments; batch <- segment.log.batches.asScala)
+      yield batch.baseOffset
+  }
+
   def lastOffsetsPerBatchInLog(log: UnifiedLog): Iterable[Long] = {
     for (segment <- log.logSegments; batch <- segment.log.batches.asScala)
       yield batch.lastOffset
@@ -1744,6 +1897,35 @@ class LogCleanerTest {
     } finally logCleaner.shutdown()
   }
 
+  @Test
+  def testReconfigureLogCleanerIoMaxBytesPerSecond(): Unit = {
+    val oldKafkaProps = TestUtils.createBrokerConfig(1, "localhost:2181")
+    oldKafkaProps.setProperty(KafkaConfig.LogCleanerIoMaxBytesPerSecondProp, "10000000")
+
+    val logCleaner = new LogCleaner(LogCleaner.cleanerConfig(new KafkaConfig(oldKafkaProps)),
+      logDirs = Array(TestUtils.tempDir()),
+      logs = new Pool[TopicPartition, UnifiedLog](),
+      logDirFailureChannel = new LogDirFailureChannel(1),
+      time = time) {
+      // shutdown() and startup() are called in LogCleaner.reconfigure().
+      // Empty startup() and shutdown() to ensure that no unnecessary log cleaner threads remain after this test.
+      override def startup(): Unit = {}
+      override def shutdown(): Unit = {}
+    }
+
+    try {
+      assertEquals(10000000, logCleaner.throttler.desiredRatePerSec, s"Throttler.desiredRatePerSec should be initialized from initial `${KafkaConfig.LogCleanerIoMaxBytesPerSecondProp}` config.")
+
+      val newKafkaProps = TestUtils.createBrokerConfig(1, "localhost:2181")
+      newKafkaProps.setProperty(KafkaConfig.LogCleanerIoMaxBytesPerSecondProp, "20000000")
+
+      logCleaner.reconfigure(new KafkaConfig(oldKafkaProps), new KafkaConfig(newKafkaProps))
+
+      assertEquals(20000000, logCleaner.throttler.desiredRatePerSec, s"Throttler.desiredRatePerSec should be updated with new `${KafkaConfig.LogCleanerIoMaxBytesPerSecondProp}` config.")
+    } finally {
+      logCleaner.shutdown()
+    }
+  }
 
   private def writeToLog(log: UnifiedLog, keysAndValues: Iterable[(Int, Int)], offsetSeq: Iterable[Long]): Iterable[Long] = {
     for(((key, value), offset) <- keysAndValues.zip(offsetSeq))
@@ -1824,19 +2006,31 @@ class LogCleanerTest {
       partitionLeaderEpoch, new SimpleRecord(key.toString.getBytes, value.toString.getBytes))
   }
 
-  private def appendTransactionalAsLeader(log: UnifiedLog,
-                                          producerId: Long,
-                                          producerEpoch: Short,
-                                          origin: AppendOrigin = AppendOrigin.Client): Seq[Int] => LogAppendInfo = {
-    appendIdempotentAsLeader(log, producerId, producerEpoch, isTransactional = true, origin = origin)
+  private def appendTransactionalAsLeader(
+    log: UnifiedLog,
+    producerId: Long,
+    producerEpoch: Short,
+    leaderEpoch: Int = 0,
+    origin: AppendOrigin = AppendOrigin.Client
+  ): Seq[Int] => LogAppendInfo = {
+    appendIdempotentAsLeader(
+      log,
+      producerId,
+      producerEpoch,
+      isTransactional = true,
+      leaderEpoch = leaderEpoch,
+      origin = origin
+    )
   }
 
-  private def appendIdempotentAsLeader(log: UnifiedLog,
-                                       producerId: Long,
-                                       producerEpoch: Short,
-                                       isTransactional: Boolean = false,
-                                       leaderEpoch: Int = 0,
-                                       origin: AppendOrigin = AppendOrigin.Client): Seq[Int] => LogAppendInfo = {
+  private def appendIdempotentAsLeader(
+    log: UnifiedLog,
+    producerId: Long,
+    producerEpoch: Short,
+    isTransactional: Boolean = false,
+    leaderEpoch: Int = 0,
+    origin: AppendOrigin = AppendOrigin.Client
+  ): Seq[Int] => LogAppendInfo = {
     var sequence = 0
     keys: Seq[Int] => {
       val simpleRecords = keys.map { key =>
diff --git a/core/src/test/scala/unit/kafka/log/LogConfigTest.scala b/core/src/test/scala/unit/kafka/log/LogConfigTest.scala
index f72bb9282709f..838c043ff8426 100644
--- a/core/src/test/scala/unit/kafka/log/LogConfigTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogConfigTest.scala
@@ -17,7 +17,6 @@
 
 package kafka.log
 
-import kafka.api.KAFKA_3_0_IV1
 import kafka.server.{KafkaConfig, ThrottledReplicaListValidator}
 import kafka.utils.TestUtils
 import org.apache.kafka.common.config.ConfigDef.Importance.MEDIUM
@@ -25,8 +24,10 @@ import org.apache.kafka.common.config.ConfigDef.Type.INT
 import org.apache.kafka.common.config.{ConfigException, TopicConfig}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
-
 import java.util.{Collections, Properties}
+
+import org.apache.kafka.server.common.MetadataVersion.IBP_3_0_IV1
+
 import scala.annotation.nowarn
 
 class LogConfigTest {
@@ -65,7 +66,7 @@ class LogConfigTest {
     assertEquals(2 * millisInHour, logProps.get(LogConfig.SegmentJitterMsProp))
     assertEquals(2 * millisInHour, logProps.get(LogConfig.RetentionMsProp))
     // The message format version should always be 3.0 if the inter-broker protocol version is 3.0 or higher
-    assertEquals(KAFKA_3_0_IV1.version, logProps.get(LogConfig.MessageFormatVersionProp))
+    assertEquals(IBP_3_0_IV1.version, logProps.get(LogConfig.MessageFormatVersionProp))
   }
 
   @Test
diff --git a/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala b/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala
index a6b114320ad7c..c6379ff3f3341 100644
--- a/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala
@@ -17,24 +17,28 @@
 
 package kafka.log
 
-import java.io.{BufferedWriter, File, FileWriter}
+import java.io.{BufferedWriter, File, FileWriter, IOException}
 import java.nio.ByteBuffer
 import java.nio.file.{Files, NoSuchFileException, Paths}
 import java.util.Properties
-import kafka.api.{ApiVersion, KAFKA_0_11_0_IV0}
 import kafka.server.epoch.{EpochEntry, LeaderEpochFileCache}
 import kafka.server.{BrokerTopicStats, FetchDataInfo, KafkaConfig, LogDirFailureChannel}
 import kafka.server.metadata.MockConfigRepository
 import kafka.utils.{CoreUtils, MockTime, Scheduler, TestUtils}
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.errors.KafkaStorageException
 import org.apache.kafka.common.record.{CompressionType, ControlRecordType, DefaultRecordBatch, MemoryRecords, RecordBatch, RecordVersion, SimpleRecord, TimestampType}
 import org.apache.kafka.common.utils.{Time, Utils}
-import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotEquals, assertThrows, assertTrue}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_0_11_0_IV0
+import org.junit.jupiter.api.Assertions.{assertDoesNotThrow, assertEquals, assertFalse, assertNotEquals, assertThrows, assertTrue}
+import org.junit.jupiter.api.function.Executable
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.mockito.ArgumentMatchers
 import org.mockito.ArgumentMatchers.{any, anyLong}
 import org.mockito.Mockito.{mock, reset, times, verify, when}
 
+import java.util.concurrent.ConcurrentMap
 import scala.annotation.nowarn
 import scala.collection.mutable.ListBuffer
 import scala.collection.{Iterable, Map, mutable}
@@ -61,6 +65,12 @@ class LogLoaderTest {
     Utils.delete(tmpDir)
   }
 
+  object ErrorTypes extends Enumeration {
+    type Errors = Value
+    val IOException, RuntimeException, KafkaStorageExceptionWithIOExceptionCause,
+    KafkaStorageExceptionWithoutIOExceptionCause = Value
+  }
+
   @Test
   def testLogRecoveryIsCalledUponBrokerCrash(): Unit = {
     // LogManager must realize correctly if the last shutdown was not clean and the logs need
@@ -73,15 +83,19 @@ class LogLoaderTest {
     var log: UnifiedLog = null
     val time = new MockTime()
     var cleanShutdownInterceptedValue = false
-    case class SimulateError(var hasError: Boolean = false)
+    case class SimulateError(var hasError: Boolean = false, var errorType: ErrorTypes.Errors = ErrorTypes.RuntimeException)
     val simulateError = SimulateError()
+    val logDirFailureChannel = new LogDirFailureChannel(logDirs.size)
 
     val maxTransactionTimeoutMs = 5 * 60 * 1000
     val maxProducerIdExpirationMs = 60 * 60 * 1000
 
     // Create a LogManager with some overridden methods to facilitate interception of clean shutdown
-    // flag and to inject a runtime error
-    def interceptedLogManager(logConfig: LogConfig, logDirs: Seq[File], simulateError: SimulateError): LogManager = {
+    // flag and to inject an error
+    def interceptedLogManager(logConfig: LogConfig,
+                              logDirs: Seq[File],
+                              logDirFailureChannel: LogDirFailureChannel
+                             ): LogManager = {
       new LogManager(
         logDirs = logDirs.map(_.getAbsoluteFile),
         initialOfflineDirs = Array.empty[File],
@@ -98,15 +112,24 @@ class LogLoaderTest {
         interBrokerProtocolVersion = config.interBrokerProtocolVersion,
         scheduler = time.scheduler,
         brokerTopicStats = new BrokerTopicStats(),
-        logDirFailureChannel = new LogDirFailureChannel(logDirs.size),
+        logDirFailureChannel = logDirFailureChannel,
         time = time,
         keepPartitionMetadataFile = config.usesTopicId) {
 
         override def loadLog(logDir: File, hadCleanShutdown: Boolean, recoveryPoints: Map[TopicPartition, Long],
                              logStartOffsets: Map[TopicPartition, Long], defaultConfig: LogConfig,
-                             topicConfigs: Map[String, LogConfig]): UnifiedLog = {
+                             topicConfigs: Map[String, LogConfig], numRemainingSegments: ConcurrentMap[String, Int]): UnifiedLog = {
           if (simulateError.hasError) {
-            throw new RuntimeException("Simulated error")
+            simulateError.errorType match {
+              case ErrorTypes.KafkaStorageExceptionWithIOExceptionCause =>
+                throw new KafkaStorageException(new IOException("Simulated Kafka storage error with IOException cause"))
+              case ErrorTypes.KafkaStorageExceptionWithoutIOExceptionCause =>
+                throw new KafkaStorageException("Simulated Kafka storage error without IOException cause")
+              case ErrorTypes.IOException =>
+                throw new IOException("Simulated IO error")
+              case _ =>
+                throw new RuntimeException("Simulated Runtime error")
+            }
           }
           cleanShutdownInterceptedValue = hadCleanShutdown
           val topicPartition = UnifiedLog.parseTopicPartitionName(logDir)
@@ -132,10 +155,24 @@ class LogLoaderTest {
       }
     }
 
+    def initializeLogManagerForSimulatingErrorTest(logDirFailureChannel: LogDirFailureChannel = new LogDirFailureChannel(logDirs.size)
+                                                  ): (LogManager, Executable) = {
+      val logManager: LogManager = interceptedLogManager(logConfig, logDirs, logDirFailureChannel)
+      log = logManager.getOrCreateLog(topicPartition, isNew = true, topicId = None)
+
+      assertFalse(logDirFailureChannel.hasOfflineLogDir(logDir.getAbsolutePath), "log dir should not be offline before load logs")
+
+      val runLoadLogs: Executable = () => {
+        val defaultConfig = logManager.currentDefaultConfig
+        logManager.loadLogs(defaultConfig, logManager.fetchTopicConfigOverrides(defaultConfig, Set.empty))
+      }
+
+      (logManager, runLoadLogs)
+    }
+
     val cleanShutdownFile = new File(logDir, LogLoader.CleanShutdownFile)
     locally {
-      val logManager: LogManager = interceptedLogManager(logConfig, logDirs, simulateError)
-      log = logManager.getOrCreateLog(topicPartition, isNew = true, topicId = None)
+      val (logManager, _) = initializeLogManagerForSimulatingErrorTest()
 
       // Load logs after a clean shutdown
       Files.createFile(cleanShutdownFile.toPath)
@@ -156,33 +193,48 @@ class LogLoaderTest {
     }
 
     locally {
-      simulateError.hasError = true
-      val logManager: LogManager = interceptedLogManager(logConfig, logDirs, simulateError)
-      log = logManager.getOrCreateLog(topicPartition, isNew = true, topicId = None)
+      val (logManager, runLoadLogs) = initializeLogManagerForSimulatingErrorTest(logDirFailureChannel)
 
-      // Simulate error
-      assertThrows(classOf[RuntimeException], () => {
-        val defaultConfig = logManager.currentDefaultConfig
-        logManager.loadLogs(defaultConfig, logManager.fetchTopicConfigOverrides(defaultConfig, Set.empty))
-      })
+      // Simulate Runtime error
+      simulateError.hasError = true
+      simulateError.errorType = ErrorTypes.RuntimeException
+      assertThrows(classOf[RuntimeException], runLoadLogs)
       assertFalse(cleanShutdownFile.exists(), "Clean shutdown file must not have existed")
+      assertFalse(logDirFailureChannel.hasOfflineLogDir(logDir.getAbsolutePath), "log dir should not turn offline when Runtime Exception thrown")
+
+      // Simulate Kafka storage error with IOException cause
+      // in this case, the logDir will be added into offline list before KafkaStorageThrown. So we don't verify it here
+      simulateError.errorType = ErrorTypes.KafkaStorageExceptionWithIOExceptionCause
+      assertDoesNotThrow(runLoadLogs, "KafkaStorageException with IOException cause should be caught and handled")
+
+      // Simulate Kafka storage error without IOException cause
+      simulateError.errorType = ErrorTypes.KafkaStorageExceptionWithoutIOExceptionCause
+      assertThrows(classOf[KafkaStorageException], runLoadLogs, "should throw exception when KafkaStorageException without IOException cause")
+      assertFalse(logDirFailureChannel.hasOfflineLogDir(logDir.getAbsolutePath), "log dir should not turn offline when KafkaStorageException without IOException cause thrown")
+
+      // Simulate IO error
+      simulateError.errorType = ErrorTypes.IOException
+      assertDoesNotThrow(runLoadLogs, "IOException should be caught and handled")
+      assertTrue(logDirFailureChannel.hasOfflineLogDir(logDir.getAbsolutePath), "the log dir should turn offline after IOException thrown")
+
       // Do not simulate error on next call to LogManager#loadLogs. LogManager must understand that log had unclean shutdown the last time.
       simulateError.hasError = false
       cleanShutdownInterceptedValue = true
       val defaultConfig = logManager.currentDefaultConfig
       logManager.loadLogs(defaultConfig, logManager.fetchTopicConfigOverrides(defaultConfig, Set.empty))
       assertFalse(cleanShutdownInterceptedValue, "Unexpected value for clean shutdown flag")
+      logManager.shutdown()
     }
   }
 
   @Test
   def testProducerSnapshotsRecoveryAfterUncleanShutdownV1(): Unit = {
-    testProducerSnapshotsRecoveryAfterUncleanShutdown(ApiVersion.minSupportedFor(RecordVersion.V1).version)
+    testProducerSnapshotsRecoveryAfterUncleanShutdown(MetadataVersion.minSupportedFor(RecordVersion.V1).version)
   }
 
   @Test
   def testProducerSnapshotsRecoveryAfterUncleanShutdownCurrentMessageFormat(): Unit = {
-    testProducerSnapshotsRecoveryAfterUncleanShutdown(ApiVersion.latestVersion.version)
+    testProducerSnapshotsRecoveryAfterUncleanShutdown(MetadataVersion.latest.version)
   }
 
   private def createLog(dir: File,
@@ -267,7 +319,7 @@ class LogLoaderTest {
     val expectedSegmentsWithReads = mutable.Set[Long]()
     val expectedSnapshotOffsets = mutable.Set[Long]()
 
-    if (logConfig.messageFormatVersion < KAFKA_0_11_0_IV0) {
+    if (logConfig.messageFormatVersion.isLessThan(IBP_0_11_0_IV0)) {
       expectedSegmentsWithReads += activeSegmentOffset
       expectedSnapshotOffsets ++= log.logSegments.map(_.baseOffset).toVector.takeRight(2) :+ log.logEndOffset
     } else {
@@ -885,8 +937,8 @@ class LogLoaderTest {
 
     // The files remain absent until we first access it because we are doing lazy loading for time index and offset index
     // files but in this test case we need to create these files in order to test we will remove them.
-    bogusIndex2.createNewFile()
-    bogusTimeIndex2.createNewFile()
+    Files.createFile(bogusIndex2.toPath)
+    Files.createFile(bogusTimeIndex2.toPath)
 
     def createRecords = TestUtils.singletonRecords(value = "test".getBytes, timestamp = mockTime.milliseconds)
     val logConfig = LogTestUtils.createLogConfig(segmentBytes = createRecords.sizeInBytes * 5, segmentIndexBytes = 1000, indexIntervalBytes = 1)
diff --git a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala
index 11b511e3da6ef..1b2dd7809f3fb 100755
--- a/core/src/test/scala/unit/kafka/log/LogManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogManagerTest.scala
@@ -17,11 +17,10 @@
 
 package kafka.log
 
-import com.yammer.metrics.core.MetricName
-import kafka.metrics.KafkaYammerMetrics
+import com.yammer.metrics.core.{Gauge, MetricName}
 import kafka.server.checkpoints.OffsetCheckpointFile
 import kafka.server.metadata.{ConfigRepository, MockConfigRepository}
-import kafka.server.{FetchDataInfo, FetchLogEnd}
+import kafka.server.{BrokerTopicStats, FetchDataInfo, FetchLogEnd, LogDirFailureChannel}
 import kafka.utils._
 import org.apache.directory.api.util.FileUtils
 import org.apache.kafka.common.errors.OffsetOutOfRangeException
@@ -30,14 +29,17 @@ import org.apache.kafka.common.{KafkaException, TopicPartition}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.mockito.ArgumentMatchers.any
-import org.mockito.{ArgumentMatchers, Mockito}
-import org.mockito.Mockito.{doAnswer, mock, never, spy, times, verify}
+import org.mockito.{ArgumentCaptor, ArgumentMatchers, Mockito}
+import org.mockito.Mockito.{doAnswer, doNothing, mock, never, spy, times, verify}
 
 import java.io._
 import java.nio.file.Files
-import java.util.concurrent.Future
+import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, Future}
 import java.util.{Collections, Properties}
-import scala.collection.mutable
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
+
+import scala.collection.{Map, mutable}
+import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Try}
 
@@ -420,12 +422,14 @@ class LogManagerTest {
   }
 
   private def createLogManager(logDirs: Seq[File] = Seq(this.logDir),
-                               configRepository: ConfigRepository = new MockConfigRepository): LogManager = {
+                               configRepository: ConfigRepository = new MockConfigRepository,
+                               recoveryThreadsPerDataDir: Int = 1): LogManager = {
     TestUtils.createLogManager(
       defaultConfig = logConfig,
       configRepository = configRepository,
       logDirs = logDirs,
-      time = this.time)
+      time = this.time,
+      recoveryThreadsPerDataDir = recoveryThreadsPerDataDir)
   }
 
   @Test
@@ -637,6 +641,205 @@ class LogManagerTest {
     assertTrue(logManager.partitionsInitializing.isEmpty)
   }
 
+  private def appendRecordsToLog(time: MockTime, parentLogDir: File, partitionId: Int, brokerTopicStats: BrokerTopicStats, expectedSegmentsPerLog: Int): Unit = {
+    def createRecord = TestUtils.singletonRecords(value = "test".getBytes, timestamp = time.milliseconds)
+    val tpFile = new File(parentLogDir, s"$name-$partitionId")
+    val segmentBytes = 1024
+
+    val log = LogTestUtils.createLog(tpFile, logConfig, brokerTopicStats, time.scheduler, time, 0, 0,
+      5 * 60 * 1000, 60 * 60 * 1000, LogManager.ProducerIdExpirationCheckIntervalMs)
+
+    assertTrue(expectedSegmentsPerLog > 0)
+    // calculate numMessages to append to logs. It'll create "expectedSegmentsPerLog" log segments with segment.bytes=1024
+    val numMessages = Math.floor(segmentBytes * expectedSegmentsPerLog / createRecord.sizeInBytes).asInstanceOf[Int]
+    try {
+      for (_ <- 0 until numMessages) {
+        log.appendAsLeader(createRecord, leaderEpoch = 0)
+      }
+
+      assertEquals(expectedSegmentsPerLog, log.numberOfSegments)
+    } finally {
+      log.close()
+    }
+  }
+
+  private def verifyRemainingLogsToRecoverMetric(spyLogManager: LogManager, expectedParams: Map[String, Int]): Unit = {
+    val spyLogManagerClassName = spyLogManager.getClass().getSimpleName
+    // get all `remainingLogsToRecover` metrics
+    val logMetrics: ArrayBuffer[Gauge[Int]] = KafkaYammerMetrics.defaultRegistry.allMetrics.asScala
+      .filter { case (metric, _) => metric.getType == s"$spyLogManagerClassName" && metric.getName == "remainingLogsToRecover" }
+      .map { case (_, gauge) => gauge }
+      .asInstanceOf[ArrayBuffer[Gauge[Int]]]
+
+    assertEquals(expectedParams.size, logMetrics.size)
+
+    val capturedPath: ArgumentCaptor[String] = ArgumentCaptor.forClass(classOf[String])
+
+    val expectedCallTimes = expectedParams.values.sum
+    verify(spyLogManager, times(expectedCallTimes)).decNumRemainingLogs(any[ConcurrentMap[String, Int]], capturedPath.capture());
+
+    val paths = capturedPath.getAllValues
+    expectedParams.foreach {
+      case (path, totalLogs) =>
+        // make sure each path is called "totalLogs" times, which means it is decremented to 0 in the end
+        assertEquals(totalLogs, Collections.frequency(paths, path))
+    }
+
+    // expected the end value is 0
+    logMetrics.foreach { gauge => assertEquals(0, gauge.value()) }
+  }
+
+  private def verifyRemainingSegmentsToRecoverMetric(spyLogManager: LogManager,
+                                                     logDirs: Seq[File],
+                                                     recoveryThreadsPerDataDir: Int,
+                                                     mockMap: ConcurrentHashMap[String, Int],
+                                                     expectedParams: Map[String, Int]): Unit = {
+    val spyLogManagerClassName = spyLogManager.getClass().getSimpleName
+    // get all `remainingSegmentsToRecover` metrics
+    val logSegmentMetrics: ArrayBuffer[Gauge[Int]] = KafkaYammerMetrics.defaultRegistry.allMetrics.asScala
+          .filter { case (metric, _) => metric.getType == s"$spyLogManagerClassName" && metric.getName == "remainingSegmentsToRecover" }
+          .map { case (_, gauge) => gauge }
+          .asInstanceOf[ArrayBuffer[Gauge[Int]]]
+
+    // expected each log dir has 1 metrics for each thread
+    assertEquals(recoveryThreadsPerDataDir * logDirs.size, logSegmentMetrics.size)
+
+    val capturedThreadName: ArgumentCaptor[String] = ArgumentCaptor.forClass(classOf[String])
+    val capturedNumRemainingSegments: ArgumentCaptor[Int] = ArgumentCaptor.forClass(classOf[Int])
+
+    // Since we'll update numRemainingSegments from totalSegments to 0 for each thread, so we need to add 1 here
+    val expectedCallTimes = expectedParams.values.map( num => num + 1 ).sum
+    verify(mockMap, times(expectedCallTimes)).put(capturedThreadName.capture(), capturedNumRemainingSegments.capture());
+
+    // expected the end value is 0
+    logSegmentMetrics.foreach { gauge => assertEquals(0, gauge.value()) }
+
+    val threadNames = capturedThreadName.getAllValues
+    val numRemainingSegments = capturedNumRemainingSegments.getAllValues
+
+    expectedParams.foreach {
+      case (threadName, totalSegments) =>
+        // make sure we update the numRemainingSegments from totalSegments to 0 in order for each thread
+        var expectedCurRemainingSegments = totalSegments + 1
+        for (i <- 0 until threadNames.size) {
+          if (threadNames.get(i).contains(threadName)) {
+            expectedCurRemainingSegments -= 1
+            assertEquals(expectedCurRemainingSegments, numRemainingSegments.get(i))
+          }
+        }
+        assertEquals(0, expectedCurRemainingSegments)
+    }
+  }
+
+  private def verifyLogRecoverMetricsRemoved(spyLogManager: LogManager): Unit = {
+    val spyLogManagerClassName = spyLogManager.getClass().getSimpleName
+    // get all `remainingLogsToRecover` metrics
+    def logMetrics: mutable.Set[MetricName] = KafkaYammerMetrics.defaultRegistry.allMetrics.keySet.asScala
+      .filter { metric => metric.getType == s"$spyLogManagerClassName" && metric.getName == "remainingLogsToRecover" }
+
+    assertTrue(logMetrics.isEmpty)
+
+    // get all `remainingSegmentsToRecover` metrics
+    val logSegmentMetrics: mutable.Set[MetricName] = KafkaYammerMetrics.defaultRegistry.allMetrics.keySet.asScala
+      .filter { metric => metric.getType == s"$spyLogManagerClassName" && metric.getName == "remainingSegmentsToRecover" }
+
+    assertTrue(logSegmentMetrics.isEmpty)
+  }
+
+  @Test
+  def testLogRecoveryMetrics(): Unit = {
+    logManager.shutdown()
+    val logDir1 = TestUtils.tempDir()
+    val logDir2 = TestUtils.tempDir()
+    val logDirs = Seq(logDir1, logDir2)
+    val recoveryThreadsPerDataDir = 2
+    // create logManager with expected recovery thread number
+    logManager = createLogManager(logDirs, recoveryThreadsPerDataDir = recoveryThreadsPerDataDir)
+    val spyLogManager = spy(logManager)
+
+    assertEquals(2, spyLogManager.liveLogDirs.size)
+
+    val mockTime = new MockTime()
+    val mockMap = mock(classOf[ConcurrentHashMap[String, Int]])
+    val mockBrokerTopicStats = mock(classOf[BrokerTopicStats])
+    val expectedSegmentsPerLog = 2
+
+    // create log segments for log recovery in each log dir
+    appendRecordsToLog(mockTime, logDir1, 0, mockBrokerTopicStats, expectedSegmentsPerLog)
+    appendRecordsToLog(mockTime, logDir2, 1, mockBrokerTopicStats, expectedSegmentsPerLog)
+
+    // intercept loadLog method to pass expected parameter to do log recovery
+    doAnswer { invocation =>
+      val dir: File = invocation.getArgument(0)
+      val topicConfigOverrides: mutable.Map[String, LogConfig] = invocation.getArgument(5)
+
+      val topicPartition = UnifiedLog.parseTopicPartitionName(dir)
+      val config = topicConfigOverrides.getOrElse(topicPartition.topic, logConfig)
+
+      UnifiedLog(
+        dir = dir,
+        config = config,
+        logStartOffset = 0,
+        recoveryPoint = 0,
+        maxTransactionTimeoutMs = 5 * 60 * 1000,
+        maxProducerIdExpirationMs = 5 * 60 * 1000,
+        producerIdExpirationCheckIntervalMs = LogManager.ProducerIdExpirationCheckIntervalMs,
+        scheduler = mockTime.scheduler,
+        time = mockTime,
+        brokerTopicStats = mockBrokerTopicStats,
+        logDirFailureChannel = mock(classOf[LogDirFailureChannel]),
+        // not clean shutdown
+        lastShutdownClean = false,
+        topicId = None,
+        keepPartitionMetadataFile = false,
+        // pass mock map for verification later
+        numRemainingSegments = mockMap)
+
+    }.when(spyLogManager).loadLog(any[File], any[Boolean], any[Map[TopicPartition, Long]], any[Map[TopicPartition, Long]],
+      any[LogConfig], any[Map[String, LogConfig]], any[ConcurrentMap[String, Int]])
+
+    // do nothing for removeLogRecoveryMetrics for metrics verification
+    doNothing().when(spyLogManager).removeLogRecoveryMetrics()
+
+    // start the logManager to do log recovery
+    spyLogManager.startup(Set.empty)
+
+    // make sure log recovery metrics are added and removed
+    verify(spyLogManager, times(1)).addLogRecoveryMetrics(any[ConcurrentMap[String, Int]], any[ConcurrentMap[String, Int]])
+    verify(spyLogManager, times(1)).removeLogRecoveryMetrics()
+
+    // expected 1 log in each log dir since we created 2 partitions with 2 log dirs
+    val expectedRemainingLogsParams = Map[String, Int](logDir1.getAbsolutePath -> 1, logDir2.getAbsolutePath -> 1)
+    verifyRemainingLogsToRecoverMetric(spyLogManager, expectedRemainingLogsParams)
+
+    val expectedRemainingSegmentsParams = Map[String, Int](
+      logDir1.getAbsolutePath -> expectedSegmentsPerLog, logDir2.getAbsolutePath -> expectedSegmentsPerLog)
+    verifyRemainingSegmentsToRecoverMetric(spyLogManager, logDirs, recoveryThreadsPerDataDir, mockMap, expectedRemainingSegmentsParams)
+  }
+
+  @Test
+  def testLogRecoveryMetricsShouldBeRemovedAfterLogRecovered(): Unit = {
+    logManager.shutdown()
+    val logDir1 = TestUtils.tempDir()
+    val logDir2 = TestUtils.tempDir()
+    val logDirs = Seq(logDir1, logDir2)
+    val recoveryThreadsPerDataDir = 2
+    // create logManager with expected recovery thread number
+    logManager = createLogManager(logDirs, recoveryThreadsPerDataDir = recoveryThreadsPerDataDir)
+    val spyLogManager = spy(logManager)
+
+    assertEquals(2, spyLogManager.liveLogDirs.size)
+
+    // start the logManager to do log recovery
+    spyLogManager.startup(Set.empty)
+
+    // make sure log recovery metrics are added and removed once
+    verify(spyLogManager, times(1)).addLogRecoveryMetrics(any[ConcurrentMap[String, Int]], any[ConcurrentMap[String, Int]])
+    verify(spyLogManager, times(1)).removeLogRecoveryMetrics()
+
+    verifyLogRecoverMetricsRemoved(spyLogManager)
+  }
+
   @Test
   def testMetricsExistWhenLogIsRecreatedBeforeDeletion(): Unit = {
     val topicName = "metric-test"
diff --git a/core/src/test/scala/unit/kafka/log/LogTestUtils.scala b/core/src/test/scala/unit/kafka/log/LogTestUtils.scala
index e524bcbd7d2f2..50af76f556ca2 100644
--- a/core/src/test/scala/unit/kafka/log/LogTestUtils.scala
+++ b/core/src/test/scala/unit/kafka/log/LogTestUtils.scala
@@ -27,6 +27,8 @@ import org.apache.kafka.common.record.{CompressionType, ControlRecordType, EndTr
 import org.apache.kafka.common.utils.{Time, Utils}
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse}
 
+import java.nio.file.Files
+import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap}
 import scala.collection.Iterable
 import scala.jdk.CollectionConverters._
 
@@ -82,7 +84,8 @@ object LogTestUtils {
                 producerIdExpirationCheckIntervalMs: Int = LogManager.ProducerIdExpirationCheckIntervalMs,
                 lastShutdownClean: Boolean = true,
                 topicId: Option[Uuid] = None,
-                keepPartitionMetadataFile: Boolean = true): UnifiedLog = {
+                keepPartitionMetadataFile: Boolean = true,
+                numRemainingSegments: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]): UnifiedLog = {
     UnifiedLog(
       dir = dir,
       config = config,
@@ -97,7 +100,8 @@ object LogTestUtils {
       logDirFailureChannel = new LogDirFailureChannel(10),
       lastShutdownClean = lastShutdownClean,
       topicId = topicId,
-      keepPartitionMetadataFile = keepPartitionMetadataFile
+      keepPartitionMetadataFile = keepPartitionMetadataFile,
+      numRemainingSegments = numRemainingSegments
     )
   }
 
@@ -142,8 +146,8 @@ object LogTestUtils {
       segment.append(MemoryRecords.withRecords(baseOffset + Int.MaxValue - 1, CompressionType.NONE, 0,
         record(baseOffset + Int.MaxValue - 1)))
       // Need to create the offset files explicitly to avoid triggering segment recovery to truncate segment.
-      UnifiedLog.offsetIndexFile(logDir, baseOffset).createNewFile()
-      UnifiedLog.timeIndexFile(logDir, baseOffset).createNewFile()
+      Files.createFile(UnifiedLog.offsetIndexFile(logDir, baseOffset).toPath)
+      Files.createFile(UnifiedLog.timeIndexFile(logDir, baseOffset).toPath)
       baseOffset + Int.MaxValue
     }
 
diff --git a/core/src/test/scala/unit/kafka/log/LogValidatorTest.scala b/core/src/test/scala/unit/kafka/log/LogValidatorTest.scala
index 4275684230736..0ffa1d484e6ee 100644
--- a/core/src/test/scala/unit/kafka/log/LogValidatorTest.scala
+++ b/core/src/test/scala/unit/kafka/log/LogValidatorTest.scala
@@ -18,17 +18,18 @@ package kafka.log
 
 import java.nio.ByteBuffer
 import java.util.concurrent.TimeUnit
-import kafka.api.{ApiVersion, KAFKA_2_0_IV1, KAFKA_2_3_IV1}
+
 import kafka.common.{LongRef, RecordValidationException}
 import kafka.log.LogValidator.ValidationAndOffsetAssignResult
 import kafka.message._
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server.{BrokerTopicStats, RequestLocal}
 import kafka.utils.TestUtils.meterCount
 import org.apache.kafka.common.errors.{InvalidTimestampException, UnsupportedCompressionTypeException, UnsupportedForMessageFormatException}
 import org.apache.kafka.common.record._
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{InvalidRecordException, TopicPartition}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.test.TestUtils
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@@ -126,7 +127,7 @@ class LogValidatorTest {
       1000L,
       RecordBatch.NO_PRODUCER_EPOCH,
       origin = AppendOrigin.Client,
-      KAFKA_2_3_IV1,
+      MetadataVersion.IBP_2_3_IV1,
       brokerTopicStats,
       RequestLocal.withThreadConfinedCaching)
   }
@@ -158,7 +159,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -198,7 +199,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -247,7 +248,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -310,7 +311,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
   }
@@ -355,7 +356,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = partitionLeaderEpoch,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatingResults.validatedRecords
@@ -428,7 +429,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = partitionLeaderEpoch,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatingResults.validatedRecords
@@ -485,7 +486,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -531,7 +532,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -589,7 +590,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = partitionLeaderEpoch,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val validatedRecords = validatedResults.validatedRecords
@@ -643,7 +644,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -667,7 +668,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -691,7 +692,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -715,7 +716,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -738,7 +739,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -761,7 +762,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -785,7 +786,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords
     checkOffsets(messageWithOffset, offset)
@@ -810,7 +811,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords
     checkOffsets(messageWithOffset, offset)
@@ -836,7 +837,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords
     checkOffsets(compressedMessagesWithOffset, offset)
@@ -862,7 +863,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords
     checkOffsets(compressedMessagesWithOffset, offset)
@@ -886,7 +887,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     checkOffsets(validatedResults.validatedRecords, offset)
@@ -912,7 +913,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     checkOffsets(validatedResults.validatedRecords, offset)
@@ -938,7 +939,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     checkOffsets(validatedResults.validatedRecords, offset)
@@ -964,7 +965,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     checkOffsets(validatedResults.validatedRecords, offset)
@@ -990,7 +991,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -1013,7 +1014,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Coordinator,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching)
     val batches = TestUtils.toList(result.validatedRecords.batches)
@@ -1041,7 +1042,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1065,7 +1066,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1088,7 +1089,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1111,7 +1112,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1135,7 +1136,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1159,7 +1160,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1185,7 +1186,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -1211,7 +1212,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -1235,7 +1236,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1259,7 +1260,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching).validatedRecords, offset)
   }
@@ -1281,7 +1282,7 @@ class LogValidatorTest {
         timestampDiffMaxMs = 5000L,
         partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
         origin = AppendOrigin.Client,
-        interBrokerProtocolVersion = ApiVersion.latestVersion,
+        interBrokerProtocolVersion = MetadataVersion.latest,
         brokerTopicStats = brokerTopicStats,
         requestLocal = RequestLocal.withThreadConfinedCaching)
     )
@@ -1312,7 +1313,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 1000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = KAFKA_2_0_IV1,
+      interBrokerProtocolVersion = MetadataVersion.IBP_2_0_IV1,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
@@ -1347,7 +1348,7 @@ class LogValidatorTest {
         timestampDiffMaxMs = 1000L,
         partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
         origin = AppendOrigin.Client,
-        interBrokerProtocolVersion = ApiVersion.latestVersion,
+        interBrokerProtocolVersion = MetadataVersion.latest,
         brokerTopicStats = brokerTopicStats,
         requestLocal = RequestLocal.withThreadConfinedCaching)
     )
@@ -1426,7 +1427,7 @@ class LogValidatorTest {
       timestampDiffMaxMs = 5000L,
       partitionLeaderEpoch = RecordBatch.NO_PARTITION_LEADER_EPOCH,
       origin = AppendOrigin.Client,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
+      interBrokerProtocolVersion = MetadataVersion.latest,
       brokerTopicStats = brokerTopicStats,
       requestLocal = RequestLocal.withThreadConfinedCaching))
   }
diff --git a/core/src/test/scala/unit/kafka/log/ProducerStateManagerTest.scala b/core/src/test/scala/unit/kafka/log/ProducerStateManagerTest.scala
index 93c1724841ec2..60f0c1ce2c1b4 100644
--- a/core/src/test/scala/unit/kafka/log/ProducerStateManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/log/ProducerStateManagerTest.scala
@@ -975,9 +975,9 @@ class ProducerStateManagerTest {
     // the broker shutdown cleanly and emitted a snapshot file larger than the base offset of the active segment.
 
     // Create 3 snapshot files at different offsets.
-    UnifiedLog.producerSnapshotFile(logDir, 5).createNewFile() // not stray
-    UnifiedLog.producerSnapshotFile(logDir, 2).createNewFile() // stray
-    UnifiedLog.producerSnapshotFile(logDir, 42).createNewFile() // not stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 5).toPath) // not stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 2).toPath) // stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 42).toPath) // not stray
 
     // claim that we only have one segment with a base offset of 5
     stateManager.removeStraySnapshots(Seq(5))
@@ -995,9 +995,9 @@ class ProducerStateManagerTest {
     // Snapshots associated with an offset in the list of segment base offsets should remain.
 
     // Create 3 snapshot files at different offsets.
-    UnifiedLog.producerSnapshotFile(logDir, 5).createNewFile() // stray
-    UnifiedLog.producerSnapshotFile(logDir, 2).createNewFile() // stray
-    UnifiedLog.producerSnapshotFile(logDir, 42).createNewFile() // not stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 5).toPath) // stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 2).toPath) // stray
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 42).toPath) // not stray
 
     stateManager.removeStraySnapshots(Seq(42))
     assertEquals(Seq(42), ProducerStateManager.listSnapshotFiles(logDir).map(_.offset).sorted)
@@ -1009,7 +1009,7 @@ class ProducerStateManagerTest {
    */
   @Test
   def testRemoveAndMarkSnapshotForDeletion(): Unit = {
-    UnifiedLog.producerSnapshotFile(logDir, 5).createNewFile()
+    Files.createFile(UnifiedLog.producerSnapshotFile(logDir, 5).toPath)
     val manager = new ProducerStateManager(partition, logDir, maxTransactionTimeoutMs, maxProducerIdExpirationMs, time)
     assertTrue(manager.latestSnapshotOffset.isDefined)
     val snapshot = manager.removeAndMarkSnapshotForDeletion(5).get
@@ -1027,7 +1027,7 @@ class ProducerStateManagerTest {
   @Test
   def testRemoveAndMarkSnapshotForDeletionAlreadyDeleted(): Unit = {
     val file = UnifiedLog.producerSnapshotFile(logDir, 5)
-    file.createNewFile()
+    Files.createFile(file.toPath)
     val manager = new ProducerStateManager(partition, logDir, maxTransactionTimeoutMs, maxProducerIdExpirationMs, time)
     assertTrue(manager.latestSnapshotOffset.isDefined)
     Files.delete(file.toPath)
diff --git a/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala b/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala
index 79aa743eb6e32..57409a1f0384e 100755
--- a/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala
+++ b/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala
@@ -22,8 +22,8 @@ import java.nio.ByteBuffer
 import java.nio.file.Files
 import java.util.concurrent.{Callable, Executors}
 import java.util.{Optional, Properties}
+
 import kafka.common.{OffsetsOutOfOrderException, RecordValidationException, UnexpectedAppendOffsetException}
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server.checkpoints.LeaderEpochCheckpointFile
 import kafka.server.epoch.{EpochEntry, LeaderEpochFileCache}
 import kafka.server.{BrokerTopicStats, FetchHighWatermark, FetchIsolation, FetchLogEnd, FetchTxnCommitted, KafkaConfig, LogOffsetMetadata, PartitionMetadataFile}
@@ -36,6 +36,7 @@ import org.apache.kafka.common.record.MemoryRecords.RecordFilter
 import org.apache.kafka.common.record._
 import org.apache.kafka.common.requests.{ListOffsetsRequest, ListOffsetsResponse}
 import org.apache.kafka.common.utils.{BufferSupplier, Time, Utils}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 
@@ -66,8 +67,8 @@ class UnifiedLogTest {
 
   def createEmptyLogs(dir: File, offsets: Int*): Unit = {
     for(offset <- offsets) {
-      UnifiedLog.logFile(dir, offset).createNewFile()
-      UnifiedLog.offsetIndexFile(dir, offset).createNewFile()
+      Files.createFile(UnifiedLog.logFile(dir, offset).toPath)
+      Files.createFile(UnifiedLog.offsetIndexFile(dir, offset).toPath)
     }
   }
 
@@ -1852,12 +1853,12 @@ class UnifiedLogTest {
     val record = MemoryRecords.withRecords(CompressionType.NONE, new SimpleRecord("simpleValue".getBytes))
 
     val topicId = Uuid.randomUuid()
-    log.partitionMetadataFile.record(topicId)
+    log.partitionMetadataFile.get.record(topicId)
 
     // Should trigger a synchronous flush
     log.appendAsLeader(record, leaderEpoch = 0)
-    assertTrue(log.partitionMetadataFile.exists())
-    assertEquals(topicId, log.partitionMetadataFile.read().topicId)
+    assertTrue(log.partitionMetadataFile.get.exists())
+    assertEquals(topicId, log.partitionMetadataFile.get.read().topicId)
   }
 
   @Test
@@ -1866,15 +1867,15 @@ class UnifiedLogTest {
     var log = createLog(logDir, logConfig)
 
     val topicId = Uuid.randomUuid()
-    log.partitionMetadataFile.record(topicId)
+    log.partitionMetadataFile.get.record(topicId)
 
     // Should trigger a synchronous flush
     log.close()
 
     // We open the log again, and the partition metadata file should exist with the same ID.
     log = createLog(logDir, logConfig)
-    assertTrue(log.partitionMetadataFile.exists())
-    assertEquals(topicId, log.partitionMetadataFile.read().topicId)
+    assertTrue(log.partitionMetadataFile.get.exists())
+    assertEquals(topicId, log.partitionMetadataFile.get.read().topicId)
   }
 
   @Test
@@ -1901,14 +1902,14 @@ class UnifiedLogTest {
     val topicId = Uuid.randomUuid()
     log.assignTopicId(topicId)
     // We should not write to this file or set the topic ID
-    assertFalse(log.partitionMetadataFile.exists())
+    assertFalse(log.partitionMetadataFile.get.exists())
     assertEquals(None, log.topicId)
     log.close()
 
     val log2 = createLog(logDir, logConfig, topicId = Some(Uuid.randomUuid()),  keepPartitionMetadataFile = false)
 
     // We should not write to this file or set the topic ID
-    assertFalse(log2.partitionMetadataFile.exists())
+    assertFalse(log2.partitionMetadataFile.get.exists())
     assertEquals(None, log2.topicId)
     log2.close()
   }
@@ -2252,7 +2253,7 @@ class UnifiedLogTest {
 
     // Ensure that after a directory rename, the epoch cache is written to the right location
     val tp = UnifiedLog.parseTopicPartitionName(log.dir)
-    log.renameDir(UnifiedLog.logDeleteDirName(tp))
+    log.renameDir(UnifiedLog.logDeleteDirName(tp), true)
     log.appendAsLeader(TestUtils.records(List(new SimpleRecord("foo".getBytes()))), leaderEpoch = 10)
     assertEquals(Some(10), log.latestEpoch)
     assertTrue(LeaderEpochCheckpointFile.newFile(log.dir).exists())
@@ -2273,7 +2274,7 @@ class UnifiedLogTest {
 
     // Ensure that after a directory rename, the partition metadata file is written to the right location.
     val tp = UnifiedLog.parseTopicPartitionName(log.dir)
-    log.renameDir(UnifiedLog.logDeleteDirName(tp))
+    log.renameDir(UnifiedLog.logDeleteDirName(tp), true)
     log.appendAsLeader(TestUtils.records(List(new SimpleRecord("foo".getBytes()))), leaderEpoch = 10)
     assertEquals(Some(10), log.latestEpoch)
     assertTrue(PartitionMetadataFile.newFile(log.dir).exists())
@@ -2282,7 +2283,7 @@ class UnifiedLogTest {
     // Check the topic ID remains in memory and was copied correctly.
     assertTrue(log.topicId.isDefined)
     assertEquals(topicId, log.topicId.get)
-    assertEquals(topicId, log.partitionMetadataFile.read().topicId)
+    assertEquals(topicId, log.partitionMetadataFile.get.read().topicId)
   }
 
   @Test
@@ -2292,17 +2293,17 @@ class UnifiedLogTest {
 
     // Write a topic ID to the partition metadata file to ensure it is transferred correctly.
     val topicId = Uuid.randomUuid()
-    log.partitionMetadataFile.record(topicId)
+    log.partitionMetadataFile.get.record(topicId)
 
     // Ensure that after a directory rename, the partition metadata file is written to the right location.
     val tp = UnifiedLog.parseTopicPartitionName(log.dir)
-    log.renameDir(UnifiedLog.logDeleteDirName(tp))
+    log.renameDir(UnifiedLog.logDeleteDirName(tp), true)
     assertTrue(PartitionMetadataFile.newFile(log.dir).exists())
     assertFalse(PartitionMetadataFile.newFile(this.logDir).exists())
 
     // Check the file holds the correct contents.
-    assertTrue(log.partitionMetadataFile.exists())
-    assertEquals(topicId, log.partitionMetadataFile.read().topicId)
+    assertTrue(log.partitionMetadataFile.get.exists())
+    assertEquals(topicId, log.partitionMetadataFile.get.read().topicId)
   }
 
   @Test
@@ -2412,8 +2413,8 @@ class UnifiedLogTest {
   private def testDegenerateSplitSegmentWithOverflow(segmentBaseOffset: Long, records: List[MemoryRecords]): Unit = {
     val segment = LogTestUtils.rawSegment(logDir, segmentBaseOffset)
     // Need to create the offset files explicitly to avoid triggering segment recovery to truncate segment.
-    UnifiedLog.offsetIndexFile(logDir, segmentBaseOffset).createNewFile()
-    UnifiedLog.timeIndexFile(logDir, segmentBaseOffset).createNewFile()
+    Files.createFile(UnifiedLog.offsetIndexFile(logDir, segmentBaseOffset).toPath)
+    Files.createFile(UnifiedLog.timeIndexFile(logDir, segmentBaseOffset).toPath)
     records.foreach(segment.append _)
     segment.close()
 
@@ -3324,6 +3325,135 @@ class UnifiedLogTest {
     assertEquals(1, log.numberOfSegments)
   }
 
+  @Test
+  def testSegmentDeletionWithHighWatermarkInitialization(): Unit = {
+    val logConfig = LogTestUtils.createLogConfig(
+      segmentBytes = 512,
+      segmentIndexBytes = 1000,
+      retentionMs = 999
+    )
+    val log = createLog(logDir, logConfig)
+
+    val expiredTimestamp = mockTime.milliseconds() - 1000
+    for (i <- 0 until 100) {
+      val records = TestUtils.singletonRecords(value = s"test$i".getBytes, timestamp = expiredTimestamp)
+      log.appendAsLeader(records, leaderEpoch = 0)
+    }
+
+    val initialHighWatermark = log.updateHighWatermark(25L)
+    assertEquals(25L, initialHighWatermark)
+
+    val initialNumSegments = log.numberOfSegments
+    log.deleteOldSegments()
+    assertTrue(log.numberOfSegments < initialNumSegments)
+    assertTrue(log.logStartOffset <= initialHighWatermark)
+  }
+
+  @Test
+  def testCannotDeleteSegmentsAtOrAboveHighWatermark(): Unit = {
+    val logConfig = LogTestUtils.createLogConfig(
+      segmentBytes = 512,
+      segmentIndexBytes = 1000,
+      retentionMs = 999
+    )
+    val log = createLog(logDir, logConfig)
+
+    val expiredTimestamp = mockTime.milliseconds() - 1000
+    for (i <- 0 until 100) {
+      val records = TestUtils.singletonRecords(value = s"test$i".getBytes, timestamp = expiredTimestamp)
+      log.appendAsLeader(records, leaderEpoch = 0)
+    }
+
+    // ensure we have at least a few segments so the test case is not trivial
+    assertTrue(log.numberOfSegments > 5)
+    assertEquals(0L, log.highWatermark)
+    assertEquals(0L, log.logStartOffset)
+    assertEquals(100L, log.logEndOffset)
+
+    for (hw <- 0 to 100) {
+      log.updateHighWatermark(hw)
+      assertEquals(hw, log.highWatermark)
+      log.deleteOldSegments()
+      assertTrue(log.logStartOffset <= hw)
+
+      // verify that all segments up to the high watermark have been deleted
+      log.logSegments.headOption.foreach { segment =>
+        assertTrue(segment.baseOffset <= hw)
+        assertTrue(segment.baseOffset >= log.logStartOffset)
+      }
+      log.logSegments.tail.foreach { segment =>
+        assertTrue(segment.baseOffset > hw)
+        assertTrue(segment.baseOffset >= log.logStartOffset)
+      }
+    }
+
+    assertEquals(100L, log.logStartOffset)
+    assertEquals(1, log.numberOfSegments)
+    assertEquals(0, log.activeSegment.size)
+  }
+
+  @Test
+  def testCannotIncrementLogStartOffsetPastHighWatermark(): Unit = {
+    val logConfig = LogTestUtils.createLogConfig(
+      segmentBytes = 512,
+      segmentIndexBytes = 1000,
+      retentionMs = 999
+    )
+    val log = createLog(logDir, logConfig)
+
+    for (i <- 0 until 100) {
+      val records = TestUtils.singletonRecords(value = s"test$i".getBytes)
+      log.appendAsLeader(records, leaderEpoch = 0)
+    }
+
+    log.updateHighWatermark(25L)
+    assertThrows(classOf[OffsetOutOfRangeException], () => log.maybeIncrementLogStartOffset(26L, ClientRecordDeletion))
+  }
+
+  def testBackgroundDeletionWithIOException(): Unit = {
+    val logConfig = LogTestUtils.createLogConfig(segmentBytes = 1024 * 1024)
+    val log = createLog(logDir, logConfig)
+    assertEquals(1, log.numberOfSegments, "The number of segments should be 1")
+
+    // Delete the underlying directory to trigger a KafkaStorageException
+    val dir = log.dir
+    Utils.delete(dir)
+    Files.createFile(dir.toPath)
+
+    assertThrows(classOf[KafkaStorageException], () => {
+      log.delete()
+    })
+    assertTrue(log.logDirFailureChannel.hasOfflineLogDir(tmpDir.toString))
+  }
+
+  /**
+   * test renaming a log's dir without reinitialization, which is the case during topic deletion
+   */
+  @Test
+  def testRenamingDirWithoutReinitialization(): Unit = {
+    val logConfig = LogTestUtils.createLogConfig(segmentBytes = 1024 * 1024)
+    val log = createLog(logDir, logConfig)
+    assertEquals(1, log.numberOfSegments, "The number of segments should be 1")
+
+    val newDir = TestUtils.randomPartitionLogDir(tmpDir)
+    assertTrue(newDir.exists())
+
+    log.renameDir(newDir.getName, false)
+    assertTrue(log.leaderEpochCache.isEmpty)
+    assertTrue(log.partitionMetadataFile.isEmpty)
+    assertEquals(0, log.logEndOffset)
+    // verify that records appending can still succeed
+    // even with the uninitialized leaderEpochCache and partitionMetadataFile
+    val records = TestUtils.records(List(new SimpleRecord(mockTime.milliseconds, "key".getBytes, "value".getBytes)))
+    log.appendAsLeader(records, leaderEpoch = 0)
+    assertEquals(1, log.logEndOffset)
+
+    // verify that the background deletion can succeed
+    log.delete()
+    assertEquals(0, log.numberOfSegments, "The number of segments should be 0")
+    assertFalse(newDir.exists())
+  }
+
   private def appendTransactionalToBuffer(buffer: ByteBuffer,
                                           producerId: Long,
                                           producerEpoch: Short,
diff --git a/core/src/test/scala/unit/kafka/metrics/KafkaMetricsGroupTest.scala b/core/src/test/scala/unit/kafka/metrics/KafkaMetricsGroupTest.scala
new file mode 100644
index 0000000000000..918553589d6c2
--- /dev/null
+++ b/core/src/test/scala/unit/kafka/metrics/KafkaMetricsGroupTest.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.metrics
+
+import org.junit.jupiter.api.Assertions.{assertEquals, assertNull}
+import org.junit.jupiter.api.Test
+
+class KafkaMetricsGroupTest {
+
+  @Test
+  def testUntaggedMetricName(): Unit = {
+    val metricName = KafkaMetricsGroup.explicitMetricName(
+      group = "kafka.metrics",
+      typeName = "TestMetrics",
+      name = "TaggedMetric",
+      Map.empty
+    )
+
+    assertEquals("kafka.metrics", metricName.getGroup)
+    assertEquals("TestMetrics", metricName.getType)
+    assertEquals("TaggedMetric", metricName.getName)
+    assertEquals("kafka.metrics:type=TestMetrics,name=TaggedMetric",
+      metricName.getMBeanName)
+    assertNull(metricName.getScope)
+  }
+
+  @Test
+  def testTaggedMetricName(): Unit = {
+    val tags = Map("foo" -> "bar", "bar" -> "baz", "baz" -> "raz.taz")
+    val metricName = KafkaMetricsGroup.explicitMetricName(
+      group = "kafka.metrics",
+      typeName = "TestMetrics",
+      name = "TaggedMetric",
+      tags
+    )
+
+    assertEquals("kafka.metrics", metricName.getGroup)
+    assertEquals("TestMetrics", metricName.getType)
+    assertEquals("TaggedMetric", metricName.getName)
+    assertEquals("kafka.metrics:type=TestMetrics,name=TaggedMetric,foo=bar,bar=baz,baz=raz.taz",
+      metricName.getMBeanName)
+    assertEquals("bar.baz.baz.raz_taz.foo.bar", metricName.getScope)
+  }
+
+  @Test
+  def testTaggedMetricNameWithEmptyValue(): Unit = {
+    val tags = Map("foo" -> "bar", "bar" -> "", "baz" -> "raz.taz")
+    val metricName = KafkaMetricsGroup.explicitMetricName(
+      group = "kafka.metrics",
+      typeName = "TestMetrics",
+      name = "TaggedMetric",
+      tags
+    )
+
+    assertEquals("kafka.metrics", metricName.getGroup)
+    assertEquals("TestMetrics", metricName.getType)
+    assertEquals("TaggedMetric", metricName.getName)
+    assertEquals("kafka.metrics:type=TestMetrics,name=TaggedMetric,foo=bar,baz=raz.taz",
+      metricName.getMBeanName)
+    assertEquals("baz.raz_taz.foo.bar", metricName.getScope)
+  }
+
+
+}
diff --git a/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala
index f4e69f9f2b4f7..29de3c0f24288 100644
--- a/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala
+++ b/core/src/test/scala/unit/kafka/metrics/MetricsTest.scala
@@ -33,7 +33,8 @@ import kafka.log.LogConfig
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.metrics.JmxReporter
 import org.apache.kafka.common.utils.Time
-import org.junit.jupiter.api.Timeout;
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
+import org.junit.jupiter.api.Timeout
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.ValueSource
 
@@ -53,7 +54,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
 
   val nMessages = 2
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testMetricsReporterAfterDeletingTopic(quorum: String): Unit = {
     val topic = "test-topic-metric"
@@ -63,7 +64,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(Set.empty, topicMetricGroups(topic), "Topic metrics exists after deleteTopic")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testBrokerTopicMetricsUnregisteredAfterDeletingTopic(quorum: String): Unit = {
     val topic = "test-broker-topic-metric"
@@ -78,7 +79,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(Set.empty, topicMetricGroups(topic), "Topic metrics exists after deleteTopic")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testClusterIdMetric(quorum: String): Unit = {
     // Check if clusterId metric exists.
@@ -86,7 +87,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(metrics.keySet.asScala.count(_.getMBeanName == s"$requiredKafkaServerPrefix=ClusterId"), 1)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testBrokerStateMetric(quorum: String): Unit = {
     // Check if BrokerState metric exists.
@@ -94,7 +95,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(metrics.keySet.asScala.count(_.getMBeanName == s"$requiredKafkaServerPrefix=BrokerState"), 1)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testYammerMetricsCountMetric(quorum: String): Unit = {
     // Check if yammer-metrics-count metric exists.
@@ -102,7 +103,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(metrics.keySet.asScala.count(_.getMBeanName == s"$requiredKafkaServerPrefix=yammer-metrics-count"), 1)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testLinuxIoMetrics(quorum: String): Unit = {
     // Check if linux-disk-{read,write}-bytes metrics either do or do not exist depending on whether we are or are not
@@ -114,7 +115,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
       assertEquals(metrics.keySet.asScala.count(_.getMBeanName == s"$requiredKafkaServerPrefix=$name"), expectedCount))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testJMXFilter(quorum: String): Unit = {
     // Check if cluster id metrics is not exposed in JMX
@@ -124,7 +125,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
                   .isRegistered(new ObjectName(s"$requiredKafkaServerPrefix=ClusterId")))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUpdateJMXFilter(quorum: String): Unit = {
     // verify previously exposed metrics are removed and existing matching metrics are added
@@ -137,7 +138,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
                   .isRegistered(new ObjectName(s"$requiredKafkaServerPrefix=ClusterId")))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testGeneralBrokerTopicMetricsAreGreedilyRegistered(quorum: String): Unit = {
     val topic = "test-broker-topic-metric"
@@ -152,7 +153,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertTrue(topicMetricGroups(topic).nonEmpty, "Topic metrics aren't registered")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testWindowsStyleTagNames(quorum: String): Unit = {
     val path = "C:\\windows-path\\kafka-logs"
@@ -162,7 +163,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assert(metric.getMBeanName.endsWith(expectedMBeanName))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testBrokerTopicMetricsBytesInOut(quorum: String): Unit = {
     val topic = "test-bytes-in-out"
@@ -210,7 +211,7 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertTrue(TestUtils.meterCount(bytesOut) > initialBytesOut)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testZkControllerMetrics(quorum: String): Unit = {
     val metrics = KafkaYammerMetrics.defaultRegistry.allMetrics
@@ -228,11 +229,32 @@ class MetricsTest extends KafkaServerTestHarness with Logging {
     assertEquals(metrics.keySet.asScala.count(_.getMBeanName == "kafka.controller:type=KafkaController,name=FencedBrokerCount"), 1)
   }
 
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("kraft"))
+  def testKRaftControllerMetrics(quorum: String): Unit = {
+    val metrics = KafkaYammerMetrics.defaultRegistry.allMetrics
+    Set(
+      "kafka.controller:type=KafkaController,name=ActiveControllerCount",
+      "kafka.controller:type=KafkaController,name=GlobalPartitionCount",
+      "kafka.controller:type=KafkaController,name=GlobalTopicCount",
+      "kafka.controller:type=KafkaController,name=LastAppliedRecordLagMs",
+      "kafka.controller:type=KafkaController,name=LastAppliedRecordOffset",
+      "kafka.controller:type=KafkaController,name=LastAppliedRecordTimestamp",
+      "kafka.controller:type=KafkaController,name=LastCommittedRecordOffset",
+      "kafka.controller:type=KafkaController,name=MetadataErrorCount",
+      "kafka.controller:type=KafkaController,name=OfflinePartitionsCount",
+      "kafka.controller:type=KafkaController,name=PreferredReplicaImbalanceCount",
+    ).foreach(expected => {
+      assertEquals(1, metrics.keySet.asScala.count(_.getMBeanName.equals(expected)),
+        s"Unable to find ${expected}")
+    })
+  }
+
   /**
    * Test that the metrics are created with the right name, testZooKeeperStateChangeRateMetrics
    * and testZooKeeperSessionStateMetric in ZooKeeperClientTest test the metrics behaviour.
    */
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testSessionExpireListenerMetrics(quorum: String): Unit = {
     val metrics = KafkaYammerMetrics.defaultRegistry.allMetrics
diff --git a/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala b/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala
index f3f8ca884c5c0..bddf03a136c98 100644
--- a/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala
+++ b/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala
@@ -18,42 +18,44 @@
 package kafka.network
 
 
-import java.io.IOException
-import java.net.InetAddress
-import java.nio.ByteBuffer
-import java.util.Collections
 import com.fasterxml.jackson.databind.ObjectMapper
 import kafka.network
+import kafka.server.EnvelopeUtils
 import kafka.utils.TestUtils
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType
 import org.apache.kafka.common.config.types.Password
 import org.apache.kafka.common.config.{ConfigResource, SaslConfigs, SslConfigs, TopicConfig}
 import org.apache.kafka.common.memory.MemoryPool
-import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData
+import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableTopic
 import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData._
-import org.apache.kafka.common.network.{ByteBufferSend, ClientInformation, ListenerName}
-import org.apache.kafka.common.protocol.{ApiKeys, Errors}
-import org.apache.kafka.common.requests.{AbstractRequest, MetadataRequest, RequestTestUtils}
+import org.apache.kafka.common.message.{CreateTopicsRequestData, CreateTopicsResponseData, IncrementalAlterConfigsRequestData}
+import org.apache.kafka.common.network.{ClientInformation, ListenerName}
+import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.AlterConfigsRequest._
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, KafkaPrincipalSerde, SecurityProtocol}
 import org.apache.kafka.common.utils.{SecurityUtils, Utils}
+import org.apache.kafka.test
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api._
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.EnumSource
 import org.mockito.Mockito.mock
-import org.mockito.{ArgumentCaptor, Mockito}
 
+import java.io.IOException
+import java.net.InetAddress
+import java.nio.ByteBuffer
+import java.util.Collections
+import java.util.concurrent.atomic.AtomicReference
 import scala.collection.{Map, Seq}
 import scala.jdk.CollectionConverters._
 
 class RequestChannelTest {
   private val requestChannelMetrics: RequestChannel.Metrics = mock(classOf[RequestChannel.Metrics])
-  private val clientId = "id"
   private val principalSerde = new KafkaPrincipalSerde() {
     override def serialize(principal: KafkaPrincipal): Array[Byte] = Utils.utf8(principal.toString)
     override def deserialize(bytes: Array[Byte]): KafkaPrincipal = SecurityUtils.parseKafkaPrincipal(Utils.utf8(bytes))
   }
-  private val mockSend: ByteBufferSend = Mockito.mock(classOf[ByteBufferSend])
 
   @Test
   def testAlterRequests(): Unit = {
@@ -191,84 +193,66 @@ class RequestChannelTest {
     assertTrue(isValidJson(RequestConvertToJson.request(alterConfigs.loggableRequest).toString))
   }
 
-  @Test
-  def testEnvelopeBuildResponseSendShouldReturnNoErrorIfInnerResponseHasNoError(): Unit = {
-    val channelRequest = buildForwardRequestWithEnvelopeRequestAttached(buildMetadataRequest())
-
-    val envelopeResponseArgumentCaptor = ArgumentCaptor.forClass(classOf[EnvelopeResponse])
-
-    Mockito.doAnswer(_ => mockSend)
-      .when(channelRequest.envelope.get.context).buildResponseSend(envelopeResponseArgumentCaptor.capture())
-
-    // create an inner response without error
-    val responseWithoutError = RequestTestUtils.metadataUpdateWith(2, Collections.singletonMap("a", 2))
-
-    // build an envelope response
-    channelRequest.buildResponseSend(responseWithoutError)
-
-    // expect the envelopeResponse result without error
-    val capturedValue: EnvelopeResponse = envelopeResponseArgumentCaptor.getValue
-    assertTrue(capturedValue.error().equals(Errors.NONE))
+  @ParameterizedTest
+  @EnumSource(value=classOf[Errors], names=Array("NONE", "CLUSTER_AUTHORIZATION_FAILED", "NOT_CONTROLLER"))
+  def testBuildEnvelopeResponse(error: Errors): Unit = {
+    val topic = "foo"
+    val createTopicRequest = buildCreateTopicRequest(topic)
+    val unwrapped = buildUnwrappedEnvelopeRequest(createTopicRequest)
+
+    val createTopicResponse = buildCreateTopicResponse(topic, error)
+    val envelopeResponse = buildEnvelopeResponse(unwrapped, createTopicResponse)
+
+    error match {
+      case Errors.NOT_CONTROLLER =>
+        assertEquals(Errors.NOT_CONTROLLER, envelopeResponse.error)
+        assertNull(envelopeResponse.responseData)
+      case _ =>
+        assertEquals(Errors.NONE, envelopeResponse.error)
+        val unwrappedResponse = AbstractResponse.parseResponse(envelopeResponse.responseData, unwrapped.header)
+        assertEquals(createTopicResponse.data, unwrappedResponse.data)
+    }
   }
 
-  @Test
-  def testEnvelopeBuildResponseSendShouldReturnNoErrorIfInnerResponseHasNoNotControllerError(): Unit = {
-    val channelRequest = buildForwardRequestWithEnvelopeRequestAttached(buildMetadataRequest())
-
-    val envelopeResponseArgumentCaptor = ArgumentCaptor.forClass(classOf[EnvelopeResponse])
-
-    Mockito.doAnswer(_ => mockSend)
-      .when(channelRequest.envelope.get.context).buildResponseSend(envelopeResponseArgumentCaptor.capture())
-
-    // create an inner response with REQUEST_TIMED_OUT error
-    val responseWithTimeoutError = RequestTestUtils.metadataUpdateWith("cluster1", 2,
-      Collections.singletonMap("a", Errors.REQUEST_TIMED_OUT),
-      Collections.singletonMap("a", 2))
-
-    // build an envelope response
-    channelRequest.buildResponseSend(responseWithTimeoutError)
-
-    // expect the envelopeResponse result without error
-    val capturedValue: EnvelopeResponse = envelopeResponseArgumentCaptor.getValue
-    assertTrue(capturedValue.error().equals(Errors.NONE))
+  private def buildCreateTopicRequest(topic: String): CreateTopicsRequest = {
+    val requestData = new CreateTopicsRequestData()
+    requestData.topics.add(new CreatableTopic()
+      .setName(topic)
+      .setReplicationFactor(-1)
+      .setNumPartitions(-1)
+    )
+    new CreateTopicsRequest.Builder(requestData).build()
   }
 
-  @Test
-  def testEnvelopeBuildResponseSendShouldReturnNotControllerErrorIfInnerResponseHasOne(): Unit = {
-    val channelRequest = buildForwardRequestWithEnvelopeRequestAttached(buildMetadataRequest())
-
-    val envelopeResponseArgumentCaptor = ArgumentCaptor.forClass(classOf[EnvelopeResponse])
-
-    Mockito.doAnswer(_ => mockSend)
-      .when(channelRequest.envelope.get.context).buildResponseSend(envelopeResponseArgumentCaptor.capture())
-
-    // create an inner response with NOT_CONTROLLER error
-    val responseWithNotControllerError = RequestTestUtils.metadataUpdateWith("cluster1", 2,
-      Collections.singletonMap("a", Errors.NOT_CONTROLLER),
-      Collections.singletonMap("a", 2))
-
-    // build an envelope response
-    channelRequest.buildResponseSend(responseWithNotControllerError)
-
-    // expect the envelopeResponse result has NOT_CONTROLLER error
-    val capturedValue: EnvelopeResponse = envelopeResponseArgumentCaptor.getValue
-    assertTrue(capturedValue.error().equals(Errors.NOT_CONTROLLER))
+  private def buildCreateTopicResponse(
+    topic: String,
+    error: Errors,
+  ): CreateTopicsResponse = {
+    val responseData = new CreateTopicsResponseData()
+    responseData.topics.add(new CreateTopicsResponseData.CreatableTopicResult()
+      .setName(topic)
+      .setErrorCode(error.code)
+    )
+    new CreateTopicsResponse(responseData)
   }
 
-  private def buildMetadataRequest(): AbstractRequest = {
-    val resourceName = "topic-1"
-    val header = new RequestHeader(ApiKeys.METADATA, ApiKeys.METADATA.latestVersion,
-      clientId, 0)
+  private def buildUnwrappedEnvelopeRequest(request: AbstractRequest): RequestChannel.Request = {
+    val wrappedRequest = TestUtils.buildEnvelopeRequest(
+      request,
+      principalSerde,
+      requestChannelMetrics,
+      System.nanoTime()
+    )
 
-    new MetadataRequest.Builder(Collections.singletonList(resourceName), true).build(header.apiVersion)
-  }
+    val unwrappedRequest = new AtomicReference[RequestChannel.Request]()
 
-  private def buildForwardRequestWithEnvelopeRequestAttached(request: AbstractRequest): RequestChannel.Request = {
-    val envelopeRequest = TestUtils.buildRequestWithEnvelope(
-      request, principalSerde, requestChannelMetrics, System.nanoTime(), shouldSpyRequestContext = true)
+    EnvelopeUtils.handleEnvelopeRequest(
+      wrappedRequest,
+      requestChannelMetrics,
+      request => unwrappedRequest.set(request)
+    )
 
-    TestUtils.buildRequestWithEnvelope(
-      request, principalSerde, requestChannelMetrics, System.nanoTime(), envelope = Option(envelopeRequest))
+    unwrappedRequest.get()
   }
 
   private def isValidJson(str: String): Boolean = {
@@ -312,4 +296,23 @@ class RequestChannelTest {
   private def toMap(config: IncrementalAlterConfigsRequestData.AlterableConfigCollection): Map[String, String] = {
     config.asScala.map(e => e.name -> e.value).toMap
   }
+
+  private def buildEnvelopeResponse(
+    unwrapped: RequestChannel.Request,
+    response: AbstractResponse
+  ): EnvelopeResponse = {
+    assertTrue(unwrapped.envelope.isDefined)
+    val envelope = unwrapped.envelope.get
+
+    val send = unwrapped.buildResponseSend(response)
+    val sendBytes = test.TestUtils.toBuffer(send)
+
+    // We need to read the size field before `parseResponse` below
+    val size = sendBytes.getInt
+    assertEquals(size, sendBytes.remaining())
+    val envelopeResponse = AbstractResponse.parseResponse(sendBytes, envelope.header)
+
+    assertTrue(envelopeResponse.isInstanceOf[EnvelopeResponse])
+    envelopeResponse.asInstanceOf[EnvelopeResponse]
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/network/RequestConvertToJsonTest.scala b/core/src/test/scala/unit/kafka/network/RequestConvertToJsonTest.scala
index 56e14d114efab..0ce8448a4f267 100644
--- a/core/src/test/scala/unit/kafka/network/RequestConvertToJsonTest.scala
+++ b/core/src/test/scala/unit/kafka/network/RequestConvertToJsonTest.scala
@@ -19,14 +19,13 @@ package kafka.network
 
 import java.net.InetAddress
 import java.nio.ByteBuffer
-
 import com.fasterxml.jackson.databind.node.{BooleanNode, DoubleNode, JsonNodeFactory, LongNode, ObjectNode, TextNode}
 import kafka.network
 import kafka.network.RequestConvertToJson.requestHeaderNode
 import org.apache.kafka.common.memory.MemoryPool
 import org.apache.kafka.common.message._
 import org.apache.kafka.common.network.{ClientInformation, ListenerName, NetworkSend}
-import org.apache.kafka.common.protocol.{ApiKeys, MessageUtil}
+import org.apache.kafka.common.protocol.{ApiKeys, Errors, MessageUtil}
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
 import org.junit.jupiter.api.Assertions.assertEquals
@@ -61,6 +60,33 @@ class RequestConvertToJsonTest {
     assertEquals(ArrayBuffer.empty, unhandledKeys, "Unhandled request keys")
   }
 
+  @Test
+  def testAllApiVersionsResponseHandled(): Unit = {
+
+    ApiKeys.values().foreach { key => {
+      val unhandledVersions = ArrayBuffer[java.lang.Short]()
+      key.allVersions().forEach { version => {
+        val message = key match {
+          // Specify top-level error handling for verifying compatibility across versions
+          case ApiKeys.DESCRIBE_LOG_DIRS =>
+            ApiMessageType.fromApiKey(key.id).newResponse().asInstanceOf[DescribeLogDirsResponseData]
+              .setErrorCode(Errors.CLUSTER_AUTHORIZATION_FAILED.code())
+          case _ =>
+            ApiMessageType.fromApiKey(key.id).newResponse()
+        }
+
+        val bytes = MessageUtil.toByteBuffer(message, version)
+        val response = AbstractResponse.parseResponse(key, bytes, version)
+        try {
+          RequestConvertToJson.response(response, version)
+        } catch {
+          case _ : IllegalStateException => unhandledVersions += version
+        }}
+      }
+      assertEquals(ArrayBuffer.empty, unhandledVersions, s"API: ${key.toString} - Unhandled request versions")
+    }}
+  }
+
   @Test
   def testAllResponseTypesHandled(): Unit = {
     val unhandledKeys = ArrayBuffer[String]()
@@ -80,7 +106,7 @@ class RequestConvertToJsonTest {
 
   @Test
   def testRequestHeaderNode(): Unit = {
-    val alterIsrRequest = new AlterIsrRequest(new AlterIsrRequestData(), 0)
+    val alterIsrRequest = new AlterPartitionRequest(new AlterPartitionRequestData(), 0)
     val req = request(alterIsrRequest)
     val header = req.header
 
@@ -107,7 +133,7 @@ class RequestConvertToJsonTest {
 
   @Test
   def testRequestDesc(): Unit = {
-    val alterIsrRequest = new AlterIsrRequest(new AlterIsrRequestData(), 0)
+    val alterIsrRequest = new AlterPartitionRequest(new AlterPartitionRequestData(), 0)
     val req = request(alterIsrRequest)
 
     val expectedNode = new ObjectNode(JsonNodeFactory.instance)
@@ -122,7 +148,7 @@ class RequestConvertToJsonTest {
 
   @Test
   def testRequestDescMetrics(): Unit = {
-    val alterIsrRequest = new AlterIsrRequest(new AlterIsrRequestData(), 0)
+    val alterIsrRequest = new AlterPartitionRequest(new AlterPartitionRequestData(), 0)
     val req = request(alterIsrRequest)
     val send = new NetworkSend(req.context.connectionId, alterIsrRequest.toSend(req.header))
     val headerLog = RequestConvertToJson.requestHeaderNode(req.header)
diff --git a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala
index 849646cb644c3..98f92d61ff277 100644
--- a/core/src/test/scala/unit/kafka/network/SocketServerTest.scala
+++ b/core/src/test/scala/unit/kafka/network/SocketServerTest.scala
@@ -25,12 +25,11 @@ import java.nio.charset.StandardCharsets
 import java.util
 import java.util.concurrent.{CompletableFuture, ConcurrentLinkedQueue, Executors, TimeUnit}
 import java.util.{Properties, Random}
+
 import com.fasterxml.jackson.databind.node.{JsonNodeFactory, ObjectNode, TextNode}
 import com.yammer.metrics.core.{Gauge, Meter}
-
 import javax.net.ssl._
 import kafka.cluster.EndPoint
-import kafka.metrics.KafkaYammerMetrics
 import kafka.security.CredentialProvider
 import kafka.server.{ApiVersionManager, KafkaConfig, SimpleApiVersionManager, ThrottleCallback, ThrottledChannel}
 import kafka.utils.Implicits._
@@ -51,8 +50,10 @@ import org.apache.kafka.test.{TestSslUtils, TestUtils => JTestUtils}
 import org.apache.log4j.Level
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api._
-
 import java.util.concurrent.atomic.AtomicInteger
+
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
+
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
@@ -78,7 +79,7 @@ class SocketServerTest {
 
   private val apiVersionManager = new SimpleApiVersionManager(ListenerType.ZK_BROKER)
   val server = new SocketServer(config, metrics, Time.SYSTEM, credentialProvider, apiVersionManager)
-  server.startup()
+  server.enableRequestProcessing(Map.empty)
   val sockets = new ArrayBuffer[Socket]
 
   private val kafkaLogger = org.apache.log4j.LogManager.getLogger("kafka")
@@ -295,20 +296,18 @@ class SocketServerTest {
     shutdownServerAndMetrics(server)
     val testProps = new Properties
     testProps ++= props
-    testProps.put("listeners", "EXTERNAL://localhost:0,INTERNAL://localhost:0,CONTROLLER://localhost:0")
-    testProps.put("listener.security.protocol.map", "EXTERNAL:PLAINTEXT,INTERNAL:PLAINTEXT,CONTROLLER:PLAINTEXT")
-    testProps.put("control.plane.listener.name", "CONTROLLER")
+    testProps.put("listeners", "EXTERNAL://localhost:0,INTERNAL://localhost:0,CONTROL_PLANE://localhost:0")
+    testProps.put("listener.security.protocol.map", "EXTERNAL:PLAINTEXT,INTERNAL:PLAINTEXT,CONTROL_PLANE:PLAINTEXT")
+    testProps.put("control.plane.listener.name", "CONTROL_PLANE")
     testProps.put("inter.broker.listener.name", "INTERNAL")
     val config = KafkaConfig.fromProps(testProps)
     val testableServer = new TestableSocketServer(config)
-    testableServer.startup(startProcessingRequests = false)
 
     val updatedEndPoints = config.effectiveAdvertisedListeners.map { endpoint =>
       endpoint.copy(port = testableServer.boundPort(endpoint.listenerName))
     }.map(_.toJava)
 
     val externalReadyFuture = new CompletableFuture[Void]()
-    val executor = Executors.newSingleThreadExecutor()
 
     def controlPlaneListenerStarted() = {
       try {
@@ -333,18 +332,19 @@ class SocketServerTest {
     try {
       val externalListener = new ListenerName("EXTERNAL")
       val externalEndpoint = updatedEndPoints.find(e => e.listenerName.get == externalListener.value).get
-      val futures = Map(externalEndpoint -> externalReadyFuture)
-      val startFuture = executor.submit((() => testableServer.startProcessingRequests(futures)): Runnable)
+      val controlPlaneListener = new ListenerName("CONTROL_PLANE")
+      val controlPlaneEndpoint = updatedEndPoints.find(e => e.listenerName.get == controlPlaneListener.value).get
+      val futures = Map(
+        externalEndpoint -> externalReadyFuture,
+        controlPlaneEndpoint -> CompletableFuture.completedFuture[Void](null))
+      testableServer.enableRequestProcessing(futures)
       TestUtils.waitUntilTrue(() => controlPlaneListenerStarted(), "Control plane listener not started")
-      TestUtils.waitUntilTrue(() => listenerStarted(config.interBrokerListenerName), "Inter-broker listener not started")
-      assertFalse(startFuture.isDone, "Socket server startup did not wait for future to complete")
-
+      assertFalse(listenerStarted(config.interBrokerListenerName))
       assertFalse(listenerStarted(externalListener))
-
       externalReadyFuture.complete(null)
+      TestUtils.waitUntilTrue(() => listenerStarted(config.interBrokerListenerName), "Inter-broker listener not started")
       TestUtils.waitUntilTrue(() => listenerStarted(externalListener), "External listener not started")
     } finally {
-      executor.shutdownNow()
       shutdownServerAndMetrics(testableServer)
     }
   }
@@ -361,7 +361,6 @@ class SocketServerTest {
     val config = KafkaConfig.fromProps(testProps)
     val connectionQueueSize = 1
     val testableServer = new TestableSocketServer(config, connectionQueueSize)
-    testableServer.startup(startProcessingRequests = false)
 
     val socket1 = connect(testableServer, new ListenerName("EXTERNAL"), localAddr = InetAddress.getLocalHost)
     sendRequest(socket1, producerRequestBytes())
@@ -467,7 +466,7 @@ class SocketServerTest {
       time, credentialProvider, apiVersionManager)
 
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val serializedBytes = producerRequestBytes()
 
       // Connection with no outstanding requests
@@ -477,6 +476,10 @@ class SocketServerTest {
       processRequest(overrideServer.dataPlaneRequestChannel, request0)
       assertTrue(openChannel(request0, overrideServer).nonEmpty, "Channel not open")
       assertEquals(openChannel(request0, overrideServer), openOrClosingChannel(request0, overrideServer))
+      // Receive response to make sure activity on socket server processor thread quiesces, otherwise
+      // it may continue after the mock time sleep, so there would be events that would mark the
+      // connection as "up-to-date" after the sleep and prevent connection from being idle.
+      receiveResponse(socket0)
       TestUtils.waitUntilTrue(() => !openChannel(request0, overrideServer).get.isMuted, "Failed to unmute channel")
       time.sleep(idleTimeMs + 1)
       TestUtils.waitUntilTrue(() => openOrClosingChannel(request0, overrideServer).isEmpty, "Failed to close idle channel")
@@ -531,7 +534,7 @@ class SocketServerTest {
     }
 
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       overrideServer.testableProcessor.setConnectionId(overrideConnectionId)
       val socket1 = connectAndWaitForConnectionRegister()
       TestUtils.waitUntilTrue(() => connectionCount == 1 && openChannel.isDefined, "Failed to create channel")
@@ -800,7 +803,7 @@ class SocketServerTest {
     val server = new SocketServer(KafkaConfig.fromProps(newProps), new Metrics(),
       Time.SYSTEM, credentialProvider, apiVersionManager)
     try {
-      server.startup()
+      server.enableRequestProcessing(Map.empty)
       // make the maximum allowable number of connections
       val conns = (0 until 5).map(_ => connect(server))
       // now try one more (should fail)
@@ -839,7 +842,7 @@ class SocketServerTest {
     val overrideServer = new SocketServer(KafkaConfig.fromProps(overrideProps), serverMetrics,
       Time.SYSTEM, credentialProvider, apiVersionManager)
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       // make the maximum allowable number of connections
       val conns = (0 until overrideNum).map(_ => connect(overrideServer))
 
@@ -879,7 +882,7 @@ class SocketServerTest {
     }
 
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val conn = connect(overrideServer)
       conn.setSoTimeout(3000)
       assertEquals(-1, conn.getInputStream.read())
@@ -902,7 +905,7 @@ class SocketServerTest {
     // update the connection rate to 5
     overrideServer.connectionQuotas.updateIpConnectionRateQuota(None, Some(connectionRate))
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       // make the (maximum allowable number + 1) of connections
       (0 to connectionRate).map(_ => connect(overrideServer))
 
@@ -951,7 +954,7 @@ class SocketServerTest {
     val overrideServer = new SocketServer(KafkaConfig.fromProps(overrideProps), new Metrics(),
       time, credentialProvider, apiVersionManager)
     overrideServer.connectionQuotas.updateIpConnectionRateQuota(None, Some(connectionRate))
-    overrideServer.startup()
+    overrideServer.enableRequestProcessing(Map.empty)
     // make the maximum allowable number of connections
     (0 until connectionRate).map(_ => connect(overrideServer))
     // now try one more (should get throttled)
@@ -974,7 +977,7 @@ class SocketServerTest {
     val overrideServer = new SocketServer(KafkaConfig.fromProps(sslServerProps), serverMetrics,
       Time.SYSTEM, credentialProvider, apiVersionManager)
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val sslContext = SSLContext.getInstance(TestSslUtils.DEFAULT_TLS_PROTOCOL_FOR_TESTS)
       sslContext.init(null, Array(TestUtils.trustAllCerts), new java.security.SecureRandom())
       val socketFactory = sslContext.getSocketFactory
@@ -1033,7 +1036,7 @@ class SocketServerTest {
     val time = new MockTime()
     val overrideServer = new TestableSocketServer(KafkaConfig.fromProps(overrideProps), time = time)
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val socket = connect(overrideServer, ListenerName.forSecurityProtocol(SecurityProtocol.SASL_PLAINTEXT))
 
       val correlationId = -1
@@ -1113,7 +1116,7 @@ class SocketServerTest {
     val overrideServer = new TestableSocketServer(KafkaConfig.fromProps(props))
 
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val conn: Socket = connect(overrideServer)
       overrideServer.testableProcessor.closeSocketOnSendResponse(conn)
       val serializedBytes = producerRequestBytes()
@@ -1124,12 +1127,12 @@ class SocketServerTest {
 
       val requestMetrics = channel.metrics(request.header.apiKey.name)
       def totalTimeHistCount(): Long = requestMetrics.totalTimeHist.count
+      val expectedTotalTimeCount = totalTimeHistCount() + 1
       val send = new NetworkSend(request.context.connectionId, ByteBufferSend.sizePrefixed(ByteBuffer.allocate(responseBufferSize)))
       val headerLog = new ObjectNode(JsonNodeFactory.instance)
       headerLog.set("response", new TextNode("someResponse"))
       channel.sendResponse(new RequestChannel.SendResponse(request, send, Some(headerLog), None))
 
-      val expectedTotalTimeCount = totalTimeHistCount() + 1
       TestUtils.waitUntilTrue(() => totalTimeHistCount() == expectedTotalTimeCount,
         s"request metrics not updated, expected: $expectedTotalTimeCount, actual: ${totalTimeHistCount()}")
 
@@ -1145,7 +1148,7 @@ class SocketServerTest {
     val overrideServer = new TestableSocketServer(KafkaConfig.fromProps(props))
 
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       val selector = overrideServer.testableSelector
 
       // Create a channel, send some requests and close socket. Receive one pending request after socket was closed.
@@ -1173,7 +1176,7 @@ class SocketServerTest {
     val overrideServer = new SocketServer(KafkaConfig.fromProps(props), serverMetrics,
       Time.SYSTEM, credentialProvider, apiVersionManager)
     try {
-      overrideServer.startup()
+      overrideServer.enableRequestProcessing(Map.empty)
       conn = connect(overrideServer)
       val serializedBytes = producerRequestBytes()
       sendRequest(conn, serializedBytes)
@@ -1389,6 +1392,7 @@ class SocketServerTest {
    * buffered receive.
    */
   @Test
+  @Disabled // TODO: re-enabled until KAFKA-13735 is fixed
   def remoteCloseWithoutBufferedReceives(): Unit = {
     verifyRemoteCloseWithBufferedReceives(numComplete = 0, hasIncomplete = false)
   }
@@ -1426,6 +1430,7 @@ class SocketServerTest {
    * The channel must be closed after pending receives are processed.
    */
   @Test
+  @Disabled // TODO: re-enable after KAFKA-13736 is fixed
   def closingChannelWithBufferedReceives(): Unit = {
     verifyRemoteCloseWithBufferedReceives(numComplete = 3, hasIncomplete = false, makeClosing = true)
   }
@@ -1552,7 +1557,7 @@ class SocketServerTest {
     props.put(KafkaConfig.ConnectionsMaxIdleMsProp, idleTimeMs.toString)
     props ++= sslServerProps
     val testableServer = new TestableSocketServer(time = time)
-    testableServer.startup()
+    testableServer.enableRequestProcessing(Map.empty)
 
     assertTrue(testableServer.controlPlaneRequestChannelOpt.isEmpty)
 
@@ -1588,7 +1593,7 @@ class SocketServerTest {
     val time = new MockTime()
     props ++= sslServerProps
     val testableServer = new TestableSocketServer(time = time)
-    testableServer.startup()
+    testableServer.enableRequestProcessing(Map.empty)
     val proxyServer = new ProxyServer(testableServer)
     try {
       val testableSelector = testableServer.testableSelector
@@ -1734,7 +1739,7 @@ class SocketServerTest {
     val numConnections = 5
     props.put("max.connections.per.ip", numConnections.toString)
     val testableServer = new TestableSocketServer(KafkaConfig.fromProps(props), connectionQueueSize = 1)
-    testableServer.startup()
+    testableServer.enableRequestProcessing(Map.empty)
     val testableSelector = testableServer.testableSelector
     val errors = new mutable.HashSet[String]
 
@@ -1886,7 +1891,9 @@ class SocketServerTest {
                                  startProcessingRequests: Boolean = true): Unit = {
     shutdownServerAndMetrics(server)
     val testableServer = new TestableSocketServer(config)
-    testableServer.startup(startProcessingRequests = startProcessingRequests)
+    if (startProcessingRequests) {
+      testableServer.enableRequestProcessing(Map.empty)
+    }
     try {
       testWithServer(testableServer)
     } finally {
@@ -1991,7 +1998,8 @@ class SocketServerTest {
                     new LogContext(),
                     connectionQueueSize,
                     isPrivilegedListener,
-                    apiVersionManager) {
+                    apiVersionManager,
+                    s"TestableProcessor${id}") {
     private var connectionId: Option[String] = None
     private var conn: Option[Socket] = None
 
diff --git a/core/src/test/scala/unit/kafka/raft/RaftManagerTest.scala b/core/src/test/scala/unit/kafka/raft/RaftManagerTest.scala
index a7a9519455aa7..9d7a93db94c74 100644
--- a/core/src/test/scala/unit/kafka/raft/RaftManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/raft/RaftManagerTest.scala
@@ -18,7 +18,6 @@ package kafka.raft
 
 import java.util.concurrent.CompletableFuture
 import java.util.Properties
-
 import kafka.raft.KafkaRaftManager.RaftIoThread
 import kafka.server.{KafkaConfig, MetaProperties}
 import kafka.tools.TestRaftServer.ByteArraySerde
@@ -83,23 +82,23 @@ class RaftManagerTest {
   }
 
   @Test
-  def testSentinelNodeIdIfBrokerRoleOnly(): Unit = {
+  def testNodeIdPresentIfBrokerRoleOnly(): Unit = {
     val raftManager = instantiateRaftManagerWithConfigs(new TopicPartition("__raft_id_test", 0), "broker", "1")
-    assertFalse(raftManager.client.nodeId.isPresent)
+    assertEquals(1, raftManager.client.nodeId.getAsInt)
     raftManager.shutdown()
   }
 
   @Test
   def testNodeIdPresentIfControllerRoleOnly(): Unit = {
     val raftManager = instantiateRaftManagerWithConfigs(new TopicPartition("__raft_id_test", 0), "controller", "1")
-    assertTrue(raftManager.client.nodeId.getAsInt == 1)
+    assertEquals(1, raftManager.client.nodeId.getAsInt)
     raftManager.shutdown()
   }
 
   @Test
   def testNodeIdPresentIfColocated(): Unit = {
     val raftManager = instantiateRaftManagerWithConfigs(new TopicPartition("__raft_id_test", 0), "controller,broker", "1")
-    assertTrue(raftManager.client.nodeId.getAsInt == 1)
+    assertEquals(1, raftManager.client.nodeId.getAsInt)
     raftManager.shutdown()
   }
 
diff --git a/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala b/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala
index 3bbce4d568934..3c35fd7d429de 100644
--- a/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala
+++ b/core/src/test/scala/unit/kafka/security/auth/ZkAuthorizationTest.scala
@@ -18,6 +18,7 @@
 package kafka.security.auth
 
 import java.nio.charset.StandardCharsets
+
 import kafka.admin.ZkSecurityMigrator
 import kafka.server.QuorumTestHarness
 import kafka.utils.{Logging, TestUtils}
@@ -30,12 +31,12 @@ import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 
 import scala.util.{Failure, Success, Try}
 import javax.security.auth.login.Configuration
-import kafka.api.ApiVersion
 import kafka.cluster.{Broker, EndPoint}
 import kafka.controller.ReplicaAssignment
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.server.common.MetadataVersion
 import org.apache.zookeeper.client.ZKClientConfig
 
 import scala.jdk.CollectionConverters._
@@ -136,7 +137,7 @@ class ZkAuthorizationTest extends QuorumTestHarness with Logging {
   private def createBrokerInfo(id: Int, host: String, port: Int, securityProtocol: SecurityProtocol,
                                rack: Option[String] = None): BrokerInfo =
     BrokerInfo(Broker(id, Seq(new EndPoint(host, port, ListenerName.forSecurityProtocol
-    (securityProtocol), securityProtocol)), rack = rack), ApiVersion.latestVersion, jmxPort = port + 10)
+    (securityProtocol), securityProtocol)), rack = rack), MetadataVersion.latest, jmxPort = port + 10)
 
   private def newKafkaZkClient(connectionString: String, isSecure: Boolean) =
     KafkaZkClient(connectionString, isSecure, 6000, 6000, Int.MaxValue, Time.SYSTEM, "ZkAuthorizationTest",
diff --git a/core/src/test/scala/unit/kafka/security/authorizer/AclAuthorizerTest.scala b/core/src/test/scala/unit/kafka/security/authorizer/AclAuthorizerTest.scala
index 9011eb616f18a..3be34921423dc 100644
--- a/core/src/test/scala/unit/kafka/security/authorizer/AclAuthorizerTest.scala
+++ b/core/src/test/scala/unit/kafka/security/authorizer/AclAuthorizerTest.scala
@@ -16,38 +16,39 @@
  */
 package kafka.security.authorizer
 
-import java.io.File
-import java.net.InetAddress
-import java.nio.charset.StandardCharsets.UTF_8
-import java.nio.file.Files
-import java.util.{Collections, UUID}
-import java.util.concurrent.{Executors, Semaphore, TimeUnit}
 import kafka.Kafka
-import kafka.api.{ApiVersion, KAFKA_2_0_IV0, KAFKA_2_0_IV1}
 import kafka.security.authorizer.AclEntry.{WildcardHost, WildcardPrincipalString}
 import kafka.server.{KafkaConfig, QuorumTestHarness}
 import kafka.utils.TestUtils
 import kafka.zk.ZkAclStore
 import kafka.zookeeper.{GetChildrenRequest, GetDataRequest, ZooKeeperClient}
-import org.apache.kafka.common.acl._
 import org.apache.kafka.common.acl.AclOperation._
 import org.apache.kafka.common.acl.AclPermissionType.{ALLOW, DENY}
+import org.apache.kafka.common.acl._
 import org.apache.kafka.common.errors.{ApiException, UnsupportedVersionException}
 import org.apache.kafka.common.requests.RequestContext
-import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourcePatternFilter, ResourceType}
+import org.apache.kafka.common.resource.PatternType.{LITERAL, MATCH, PREFIXED}
 import org.apache.kafka.common.resource.Resource.CLUSTER_NAME
 import org.apache.kafka.common.resource.ResourcePattern.WILDCARD_RESOURCE
 import org.apache.kafka.common.resource.ResourceType._
-import org.apache.kafka.common.resource.PatternType.{LITERAL, MATCH, PREFIXED}
+import org.apache.kafka.common.resource.{PatternType, ResourcePattern, ResourcePatternFilter, ResourceType}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
-import org.apache.kafka.server.authorizer._
 import org.apache.kafka.common.utils.{Time, SecurityUtils => JSecurityUtils}
+import org.apache.kafka.server.authorizer._
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_0_IV0, IBP_2_0_IV1}
 import org.apache.zookeeper.client.ZKClientConfig
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 
-import scala.jdk.CollectionConverters._
+import java.io.File
+import java.net.InetAddress
+import java.nio.charset.StandardCharsets.UTF_8
+import java.nio.file.Files
+import java.util.concurrent.{Executors, Semaphore, TimeUnit}
+import java.util.{Collections, UUID}
 import scala.collection.mutable
+import scala.jdk.CollectionConverters._
 
 class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
 
@@ -484,9 +485,9 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
   @Test
   def testAclInheritance(): Unit = {
     testImplicationsOfAllow(AclOperation.ALL, Set(READ, WRITE, CREATE, DELETE, ALTER, DESCRIBE,
-      CLUSTER_ACTION, DESCRIBE_CONFIGS, ALTER_CONFIGS, IDEMPOTENT_WRITE))
+      CLUSTER_ACTION, DESCRIBE_CONFIGS, ALTER_CONFIGS, IDEMPOTENT_WRITE, CREATE_TOKENS, DESCRIBE_TOKENS))
     testImplicationsOfDeny(AclOperation.ALL, Set(READ, WRITE, CREATE, DELETE, ALTER, DESCRIBE,
-      CLUSTER_ACTION, DESCRIBE_CONFIGS, ALTER_CONFIGS, IDEMPOTENT_WRITE))
+      CLUSTER_ACTION, DESCRIBE_CONFIGS, ALTER_CONFIGS, IDEMPOTENT_WRITE, CREATE_TOKENS, DESCRIBE_TOKENS))
     testImplicationsOfAllow(READ, Set(DESCRIBE))
     testImplicationsOfAllow(WRITE, Set(DESCRIBE))
     testImplicationsOfAllow(DELETE, Set(DESCRIBE))
@@ -714,12 +715,18 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
 
   @Test
   def testThrowsOnAddPrefixedAclIfInterBrokerProtocolVersionTooLow(): Unit = {
-    givenAuthorizerWithProtocolVersion(Option(KAFKA_2_0_IV0))
+    givenAuthorizerWithProtocolVersion(Option(IBP_2_0_IV0))
     val e = assertThrows(classOf[ApiException],
       () => addAcls(aclAuthorizer, Set(denyReadAcl), new ResourcePattern(TOPIC, "z_other", PREFIXED)))
     assertTrue(e.getCause.isInstanceOf[UnsupportedVersionException], s"Unexpected exception $e")
   }
 
+  @Test
+  def testCreateAclWithInvalidResourceName(): Unit = {
+    assertThrows(classOf[ApiException],
+      () => addAcls(aclAuthorizer, Set(allowReadAcl), new ResourcePattern(TOPIC, "test/1", LITERAL)))
+  }
+
   @Test
   def testWritesExtendedAclChangeEventIfInterBrokerProtocolNotSet(): Unit = {
     givenAuthorizerWithProtocolVersion(Option.empty)
@@ -736,7 +743,7 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
 
   @Test
   def testWritesExtendedAclChangeEventWhenInterBrokerProtocolAtLeastKafkaV2(): Unit = {
-    givenAuthorizerWithProtocolVersion(Option(KAFKA_2_0_IV1))
+    givenAuthorizerWithProtocolVersion(Option(IBP_2_0_IV1))
     val resource = new ResourcePattern(TOPIC, "z_other", PREFIXED)
     val expected = new String(ZkAclStore(PREFIXED).changeStore
       .createChangeNode(resource).bytes, UTF_8)
@@ -750,7 +757,7 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
 
   @Test
   def testWritesLiteralWritesLiteralAclChangeEventWhenInterBrokerProtocolLessThanKafkaV2eralAclChangesForOlderProtocolVersions(): Unit = {
-    givenAuthorizerWithProtocolVersion(Option(KAFKA_2_0_IV0))
+    givenAuthorizerWithProtocolVersion(Option(IBP_2_0_IV0))
     val resource = new ResourcePattern(TOPIC, "z_other", LITERAL)
     val expected = new String(ZkAclStore(LITERAL).changeStore
       .createChangeNode(resource).bytes, UTF_8)
@@ -764,7 +771,7 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
 
   @Test
   def testWritesLiteralAclChangeEventWhenInterBrokerProtocolIsKafkaV2(): Unit = {
-    givenAuthorizerWithProtocolVersion(Option(KAFKA_2_0_IV1))
+    givenAuthorizerWithProtocolVersion(Option(IBP_2_0_IV1))
     val resource = new ResourcePattern(TOPIC, "z_other", LITERAL)
     val expected = new String(ZkAclStore(LITERAL).changeStore
       .createChangeNode(resource).bytes, UTF_8)
@@ -994,7 +1001,7 @@ class AclAuthorizerTest extends QuorumTestHarness with BaseAuthorizerTest {
     }
   }
 
-  private def givenAuthorizerWithProtocolVersion(protocolVersion: Option[ApiVersion]): Unit = {
+  private def givenAuthorizerWithProtocolVersion(protocolVersion: Option[MetadataVersion]): Unit = {
     aclAuthorizer.close()
 
     val props = TestUtils.createBrokerConfig(0, zkConnect)
diff --git a/core/src/test/scala/unit/kafka/security/token/delegation/DelegationTokenManagerTest.scala b/core/src/test/scala/unit/kafka/security/token/delegation/DelegationTokenManagerTest.scala
index 523b6a74b00f2..0cd67edfd0c94 100644
--- a/core/src/test/scala/unit/kafka/security/token/delegation/DelegationTokenManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/security/token/delegation/DelegationTokenManagerTest.scala
@@ -20,7 +20,6 @@ package kafka.security.token.delegation
 import java.net.InetAddress
 import java.nio.ByteBuffer
 import java.util.{Base64, Properties}
-
 import kafka.network.RequestChannel.Session
 import kafka.security.authorizer.{AclAuthorizer, AuthorizerUtils}
 import kafka.security.authorizer.AclEntry.WildcardHost
@@ -33,7 +32,7 @@ import org.apache.kafka.common.acl.AclPermissionType.ALLOW
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.resource.PatternType.LITERAL
 import org.apache.kafka.common.resource.ResourcePattern
-import org.apache.kafka.common.resource.ResourceType.DELEGATION_TOKEN
+import org.apache.kafka.common.resource.ResourceType.{DELEGATION_TOKEN, USER}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
 import org.apache.kafka.common.security.scram.internals.ScramMechanism
 import org.apache.kafka.common.security.token.delegation.internals.DelegationTokenCache
@@ -84,7 +83,7 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     val config = KafkaConfig.fromProps(props)
     val tokenManager = createDelegationTokenManager(config, tokenCache, time, zkClient)
 
-    tokenManager.createToken(owner, renewer, -1, createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, -1, createTokenResultCallBack)
     assertEquals(Errors.DELEGATION_TOKEN_AUTH_DISABLED, createTokenResult.error)
     assert(Array[Byte]() sameElements createTokenResult.hmac)
 
@@ -101,11 +100,11 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     val tokenManager = createDelegationTokenManager(config, tokenCache, time, zkClient)
     tokenManager.startup()
 
-    tokenManager.createToken(owner, renewer, -1 , createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, -1 , createTokenResultCallBack)
     val issueTime = time.milliseconds
     val tokenId = createTokenResult.tokenId
     val password = DelegationTokenManager.createHmac(tokenId, secretKey)
-    assertEquals(CreateTokenResult(issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
+    assertEquals(CreateTokenResult(owner, owner, issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
 
     val token = tokenManager.getToken(tokenId)
     assertFalse(token.isEmpty )
@@ -118,12 +117,12 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     val tokenManager = createDelegationTokenManager(config, tokenCache, time, zkClient)
     tokenManager.startup()
 
-    tokenManager.createToken(owner, renewer, -1 , createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, -1 , createTokenResultCallBack)
     val issueTime = time.milliseconds
     val maxLifeTime = issueTime + maxLifeTimeMsDefault
     val tokenId = createTokenResult.tokenId
     val password = DelegationTokenManager.createHmac(tokenId, secretKey)
-    assertEquals(CreateTokenResult(issueTime, issueTime + renewTimeMsDefault,  maxLifeTime, tokenId, password, Errors.NONE), createTokenResult)
+    assertEquals(CreateTokenResult(owner, owner, issueTime, issueTime + renewTimeMsDefault,  maxLifeTime, tokenId, password, Errors.NONE), createTokenResult)
 
     //try renewing non-existing token
     tokenManager.renewToken(owner, ByteBuffer.wrap("test".getBytes), -1 , renewResponseCallback)
@@ -166,11 +165,11 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     val tokenManager = createDelegationTokenManager(config, tokenCache, time, zkClient)
     tokenManager.startup()
 
-    tokenManager.createToken(owner, renewer, -1 , createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, -1 , createTokenResultCallBack)
     val issueTime = time.milliseconds
     val tokenId = createTokenResult.tokenId
     val password = DelegationTokenManager.createHmac(tokenId, secretKey)
-    assertEquals(CreateTokenResult(issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
+    assertEquals(CreateTokenResult(owner, owner, issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
 
     //try expire non-existing token
     tokenManager.expireToken(owner, ByteBuffer.wrap("test".getBytes), -1 , renewResponseCallback)
@@ -201,11 +200,11 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     val tokenManager = createDelegationTokenManager(config, tokenCache, time, zkClient)
     tokenManager.startup()
 
-    tokenManager.createToken(owner, renewer, -1 , createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, -1 , createTokenResultCallBack)
     val issueTime = time.milliseconds
     val tokenId = createTokenResult.tokenId
     val password = DelegationTokenManager.createHmac(tokenId, secretKey)
-    assertEquals(CreateTokenResult(issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
+    assertEquals(CreateTokenResult(owner, owner, issueTime, issueTime + renewTimeMsDefault,  issueTime + maxLifeTimeMsDefault, tokenId, password, Errors.NONE), createTokenResult)
 
     // expire the token immediately
     tokenManager.expireToken(owner, ByteBuffer.wrap(password), -1, renewResponseCallback)
@@ -224,10 +223,13 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
 
     val config = KafkaConfig.fromProps(props)
 
+    val requester1 = SecurityUtils.parseKafkaPrincipal("User:requester1")
+
     val owner1 = SecurityUtils.parseKafkaPrincipal("User:owner1")
     val owner2 = SecurityUtils.parseKafkaPrincipal("User:owner2")
     val owner3 = SecurityUtils.parseKafkaPrincipal("User:owner3")
     val owner4 = SecurityUtils.parseKafkaPrincipal("User:owner4")
+    val owner5 = SecurityUtils.parseKafkaPrincipal("User:owner5")
 
     val renewer1 = SecurityUtils.parseKafkaPrincipal("User:renewer1")
     val renewer2 = SecurityUtils.parseKafkaPrincipal("User:renewer2")
@@ -243,41 +245,47 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     tokenManager.startup()
 
     //create tokens
-    tokenManager.createToken(owner1, List(renewer1, renewer2), 1 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner1, owner1, List(renewer1, renewer2), 1 * 60 * 60 * 1000L, createTokenResultCallBack)
 
-    tokenManager.createToken(owner2, List(renewer3), 1 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner2, owner2, List(renewer3), 1 * 60 * 60 * 1000L, createTokenResultCallBack)
     val tokenId2 = createTokenResult.tokenId
 
-    tokenManager.createToken(owner3, List(renewer4), 2 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner3, owner3, List(renewer4), 2 * 60 * 60 * 1000L, createTokenResultCallBack)
     val tokenId3 = createTokenResult.tokenId
 
-    tokenManager.createToken(owner4, List(owner1, renewer4), 2 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner4, owner4, List(owner1, renewer4), 2 * 60 * 60 * 1000L, createTokenResultCallBack)
 
-    assert(tokenManager.getAllTokenInformation.size == 4 )
+    tokenManager.createToken(requester1, owner5, List(renewer1), 1 * 60 * 60 * 1000L, createTokenResultCallBack)
+
+    assertEquals(5, tokenManager.getAllTokenInformation.size)
 
     //get tokens non-exiting owner
     var  tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, List(SecurityUtils.parseKafkaPrincipal("User:unknown")))
-    assert(tokens.size == 0)
+    assertEquals(0, tokens.size)
 
     //get all tokens for  empty owner list
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, List())
-    assert(tokens.size == 0)
+    assertEquals(0, tokens.size)
 
     //get all tokens for owner1
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, List(owner1))
-    assert(tokens.size == 2)
+    assertEquals(2, tokens.size)
 
     //get all tokens for owner1
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, null)
-    assert(tokens.size == 2)
+    assertEquals(2, tokens.size)
 
     //get all tokens for unknown owner
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, SecurityUtils.parseKafkaPrincipal("User:unknown"), null)
-    assert(tokens.size == 0)
+    assertEquals(0, tokens.size)
 
     //get all tokens for multiple owners (owner1, renewer4) and without permission for renewer4
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, List(owner1, renewer4))
-    assert(tokens.size == 2)
+    assertEquals(2, tokens.size)
+
+    // get tokens for owner5 with requester1
+    tokens = getTokens(tokenManager, aclAuthorizer, hostSession, requester1, List(owner5))
+    assertEquals(1, tokens.size)
 
     def createAcl(aclBinding: AclBinding): Unit = {
       val result = aclAuthorizer.createAcls(null, List(aclBinding).asJava).get(0).toCompletableFuture.get
@@ -288,22 +296,22 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     createAcl(new AclBinding(new ResourcePattern(DELEGATION_TOKEN, tokenId3, LITERAL),
       new AccessControlEntry(owner1.toString, WildcardHost, DESCRIBE, ALLOW)))
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession, owner1, List(owner1, renewer4))
-    assert(tokens.size == 3)
+    assertEquals(3, tokens.size)
 
     //get all tokens for renewer4 which is a renewer principal for some tokens
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession,  renewer4, List(renewer4))
-    assert(tokens.size == 2)
+    assertEquals(2, tokens.size)
 
     //get all tokens for multiple owners (renewer2, renewer3) which are token renewers principals and without permissions for renewer3
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession,  renewer2, List(renewer2, renewer3))
-    assert(tokens.size == 1)
+    assertEquals(1, tokens.size)
 
     //get all tokens for multiple owners (renewer2, renewer3) which are token renewers principals and with permissions
     hostSession = Session(renewer2, InetAddress.getByName("192.168.1.1"))
     createAcl(new AclBinding(new ResourcePattern(DELEGATION_TOKEN, tokenId2, LITERAL),
       new AccessControlEntry(renewer2.toString, WildcardHost, DESCRIBE, ALLOW)))
     tokens = getTokens(tokenManager, aclAuthorizer, hostSession,  renewer2, List(renewer2, renewer3))
-    assert(tokens.size == 2)
+    assertEquals(2, tokens.size)
 
     aclAuthorizer.close()
   }
@@ -315,13 +323,20 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
       List()
     }
     else {
-      def authorizeToken(tokenId: String) = {
+      def authorizeToken(tokenId: String): Boolean = {
         val requestContext = AuthorizerUtils.sessionToRequestContext(hostSession)
         val action = new Action(AclOperation.DESCRIBE,
           new ResourcePattern(DELEGATION_TOKEN, tokenId, LITERAL), 1, true, true)
         aclAuthorizer.authorize(requestContext, List(action).asJava).asScala.head == AuthorizationResult.ALLOWED
       }
-      def eligible(token: TokenInformation) = DelegationTokenManager.filterToken(requestPrincipal, Option(requestedOwners), token, authorizeToken)
+      def authorizeRequester(owner: KafkaPrincipal): Boolean = {
+        val requestContext = AuthorizerUtils.sessionToRequestContext(hostSession)
+        val action = new Action(AclOperation.DESCRIBE_TOKENS,
+          new ResourcePattern(USER, owner.toString, LITERAL), 1, true, true)
+        aclAuthorizer.authorize(requestContext, List(action).asJava).asScala.head == AuthorizationResult.ALLOWED
+      }
+      def eligible(token: TokenInformation) = DelegationTokenManager
+        .filterToken(requestPrincipal, Option(requestedOwners), token, authorizeToken, authorizeRequester)
       tokenManager.getTokens(eligible)
     }
   }
@@ -333,10 +348,10 @@ class DelegationTokenManagerTest extends QuorumTestHarness  {
     tokenManager.startup()
 
     //create tokens
-    tokenManager.createToken(owner, renewer, 1 * 60 * 60 * 1000L, createTokenResultCallBack)
-    tokenManager.createToken(owner, renewer, 1 * 60 * 60 * 1000L, createTokenResultCallBack)
-    tokenManager.createToken(owner, renewer, 2 * 60 * 60 * 1000L, createTokenResultCallBack)
-    tokenManager.createToken(owner, renewer, 2 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, 1 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, 1 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, 2 * 60 * 60 * 1000L, createTokenResultCallBack)
+    tokenManager.createToken(owner, owner, renewer, 2 * 60 * 60 * 1000L, createTokenResultCallBack)
     assert(tokenManager.getAllTokenInformation.size == 4 )
 
     time.sleep(2 * 60 * 60 * 1000L)
diff --git a/core/src/test/scala/unit/kafka/server/AbstractApiVersionsRequestTest.scala b/core/src/test/scala/unit/kafka/server/AbstractApiVersionsRequestTest.scala
index 530bc235b380c..99d593ede65dc 100644
--- a/core/src/test/scala/unit/kafka/server/AbstractApiVersionsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/AbstractApiVersionsRequestTest.scala
@@ -77,7 +77,7 @@ abstract class AbstractApiVersionsRequestTest(cluster: ClusterInstance) {
       ApiVersionsResponse.intersectForwardableApis(
         ApiMessageType.ListenerType.BROKER,
         RecordVersion.current,
-        new NodeApiVersions(ApiKeys.controllerApis().asScala.map(ApiVersionsResponse.toApiVersion).asJava).allSupportedApiVersions()
+        NodeApiVersions.create(ApiKeys.controllerApis().asScala.map(ApiVersionsResponse.toApiVersion).asJava).allSupportedApiVersions()
       )
     }
 
diff --git a/core/src/test/scala/unit/kafka/server/AbstractCreateTopicsRequestTest.scala b/core/src/test/scala/unit/kafka/server/AbstractCreateTopicsRequestTest.scala
index 91ff1d577da06..ecee2cd19c4e5 100644
--- a/core/src/test/scala/unit/kafka/server/AbstractCreateTopicsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/AbstractCreateTopicsRequestTest.scala
@@ -91,16 +91,8 @@ abstract class AbstractCreateTopicsRequestTest extends BaseRequestTest {
     topic
   }
 
-  def createTopicsSocketServer: SocketServer = {
-    if (isKRaftTest()) {
-      anySocketServer
-    } else {
-      controllerSocketServer
-    }
-  }
-
   protected def validateValidCreateTopicsRequests(request: CreateTopicsRequest): Unit = {
-    val response = sendCreateTopicRequest(request, createTopicsSocketServer)
+    val response = sendCreateTopicRequest(request, adminSocketServer)
 
     assertFalse(response.errorCounts().keySet().asScala.exists(_.code() > 0),
       s"There should be no errors, found ${response.errorCounts().keySet().asScala.mkString(", ")},")
@@ -162,7 +154,7 @@ abstract class AbstractCreateTopicsRequestTest extends BaseRequestTest {
   protected def validateErrorCreateTopicsRequests(request: CreateTopicsRequest,
                                                   expectedResponse: Map[String, ApiError],
                                                   checkErrorMessage: Boolean = true): Unit = {
-    val response = sendCreateTopicRequest(request, createTopicsSocketServer)
+    val response = sendCreateTopicRequest(request, adminSocketServer)
     assertEquals(expectedResponse.size, response.data().topics().size, "The response size should match")
 
     expectedResponse.foreach { case (topicName, expectedError) =>
diff --git a/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala b/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala
index 647f8ae61a6eb..cb60384a6b0e4 100644
--- a/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala
@@ -18,13 +18,20 @@ package kafka.server
 
 import com.yammer.metrics.core.Gauge
 import kafka.cluster.BrokerEndPoint
-import kafka.metrics.KafkaYammerMetrics
+import kafka.log.LogAppendInfo
+import kafka.server.AbstractFetcherThread.{ReplicaFetch, ResultWithPartitions}
+import kafka.utils.Implicits.MapExtensionMethods
 import kafka.utils.TestUtils
+import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
+import org.apache.kafka.common.requests.FetchRequest
+import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.common.{TopicPartition, Uuid}
-import org.junit.jupiter.api.{BeforeEach, Test}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.api.{BeforeEach, Test}
 import org.mockito.Mockito.{mock, verify, when}
 
+import scala.collection.{Map, Set, mutable}
 import scala.jdk.CollectionConverters._
 
 class AbstractFetcherManagerTest {
@@ -58,8 +65,8 @@ class AbstractFetcherManagerTest {
       currentLeaderEpoch = leaderEpoch,
       initOffset = fetchOffset)
 
-    when(fetcher.sourceBroker)
-      .thenReturn(new BrokerEndPoint(0, "localhost", 9092))
+    when(fetcher.leader)
+      .thenReturn(new MockLeaderEndPoint(new BrokerEndPoint(0, "localhost", 9092)))
     when(fetcher.addPartitions(Map(tp -> initialFetchState)))
       .thenReturn(Set(tp))
     when(fetcher.fetchState(tp))
@@ -100,6 +107,7 @@ class AbstractFetcherManagerTest {
     fetcherManager.removeFetcherForPartitions(Set(tp))
     assertEquals(0, getMetricValue(metricName))
   }
+
   @Test
   def testDeadThreadCountMetric(): Unit = {
     val fetcher: AbstractFetcherThread = mock(classOf[AbstractFetcherThread])
@@ -119,8 +127,8 @@ class AbstractFetcherManagerTest {
       currentLeaderEpoch = leaderEpoch,
       initOffset = fetchOffset)
 
-    when(fetcher.sourceBroker)
-      .thenReturn(new BrokerEndPoint(0, "localhost", 9092))
+    when(fetcher.leader)
+      .thenReturn(new MockLeaderEndPoint(new BrokerEndPoint(0, "localhost", 9092)))
     when(fetcher.addPartitions(Map(tp -> initialFetchState)))
       .thenReturn(Set(tp))
     when(fetcher.isThreadFailed).thenReturn(true)
@@ -166,8 +174,8 @@ class AbstractFetcherManagerTest {
       initOffset = fetchOffset)
 
     // Simulate calls to different fetchers due to different leaders
-    when(fetcher.sourceBroker)
-      .thenReturn(new BrokerEndPoint(0, "localhost", 9092))
+    when(fetcher.leader)
+      .thenReturn(new MockLeaderEndPoint(new BrokerEndPoint(0, "localhost", 9092)))
     when(fetcher.addPartitions(Map(tp1 -> initialFetchState1)))
       .thenReturn(Set(tp1))
     when(fetcher.addPartitions(Map(tp2 -> initialFetchState2)))
@@ -210,4 +218,122 @@ class AbstractFetcherManagerTest {
     verify(fetcher).maybeUpdateTopicIds(Set(tp1), topicIds)
     verify(fetcher).maybeUpdateTopicIds(Set(tp2), topicIds)
   }
+
+  @Test
+  def testExpandThreadPool(): Unit = {
+    testResizeThreadPool(10, 50)
+  }
+
+  @Test
+  def testShrinkThreadPool(): Unit = {
+    testResizeThreadPool(50, 10)
+  }
+
+  private def testResizeThreadPool(currentFetcherSize: Int, newFetcherSize: Int, brokerNum: Int = 6): Unit = {
+    val fetchingTopicPartitions = makeTopicPartition(10, 100)
+    val failedTopicPartitions = makeTopicPartition(2, 5, "topic_failed")
+    val fetcherManager = new AbstractFetcherManager[AbstractFetcherThread]("fetcher-manager", "fetcher-manager", currentFetcherSize) {
+      override def createFetcherThread(fetcherId: Int, sourceBroker: BrokerEndPoint): AbstractFetcherThread = {
+        new TestResizeFetcherThread(sourceBroker, failedPartitions)
+      }
+    }
+    try {
+      fetcherManager.addFetcherForPartitions(fetchingTopicPartitions.map { tp =>
+        val brokerId = getBrokerId(tp, brokerNum)
+        val brokerEndPoint = new BrokerEndPoint(brokerId, s"kafka-host-$brokerId", 9092)
+        tp -> InitialFetchState(None, brokerEndPoint, 0, 0)
+      }.toMap)
+
+      // Mark some of these partitions failed within resizing scope
+      fetchingTopicPartitions.take(20).foreach(fetcherManager.addFailedPartition)
+      // Mark failed partitions out of resizing scope
+      failedTopicPartitions.foreach(fetcherManager.addFailedPartition)
+
+      fetcherManager.resizeThreadPool(newFetcherSize)
+
+      val ownedPartitions = mutable.Set.empty[TopicPartition]
+      fetcherManager.fetcherThreadMap.forKeyValue { (brokerIdAndFetcherId, fetcherThread) =>
+        val fetcherId = brokerIdAndFetcherId.fetcherId
+        val brokerId = brokerIdAndFetcherId.brokerId
+
+        fetcherThread.partitions.foreach { tp =>
+          ownedPartitions += tp
+          assertEquals(fetcherManager.getFetcherId(tp), fetcherId)
+          assertEquals(getBrokerId(tp, brokerNum), brokerId)
+        }
+      }
+      // Verify that all partitions are owned by the fetcher threads.
+      assertEquals(fetchingTopicPartitions, ownedPartitions)
+
+      // Only failed partitions should still be kept after resizing
+      assertEquals(failedTopicPartitions, fetcherManager.failedPartitions.partitions())
+    } finally {
+      fetcherManager.closeAllFetchers()
+    }
+  }
+
+
+  private def makeTopicPartition(topicNum: Int, partitionNum: Int, topicPrefix: String = "topic_"): Set[TopicPartition] = {
+    val res = mutable.Set[TopicPartition]()
+    for (i <- 0 to topicNum - 1) {
+      val topic = topicPrefix + i
+      for (j <- 0 to partitionNum - 1) {
+        res += new TopicPartition(topic, j)
+      }
+    }
+    res.toSet
+  }
+
+  private def getBrokerId(tp: TopicPartition, brokerNum: Int): Int = {
+    Utils.abs(tp.hashCode) % brokerNum
+  }
+
+  private class MockLeaderEndPoint(sourceBroker: BrokerEndPoint) extends LeaderEndPoint {
+    override def initiateClose(): Unit = {}
+
+    override def close(): Unit = {}
+
+    override def brokerEndPoint(): BrokerEndPoint = sourceBroker
+
+    override def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = Map.empty
+
+    override def fetchEarliestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = 1
+
+    override def fetchLatestOffset(topicPartition: TopicPartition, currentLeaderEpoch: Int): Long = 1
+
+    override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = Map.empty
+
+    override def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = ResultWithPartitions(None, Set.empty)
+
+    override val isTruncationOnFetchSupported: Boolean = false
+  }
+
+  private class TestResizeFetcherThread(sourceBroker: BrokerEndPoint, failedPartitions: FailedPartitions)
+    extends AbstractFetcherThread(
+      name = "test-resize-fetcher",
+      clientId = "mock-fetcher",
+      leader = new MockLeaderEndPoint(sourceBroker),
+      failedPartitions,
+      fetchBackOffMs = 0,
+      brokerTopicStats = new BrokerTopicStats) {
+
+    override protected def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = {
+      None
+    }
+
+    override protected def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {}
+
+    override protected def truncateFullyAndStartAt(topicPartition: TopicPartition, offset: Long): Unit = {}
+
+    override protected def latestEpoch(topicPartition: TopicPartition): Option[Int] = Some(0)
+
+    override protected def logStartOffset(topicPartition: TopicPartition): Long = 1
+
+    override protected def logEndOffset(topicPartition: TopicPartition): Long = 1
+
+    override protected def endOffsetForEpoch(topicPartition: TopicPartition, epoch: Int): Option[OffsetAndEpoch] = Some(OffsetAndEpoch(1, 0))
+
+    override protected val isOffsetForLeaderEpochSupported: Boolean = false
+  }
+
 }
diff --git a/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala
index 148a903187b1d..cdd17b1af2ca4 100644
--- a/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala
+++ b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala
@@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicInteger
 import kafka.cluster.BrokerEndPoint
 import kafka.log.LogAppendInfo
 import kafka.message.NoCompressionCodec
-import kafka.metrics.KafkaYammerMetrics
 import kafka.server.AbstractFetcherThread.ReplicaFetch
 import kafka.server.AbstractFetcherThread.ResultWithPartitions
 import kafka.utils.Implicits.MapExtensionMethods
@@ -38,6 +37,7 @@ import org.apache.kafka.common.record._
 import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.{UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET}
 import org.apache.kafka.common.requests.{FetchRequest, FetchResponse}
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Assumptions.assumeTrue
 import org.junit.jupiter.api.{BeforeEach, Test}
@@ -77,12 +77,13 @@ class AbstractFetcherThreadTest {
   @Test
   def testMetricsRemovedOnShutdown(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // add one partition to create the consumer lag metric
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
-    fetcher.setLeaderState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setLeaderState(partition, PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.start()
 
@@ -104,12 +105,13 @@ class AbstractFetcherThreadTest {
   @Test
   def testConsumerLagRemovedWithPartition(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // add one partition to create the consumer lag metric
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
-    fetcher.setLeaderState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setLeaderState(partition, PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -126,15 +128,16 @@ class AbstractFetcherThreadTest {
   @Test
   def testSimpleFetch(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0,
       new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -148,19 +151,20 @@ class AbstractFetcherThreadTest {
     val partition = new TopicPartition("topic", 0)
     val fetchBackOffMs = 250
 
-    val fetcher = new MockFetcherThread(fetchBackOffMs = fetchBackOffMs) {
-      override def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
+      override def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
         throw new UnknownTopicIdException("Topic ID was unknown as expected for this test")
       }
-    }
+    }, fetchBackOffMs = fetchBackOffMs)
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(Some(Uuid.randomUuid()), 0L, leaderEpoch = 0)))
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0,
       new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // Do work for the first time. This should result in all partitions in error.
     val timeBeforeFirst = System.currentTimeMillis()
@@ -187,27 +191,28 @@ class AbstractFetcherThreadTest {
     val partition3 = new TopicPartition("topic3", 0)
     val fetchBackOffMs = 250
 
-    val fetcher = new MockFetcherThread(fetchBackOffMs = fetchBackOffMs) {
-      override def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
+      override def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
         Map(partition1 -> new FetchData().setErrorCode(Errors.UNKNOWN_TOPIC_ID.code),
           partition2 -> new FetchData().setErrorCode(Errors.INCONSISTENT_TOPIC_ID.code),
           partition3 -> new FetchData().setErrorCode(Errors.NONE.code))
       }
-    }
+    }, fetchBackOffMs = fetchBackOffMs)
 
-    fetcher.setReplicaState(partition1, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition1, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition1 -> initialFetchState(Some(Uuid.randomUuid()), 0L, leaderEpoch = 0)))
-    fetcher.setReplicaState(partition2, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition2, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition2 -> initialFetchState(Some(Uuid.randomUuid()), 0L, leaderEpoch = 0)))
-    fetcher.setReplicaState(partition3, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition3, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition3 -> initialFetchState(Some(Uuid.randomUuid()), 0L, leaderEpoch = 0)))
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0,
       new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition1, leaderState)
-    fetcher.setLeaderState(partition2, leaderState)
-    fetcher.setLeaderState(partition3, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition1, leaderState)
+    fetcher.mockLeader.setLeaderState(partition2, leaderState)
+    fetcher.mockLeader.setLeaderState(partition3, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -227,15 +232,16 @@ class AbstractFetcherThreadTest {
   @Test
   def testFencedTruncation(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 1,
       new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 1, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 1, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -252,17 +258,18 @@ class AbstractFetcherThreadTest {
   @Test
   def testFencedFetch(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
-    val replicaState = MockFetcherThread.PartitionState(leaderEpoch = 0)
+    val replicaState = PartitionState(leaderEpoch = 0)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0,
       new SimpleRecord("a".getBytes),
       new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -270,7 +277,7 @@ class AbstractFetcherThreadTest {
     assertEquals(2, replicaState.logEndOffset)
 
     // Bump the epoch on the leader
-    fetcher.leaderPartitionState(partition).leaderEpoch += 1
+    fetcher.mockLeader.leaderPartitionState(partition).leaderEpoch += 1
 
     fetcher.doWork()
 
@@ -282,16 +289,17 @@ class AbstractFetcherThreadTest {
   @Test
   def testUnknownLeaderEpochInTruncation(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // The replica's leader epoch is ahead of the leader
-    val replicaState = MockFetcherThread.PartitionState(leaderEpoch = 1)
+    val replicaState = PartitionState(leaderEpoch = 1)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 1)), forceTruncation = true)
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0, new SimpleRecord("a".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -300,7 +308,7 @@ class AbstractFetcherThreadTest {
     assertEquals(Some(Truncating), fetcher.fetchState(partition).map(_.state))
 
     // Bump the epoch on the leader
-    fetcher.leaderPartitionState(partition).leaderEpoch += 1
+    fetcher.mockLeader.leaderPartitionState(partition).leaderEpoch += 1
 
     // Now we can make progress
     fetcher.doWork()
@@ -312,21 +320,22 @@ class AbstractFetcherThreadTest {
   @Test
   def testUnknownLeaderEpochWhileFetching(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // This test is contrived because it shouldn't be possible to to see unknown leader epoch
     // in the Fetching state as the leader must validate the follower's epoch when it checks
     // the truncation offset.
 
-    val replicaState = MockFetcherThread.PartitionState(leaderEpoch = 1)
+    val replicaState = PartitionState(leaderEpoch = 1)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 1)))
 
-    val leaderState = MockFetcherThread.PartitionState(Seq(
+    val leaderState = PartitionState(Seq(
       mkBatch(baseOffset = 0L, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1L, leaderEpoch = 0, new SimpleRecord("b".getBytes))
     ), leaderEpoch = 1, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -335,7 +344,7 @@ class AbstractFetcherThreadTest {
     assertEquals(Some(Fetching), fetcher.fetchState(partition).map(_.state))
 
     // Somehow the leader epoch rewinds
-    fetcher.leaderPartitionState(partition).leaderEpoch = 0
+    fetcher.mockLeader.leaderPartitionState(partition).leaderEpoch = 0
 
     // We are stuck at the current offset
     fetcher.doWork()
@@ -343,7 +352,7 @@ class AbstractFetcherThreadTest {
     assertEquals(Some(Fetching), fetcher.fetchState(partition).map(_.state))
 
     // After returning to the right epoch, we can continue fetching
-    fetcher.leaderPartitionState(partition).leaderEpoch = 1
+    fetcher.mockLeader.leaderPartitionState(partition).leaderEpoch = 1
     fetcher.doWork()
     assertEquals(2, replicaState.logEndOffset)
     assertEquals(Some(Fetching), fetcher.fetchState(partition).map(_.state))
@@ -352,14 +361,14 @@ class AbstractFetcherThreadTest {
   @Test
   def testTruncation(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 5)))
 
@@ -368,12 +377,13 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 3, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 5, new SimpleRecord("c".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     TestUtils.waitUntilTrue(() => {
       fetcher.doWork()
-      fetcher.replicaPartitionState(partition).log == fetcher.leaderPartitionState(partition).log
+      fetcher.replicaPartitionState(partition).log == fetcher.mockLeader.leaderPartitionState(partition).log
     }, "Failed to reconcile leader and follower logs")
 
     assertEquals(leaderState.logStartOffset, replicaState.logStartOffset)
@@ -385,29 +395,28 @@ class AbstractFetcherThreadTest {
   def testTruncateToHighWatermarkIfLeaderEpochRequestNotSupported(): Unit = {
     val highWatermark = 2L
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread {
-      override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
-        assertEquals(highWatermark, truncationState.offset)
-        assertTrue(truncationState.truncationCompleted)
-        super.truncate(topicPartition, truncationState)
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
+        override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] =
+          throw new UnsupportedOperationException
+        override val isTruncationOnFetchSupported: Boolean = false
+    }) {
+        override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
+          assertEquals(highWatermark, truncationState.offset)
+          assertTrue(truncationState.truncationCompleted)
+          super.truncate(topicPartition, truncationState)
+        }
+        override protected val isOffsetForLeaderEpochSupported: Boolean = false
       }
 
-      override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] =
-        throw new UnsupportedOperationException
-
-      override protected val isOffsetForLeaderEpochSupported: Boolean = false
-
-      override protected val isTruncationOnFetchSupported: Boolean = false
-    }
-
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), highWatermark, leaderEpoch = 5)))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -420,27 +429,28 @@ class AbstractFetcherThreadTest {
   def testTruncateToHighWatermarkIfLeaderEpochInfoNotAvailable(): Unit = {
     val highWatermark = 2L
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread {
-      override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
-        assertEquals(highWatermark, truncationState.offset)
-        assertTrue(truncationState.truncationCompleted)
-        super.truncate(topicPartition, truncationState)
-      }
-
-      override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] =
-        throw new UnsupportedOperationException
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
+        override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] =
+          throw new UnsupportedOperationException
+      }) {
+        override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
+          assertEquals(highWatermark, truncationState.offset)
+          assertTrue(truncationState.truncationCompleted)
+          super.truncate(topicPartition, truncationState)
+        }
 
-      override def latestEpoch(topicPartition: TopicPartition): Option[Int] = None
-    }
+        override def latestEpoch(topicPartition: TopicPartition): Option[Int] = None
+      }
 
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), highWatermark, leaderEpoch = 5)))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -453,7 +463,7 @@ class AbstractFetcherThreadTest {
   def testTruncateToHighWatermarkDuringRemovePartitions(): Unit = {
     val highWatermark = 2L
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint) {
       override def truncateToHighWatermark(partitions: Set[TopicPartition]): Unit = {
         removePartitions(Set(partition))
         super.truncateToHighWatermark(partitions)
@@ -467,9 +477,10 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), highWatermark, leaderEpoch = 5)))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
 
@@ -482,14 +493,14 @@ class AbstractFetcherThreadTest {
     val partition = new TopicPartition("topic", 0)
 
     var truncations = 0
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint) {
       override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
         truncations += 1
         super.truncate(topicPartition, truncationState)
       }
     }
 
-    val replicaState = MockFetcherThread.PartitionState(leaderEpoch = 5)
+    val replicaState = PartitionState(leaderEpoch = 5)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 5)), forceTruncation = true)
 
@@ -498,8 +509,9 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 3, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 5, new SimpleRecord("c".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // Do one round of truncation
     fetcher.doWork()
@@ -524,7 +536,7 @@ class AbstractFetcherThreadTest {
     assumeTrue(truncateOnFetch)
     val partition = new TopicPartition("topic", 0)
     var truncations = 0
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint) {
       override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
         truncations += 1
         super.truncate(topicPartition, truncationState)
@@ -535,7 +547,7 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 2L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 2L)
     fetcher.setReplicaState(partition, replicaState)
 
     // Verify that truncation based on fetch response is performed if partition is owned by fetcher thread
@@ -564,14 +576,14 @@ class AbstractFetcherThreadTest {
   @Test
   def testFollowerFetchOutOfRangeHigh(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread()
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 4, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 4, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 4)))
 
@@ -580,8 +592,9 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 4, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 4, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // initial truncation and verify that the log end offset is updated
     fetcher.doWork()
@@ -605,23 +618,24 @@ class AbstractFetcherThreadTest {
   def testFencedOffsetResetAfterOutOfRange(): Unit = {
     val partition = new TopicPartition("topic", 0)
     var fetchedEarliestOffset = false
-    val fetcher = new MockFetcherThread() {
-      override protected def fetchEarliestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
+      override def fetchEarliestOffset(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
         fetchedEarliestOffset = true
         throw new FencedLeaderEpochException(s"Epoch $leaderEpoch is fenced")
       }
-    }
+    })
 
     val replicaLog = Seq()
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 4, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 4, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 4)))
 
     val leaderLog = Seq(
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 4, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 4, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // After the out of range error, we get a fenced error and remove the partition and mark as failed
     fetcher.doWork()
@@ -634,21 +648,22 @@ class AbstractFetcherThreadTest {
   @Test
   def testFollowerFetchOutOfRangeLow(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // The follower begins from an offset which is behind the leader's log start offset
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 0)))
 
     val leaderLog = Seq(
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // initial truncation and verify that the log start offset is updated
     fetcher.doWork()
@@ -663,7 +678,7 @@ class AbstractFetcherThreadTest {
 
     TestUtils.waitUntilTrue(() => {
       fetcher.doWork()
-      fetcher.replicaPartitionState(partition).log == fetcher.leaderPartitionState(partition).log
+      fetcher.replicaPartitionState(partition).log == fetcher.mockLeader.leaderPartitionState(partition).log
     }, "Failed to reconcile leader and follower logs")
 
     assertEquals(leaderState.logStartOffset, replicaState.logStartOffset)
@@ -674,28 +689,29 @@ class AbstractFetcherThreadTest {
   @Test
   def testRetryAfterUnknownLeaderEpochInLatestOffsetFetch(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher: MockFetcherThread = new MockFetcherThread {
+    val fetcher: MockFetcherThread = new MockFetcherThread(new MockLeaderEndPoint {
       val tries = new AtomicInteger(0)
-      override protected def fetchLatestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
+      override def fetchLatestOffset(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
         if (tries.getAndIncrement() == 0)
           throw new UnknownLeaderEpochException("Unexpected leader epoch")
-        super.fetchLatestOffsetFromLeader(topicPartition, leaderEpoch)
+        super.fetchLatestOffset(topicPartition, leaderEpoch)
       }
-    }
+    })
 
     // The follower begins from an offset which is behind the leader's log start offset
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 0)))
 
     val leaderLog = Seq(
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // initial truncation and initial error response handling
     fetcher.doWork()
@@ -703,7 +719,7 @@ class AbstractFetcherThreadTest {
 
     TestUtils.waitUntilTrue(() => {
       fetcher.doWork()
-      fetcher.replicaPartitionState(partition).log == fetcher.leaderPartitionState(partition).log
+      fetcher.replicaPartitionState(partition).log == fetcher.mockLeader.leaderPartitionState(partition).log
     }, "Failed to reconcile leader and follower logs")
 
     assertEquals(leaderState.logStartOffset, replicaState.logStartOffset)
@@ -715,10 +731,10 @@ class AbstractFetcherThreadTest {
   def testCorruptMessage(): Unit = {
     val partition = new TopicPartition("topic", 0)
 
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
       var fetchedOnce = false
-      override def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
-        val fetchedData = super.fetchFromLeader(fetchRequest)
+      override def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
+        val fetchedData = super.fetch(fetchRequest)
         if (!fetchedOnce) {
           val records = fetchedData.head._2.records.asInstanceOf[MemoryRecords]
           val buffer = records.buffer()
@@ -728,15 +744,16 @@ class AbstractFetcherThreadTest {
         }
         fetchedData
       }
-    }
+    })
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     val batch = mkBatch(baseOffset = 0L, leaderEpoch = 0,
       new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))
-    val leaderState = MockFetcherThread.PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(Seq(batch), leaderEpoch = 0, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
 
     fetcher.doWork() // fails with corrupt record
     fetcher.doWork() // should succeed
@@ -768,28 +785,33 @@ class AbstractFetcherThreadTest {
     val initialLeaderEpochOnFollower = 0
     val nextLeaderEpochOnFollower = initialLeaderEpochOnFollower + 1
 
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
       var fetchEpochsFromLeaderOnce = false
       override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
         val fetchedEpochs = super.fetchEpochEndOffsets(partitions)
         if (!fetchEpochsFromLeaderOnce) {
-          // leader epoch changes while fetching epochs from leader
-          removePartitions(Set(partition))
-          setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = nextLeaderEpochOnFollower))
-          addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = nextLeaderEpochOnFollower)), forceTruncation = true)
+          responseCallback.apply()
           fetchEpochsFromLeaderOnce = true
         }
         fetchedEpochs
       }
+    })
+
+    def changeLeaderEpochWhileFetchEpoch(): Unit = {
+      fetcher.removePartitions(Set(partition))
+      fetcher.setReplicaState(partition, PartitionState(leaderEpoch = nextLeaderEpochOnFollower))
+      fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = nextLeaderEpochOnFollower)), forceTruncation = true)
     }
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = initialLeaderEpochOnFollower))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = initialLeaderEpochOnFollower))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = initialLeaderEpochOnFollower)), forceTruncation = true)
 
     val leaderLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = initialLeaderEpochOnFollower, new SimpleRecord("c".getBytes)))
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpochOnLeader, highWatermark = 0L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpochOnLeader, highWatermark = 0L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setResponseCallback(changeLeaderEpochWhileFetchEpoch)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // first round of truncation
     fetcher.doWork()
@@ -800,13 +822,13 @@ class AbstractFetcherThreadTest {
     assertEquals(Option(nextLeaderEpochOnFollower), fetcher.fetchState(partition).map(_.currentLeaderEpoch))
 
     if (leaderEpochOnLeader < nextLeaderEpochOnFollower) {
-      fetcher.setLeaderState(
-        partition, MockFetcherThread.PartitionState(leaderLog, nextLeaderEpochOnFollower, highWatermark = 0L))
+      fetcher.mockLeader.setLeaderState(
+        partition, PartitionState(leaderLog, nextLeaderEpochOnFollower, highWatermark = 0L))
     }
 
     // make sure the fetcher is now able to truncate and fetch
     fetcher.doWork()
-    assertEquals(fetcher.leaderPartitionState(partition).log, fetcher.replicaPartitionState(partition).log)
+    assertEquals(fetcher.mockLeader.leaderPartitionState(partition).log, fetcher.replicaPartitionState(partition).log)
   }
 
   @Test
@@ -816,24 +838,30 @@ class AbstractFetcherThreadTest {
     val initialLeaderEpochOnFollower = 0
     val nextLeaderEpochOnFollower = initialLeaderEpochOnFollower + 1
 
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
       override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
         val fetchedEpochs = super.fetchEpochEndOffsets(partitions)
-        // leader epoch changes while fetching epochs from leader
-        // at the same time, the replica fetcher manager removes the partition
-        removePartitions(Set(partition))
-        setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = nextLeaderEpochOnFollower))
+        responseCallback.apply()
         fetchedEpochs
       }
+    })
+
+    def changeLeaderEpochDuringFetchEpoch(): Unit = {
+      // leader epoch changes while fetching epochs from leader
+      // at the same time, the replica fetcher manager removes the partition
+      fetcher.removePartitions(Set(partition))
+      fetcher.setReplicaState(partition, PartitionState(leaderEpoch = nextLeaderEpochOnFollower))
     }
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = initialLeaderEpochOnFollower))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = initialLeaderEpochOnFollower))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = initialLeaderEpochOnFollower)))
 
     val leaderLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = initialLeaderEpochOnFollower, new SimpleRecord("c".getBytes)))
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpochOnLeader, highWatermark = 0L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpochOnLeader, highWatermark = 0L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setResponseCallback(changeLeaderEpochDuringFetchEpoch)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // first round of work
     fetcher.doWork()
@@ -843,8 +871,8 @@ class AbstractFetcherThreadTest {
     assertEquals(None, fetcher.fetchState(partition).map(_.state))
     assertEquals(None, fetcher.fetchState(partition).map(_.currentLeaderEpoch))
 
-    fetcher.setLeaderState(
-      partition, MockFetcherThread.PartitionState(leaderLog, nextLeaderEpochOnFollower, highWatermark = 0L))
+    fetcher.mockLeader.setLeaderState(
+      partition, PartitionState(leaderLog, nextLeaderEpochOnFollower, highWatermark = 0L))
 
     // make sure the fetcher is able to continue work
     fetcher.doWork()
@@ -854,7 +882,7 @@ class AbstractFetcherThreadTest {
   @Test
   def testTruncationThrowsExceptionIfLeaderReturnsPartitionsNotRequestedInFetchEpochs(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread {
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint {
       override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
         val unrequestedTp = new TopicPartition("topic2", 0)
         super.fetchEpochEndOffsets(partitions).toMap + (unrequestedTp -> new EpochEndOffset()
@@ -863,11 +891,12 @@ class AbstractFetcherThreadTest {
           .setLeaderEpoch(0)
           .setEndOffset(0))
       }
-    }
+    })
 
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = 0)), forceTruncation = true)
-    fetcher.setLeaderState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setLeaderState(partition, PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // first round of truncation should throw an exception
     assertThrows(classOf[IllegalStateException], () => fetcher.doWork())
@@ -875,7 +904,7 @@ class AbstractFetcherThreadTest {
 
   @Test
   def testFetcherThreadHandlingPartitionFailureDuringAppending(): Unit = {
-    val fetcherForAppend = new MockFetcherThread {
+    val fetcherForAppend = new MockFetcherThread(new MockLeaderEndPoint) {
       override def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = {
         if (topicPartition == partition1) {
           throw new KafkaException()
@@ -889,7 +918,7 @@ class AbstractFetcherThreadTest {
 
   @Test
   def testFetcherThreadHandlingPartitionFailureDuringTruncation(): Unit = {
-    val fetcherForTruncation = new MockFetcherThread {
+    val fetcherForTruncation = new MockFetcherThread(new MockLeaderEndPoint) {
       override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
         if(topicPartition == partition1)
           throw new Exception()
@@ -903,13 +932,14 @@ class AbstractFetcherThreadTest {
 
   private def verifyFetcherThreadHandlingPartitionFailure(fetcher: MockFetcherThread): Unit = {
 
-    fetcher.setReplicaState(partition1, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition1, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition1 -> initialFetchState(topicIds.get(partition1.topic), 0L, leaderEpoch = 0)), forceTruncation = true)
-    fetcher.setLeaderState(partition1, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setLeaderState(partition1, PartitionState(leaderEpoch = 0))
 
-    fetcher.setReplicaState(partition2, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition2, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition2 -> initialFetchState(topicIds.get(partition2.topic), 0L, leaderEpoch = 0)), forceTruncation = true)
-    fetcher.setLeaderState(partition2, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setLeaderState(partition2, PartitionState(leaderEpoch = 0))
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     // processing data fails for partition1
     fetcher.doWork()
@@ -937,14 +967,14 @@ class AbstractFetcherThreadTest {
   @Test
   def testDivergingEpochs(): Unit = {
     val partition = new TopicPartition("topic", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     val replicaLog = Seq(
       mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)),
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)))
 
-    val replicaState = MockFetcherThread.PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 0L)
+    val replicaState = PartitionState(replicaLog, leaderEpoch = 5, highWatermark = 0L)
     fetcher.setReplicaState(partition, replicaState)
     fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 5)))
     assertEquals(3L, replicaState.logEndOffset)
@@ -955,15 +985,16 @@ class AbstractFetcherThreadTest {
       mkBatch(baseOffset = 1, leaderEpoch = 2, new SimpleRecord("b".getBytes)),
       mkBatch(baseOffset = 2, leaderEpoch = 5, new SimpleRecord("d".getBytes)))
 
-    val leaderState = MockFetcherThread.PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
-    fetcher.setLeaderState(partition, leaderState)
+    val leaderState = PartitionState(leaderLog, leaderEpoch = 5, highWatermark = 2L)
+    fetcher.mockLeader.setLeaderState(partition, leaderState)
+    fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState)
 
     fetcher.doWork()
     fetcher.verifyLastFetchedEpoch(partition, Some(2))
 
     TestUtils.waitUntilTrue(() => {
       fetcher.doWork()
-      fetcher.replicaPartitionState(partition).log == fetcher.leaderPartitionState(partition).log
+      fetcher.replicaPartitionState(partition).log == fetcher.mockLeader.leaderPartitionState(partition).log
     }, "Failed to reconcile leader and follower logs")
     fetcher.verifyLastFetchedEpoch(partition, Some(5))
   }
@@ -971,10 +1002,10 @@ class AbstractFetcherThreadTest {
   @Test
   def testMaybeUpdateTopicIds(): Unit = {
     val partition = new TopicPartition("topic1", 0)
-    val fetcher = new MockFetcherThread
+    val fetcher = new MockFetcherThread(new MockLeaderEndPoint)
 
     // Start with no topic IDs
-    fetcher.setReplicaState(partition, MockFetcherThread.PartitionState(leaderEpoch = 0))
+    fetcher.setReplicaState(partition, PartitionState(leaderEpoch = 0))
     fetcher.addPartitions(Map(partition -> initialFetchState(None, 0L, leaderEpoch = 0)))
 
     def verifyFetchState(fetchState: Option[PartitionFetchState], expectedTopicId: Option[Uuid]): Unit = {
@@ -994,152 +1025,120 @@ class AbstractFetcherThreadTest {
     assertTrue(fetcher.fetchState(unknownPartition).isEmpty)
   }
 
-  object MockFetcherThread {
-    class PartitionState(var log: mutable.Buffer[RecordBatch],
-                         var leaderEpoch: Int,
-                         var logStartOffset: Long,
-                         var logEndOffset: Long,
-                         var highWatermark: Long)
-
-    object PartitionState {
-      def apply(log: Seq[RecordBatch], leaderEpoch: Int, highWatermark: Long): PartitionState = {
-        val logStartOffset = log.headOption.map(_.baseOffset).getOrElse(0L)
-        val logEndOffset = log.lastOption.map(_.nextOffset).getOrElse(0L)
-        new PartitionState(log.toBuffer, leaderEpoch, logStartOffset, logEndOffset, highWatermark)
-      }
+  class MockLeaderEndPoint(sourceBroker: BrokerEndPoint = new BrokerEndPoint(1, host = "localhost", port = Random.nextInt()))
+    extends LeaderEndPoint {
 
-      def apply(leaderEpoch: Int): PartitionState = {
-        apply(Seq(), leaderEpoch = leaderEpoch, highWatermark = 0L)
-      }
-    }
-  }
+    private val leaderPartitionStates = mutable.Map[TopicPartition, PartitionState]()
+    var responseCallback: () => Unit = () => {}
 
-  class MockFetcherThread(val replicaId: Int = 0, val leaderId: Int = 1, fetchBackOffMs: Int = 0)
-    extends AbstractFetcherThread("mock-fetcher",
-      clientId = "mock-fetcher",
-      sourceBroker = new BrokerEndPoint(leaderId, host = "localhost", port = Random.nextInt()),
-      failedPartitions,
-      fetchBackOffMs = fetchBackOffMs,
-      brokerTopicStats = new BrokerTopicStats) {
+    var replicaPartitionStateCallback: TopicPartition => Option[PartitionState] = { _ => Option.empty }
+    var replicaId: Int = 0
 
-    import MockFetcherThread.PartitionState
+    override val isTruncationOnFetchSupported: Boolean = truncateOnFetch
 
-    private val replicaPartitionStates = mutable.Map[TopicPartition, PartitionState]()
-    private val leaderPartitionStates = mutable.Map[TopicPartition, PartitionState]()
-    private var latestEpochDefault: Option[Int] = Some(0)
+    def leaderPartitionState(topicPartition: TopicPartition): PartitionState = {
+      leaderPartitionStates.getOrElse(topicPartition,
+        throw new IllegalArgumentException(s"Unknown partition $topicPartition"))
+    }
 
     def setLeaderState(topicPartition: TopicPartition, state: PartitionState): Unit = {
       leaderPartitionStates.put(topicPartition, state)
     }
 
-    def setReplicaState(topicPartition: TopicPartition, state: PartitionState): Unit = {
-      replicaPartitionStates.put(topicPartition, state)
+    def setResponseCallback(callback: () => Unit): Unit = {
+      responseCallback = callback
     }
 
-    def replicaPartitionState(topicPartition: TopicPartition): PartitionState = {
-      replicaPartitionStates.getOrElse(topicPartition,
-        throw new IllegalArgumentException(s"Unknown partition $topicPartition"))
+    def setReplicaPartitionStateCallback(callback: TopicPartition => PartitionState): Unit = {
+      replicaPartitionStateCallback = topicPartition => Some(callback(topicPartition))
     }
 
-    def leaderPartitionState(topicPartition: TopicPartition): PartitionState = {
-      leaderPartitionStates.getOrElse(topicPartition,
-        throw new IllegalArgumentException(s"Unknown partition $topicPartition"))
+    def setReplicaId(replicaId: Int): Unit = {
+      this.replicaId = replicaId
     }
 
-    def addPartitions(initialFetchStates: Map[TopicPartition, InitialFetchState], forceTruncation: Boolean): Set[TopicPartition] = {
-      latestEpochDefault = if (forceTruncation) None else Some(0)
-      val partitions = super.addPartitions(initialFetchStates)
-      latestEpochDefault = Some(0)
-      partitions
-    }
+    override def initiateClose(): Unit = {}
 
-    override def processPartitionData(topicPartition: TopicPartition,
-                                      fetchOffset: Long,
-                                      partitionData: FetchData): Option[LogAppendInfo] = {
-      val state = replicaPartitionState(topicPartition)
+    override def close(): Unit = {}
 
-      if (isTruncationOnFetchSupported && FetchResponse.isDivergingEpoch(partitionData)) {
-        val divergingEpoch = partitionData.divergingEpoch
-        truncateOnFetchResponse(Map(topicPartition -> new EpochEndOffset()
-          .setPartition(topicPartition.partition)
-          .setErrorCode(Errors.NONE.code)
-          .setLeaderEpoch(divergingEpoch.epoch)
-          .setEndOffset(divergingEpoch.endOffset)))
-        return None
-      }
+    override def brokerEndPoint(): BrokerEndPoint = sourceBroker
 
-      // Throw exception if the fetchOffset does not match the fetcherThread partition state
-      if (fetchOffset != state.logEndOffset)
-        throw new RuntimeException(s"Offset mismatch for partition $topicPartition: " +
-          s"fetched offset = $fetchOffset, log end offset = ${state.logEndOffset}.")
+    override def fetch(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
+      fetchRequest.fetchData.asScala.map { case (partition, fetchData) =>
+        val leaderState = leaderPartitionState(partition)
+        val epochCheckError = checkExpectedLeaderEpoch(fetchData.currentLeaderEpoch, leaderState)
+        val divergingEpoch = divergingEpochAndOffset(partition, fetchData.lastFetchedEpoch, fetchData.fetchOffset, leaderState)
 
-      // Now check message's crc
-      val batches = FetchResponse.recordsOrFail(partitionData).batches.asScala
-      var maxTimestamp = RecordBatch.NO_TIMESTAMP
-      var offsetOfMaxTimestamp = -1L
-      var lastOffset = state.logEndOffset
-      var lastEpoch: Option[Int] = None
+        val (error, records) = if (epochCheckError.isDefined) {
+          (epochCheckError.get, MemoryRecords.EMPTY)
+        } else if (fetchData.fetchOffset > leaderState.logEndOffset || fetchData.fetchOffset < leaderState.logStartOffset) {
+          (Errors.OFFSET_OUT_OF_RANGE, MemoryRecords.EMPTY)
+        } else if (divergingEpoch.nonEmpty) {
+          (Errors.NONE, MemoryRecords.EMPTY)
+        } else {
+          // for simplicity, we fetch only one batch at a time
+          val records = leaderState.log.find(_.baseOffset >= fetchData.fetchOffset) match {
+            case Some(batch) =>
+              val buffer = ByteBuffer.allocate(batch.sizeInBytes)
+              batch.writeTo(buffer)
+              buffer.flip()
+              MemoryRecords.readableRecords(buffer)
 
-      for (batch <- batches) {
-        batch.ensureValid()
-        if (batch.maxTimestamp > maxTimestamp) {
-          maxTimestamp = batch.maxTimestamp
-          offsetOfMaxTimestamp = batch.baseOffset
+            case None =>
+              MemoryRecords.EMPTY
+          }
+
+          (Errors.NONE, records)
         }
-        state.log.append(batch)
-        state.logEndOffset = batch.nextOffset
-        lastOffset = batch.lastOffset
-        lastEpoch = Some(batch.partitionLeaderEpoch)
-      }
+        val partitionData = new FetchData()
+          .setPartitionIndex(partition.partition)
+          .setErrorCode(error.code)
+          .setHighWatermark(leaderState.highWatermark)
+          .setLastStableOffset(leaderState.highWatermark)
+          .setLogStartOffset(leaderState.logStartOffset)
+          .setRecords(records)
+        divergingEpoch.foreach(partitionData.setDivergingEpoch)
 
-      state.logStartOffset = partitionData.logStartOffset
-      state.highWatermark = partitionData.highWatermark
+        (partition, partitionData)
+      }.toMap
+    }
 
-      Some(LogAppendInfo(firstOffset = Some(LogOffsetMetadata(fetchOffset)),
-        lastOffset = lastOffset,
-        lastLeaderEpoch = lastEpoch,
-        maxTimestamp = maxTimestamp,
-        offsetOfMaxTimestamp = offsetOfMaxTimestamp,
-        logAppendTime = Time.SYSTEM.milliseconds(),
-        logStartOffset = state.logStartOffset,
-        recordConversionStats = RecordConversionStats.EMPTY,
-        sourceCodec = NoCompressionCodec,
-        targetCodec = NoCompressionCodec,
-        shallowCount = batches.size,
-        validBytes = FetchResponse.recordsSize(partitionData),
-        offsetsMonotonic = true,
-        lastOffsetOfFirstBatch = batches.headOption.map(_.lastOffset).getOrElse(-1)))
+    override def fetchEarliestOffset(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
+      val leaderState = leaderPartitionState(topicPartition)
+      checkLeaderEpochAndThrow(leaderEpoch, leaderState)
+      leaderState.logStartOffset
     }
 
-    override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
-      val state = replicaPartitionState(topicPartition)
-      state.log = state.log.takeWhile { batch =>
-        batch.lastOffset < truncationState.offset
-      }
-      state.logEndOffset = state.log.lastOption.map(_.lastOffset + 1).getOrElse(state.logStartOffset)
-      state.highWatermark = math.min(state.highWatermark, state.logEndOffset)
+    override def fetchLatestOffset(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
+      val leaderState = leaderPartitionState(topicPartition)
+      checkLeaderEpochAndThrow(leaderEpoch, leaderState)
+      leaderState.logEndOffset
     }
 
-    override def truncateFullyAndStartAt(topicPartition: TopicPartition, offset: Long): Unit = {
-      val state = replicaPartitionState(topicPartition)
-      state.log.clear()
-      state.logStartOffset = offset
-      state.logEndOffset = offset
-      state.highWatermark = offset
+    override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
+      val endOffsets = mutable.Map[TopicPartition, EpochEndOffset]()
+      partitions.forKeyValue { (partition, epochData) =>
+        assert(partition.partition == epochData.partition,
+          "Partition must be consistent between TopicPartition and EpochData")
+        val leaderState = leaderPartitionState(partition)
+        val epochEndOffset = lookupEndOffsetForEpoch(partition, epochData, leaderState)
+        endOffsets.put(partition, epochEndOffset)
+      }
+      endOffsets
     }
 
     override def buildFetch(partitionMap: Map[TopicPartition, PartitionFetchState]): ResultWithPartitions[Option[ReplicaFetch]] = {
       val fetchData = mutable.Map.empty[TopicPartition, FetchRequest.PartitionData]
       partitionMap.foreach { case (partition, state) =>
         if (state.isReadyForFetch) {
-          val replicaState = replicaPartitionState(partition)
+          val replicaState = replicaPartitionStateCallback(partition).getOrElse(throw new IllegalArgumentException(s"Unknown partition $partition"))
           val lastFetchedEpoch = if (isTruncationOnFetchSupported)
             state.lastFetchedEpoch.map(_.asInstanceOf[Integer]).asJava
           else
             Optional.empty[Integer]
           fetchData.put(partition,
             new FetchRequest.PartitionData(state.topicId.getOrElse(Uuid.ZERO_UUID), state.fetchOffset, replicaState.logStartOffset,
-            1024 * 1024, Optional.of[Integer](state.currentLeaderEpoch), lastFetchedEpoch))
+              1024 * 1024, Optional.of[Integer](state.currentLeaderEpoch), lastFetchedEpoch))
         }
       }
       val fetchRequest = FetchRequest.Builder.forReplica(version, replicaId, 0, 1, fetchData.asJava)
@@ -1151,24 +1150,10 @@ class AbstractFetcherThreadTest {
       ResultWithPartitions(fetchRequestOpt, Set.empty)
     }
 
-    override def latestEpoch(topicPartition: TopicPartition): Option[Int] = {
-      val state = replicaPartitionState(topicPartition)
-      state.log.lastOption.map(_.partitionLeaderEpoch).orElse(latestEpochDefault)
-    }
-
-    override def logStartOffset(topicPartition: TopicPartition): Long = replicaPartitionState(topicPartition).logStartOffset
-
-    override def logEndOffset(topicPartition: TopicPartition): Long = replicaPartitionState(topicPartition).logEndOffset
-
-    override def endOffsetForEpoch(topicPartition: TopicPartition, epoch: Int): Option[OffsetAndEpoch] = {
-      val epochData = new EpochData()
-        .setPartition(topicPartition.partition)
-        .setLeaderEpoch(epoch)
-      val result = lookupEndOffsetForEpoch(topicPartition, epochData, replicaPartitionState(topicPartition))
-      if (result.endOffset == UNDEFINED_EPOCH_OFFSET)
-        None
-      else
-        Some(OffsetAndEpoch(result.endOffset, result.leaderEpoch))
+    private def checkLeaderEpochAndThrow(expectedEpoch: Int, partitionState: PartitionState): Unit = {
+      checkExpectedLeaderEpoch(expectedEpoch, partitionState).foreach { error =>
+        throw error.exception()
+      }
     }
 
     private def checkExpectedLeaderEpoch(expectedEpochOpt: Optional[Integer],
@@ -1194,13 +1179,6 @@ class AbstractFetcherThreadTest {
       }
     }
 
-    def verifyLastFetchedEpoch(partition: TopicPartition, expectedEpoch: Option[Int]): Unit = {
-      if (isTruncationOnFetchSupported) {
-        assertEquals(Some(Fetching), fetchState(partition).map(_.state))
-        assertEquals(expectedEpoch, fetchState(partition).flatMap(_.lastFetchedEpoch))
-      }
-    }
-
     private def divergingEpochAndOffset(topicPartition: TopicPartition,
                                         lastFetchedEpoch: Optional[Integer],
                                         fetchOffset: Long,
@@ -1212,8 +1190,8 @@ class AbstractFetcherThreadTest {
             .setLeaderEpoch(fetchEpoch)))(topicPartition)
 
         if (partitionState.log.isEmpty
-            || epochEndOffset.endOffset == UNDEFINED_EPOCH_OFFSET
-            || epochEndOffset.leaderEpoch == UNDEFINED_EPOCH)
+          || epochEndOffset.endOffset == UNDEFINED_EPOCH_OFFSET
+          || epochEndOffset.leaderEpoch == UNDEFINED_EPOCH)
           None
         else if (epochEndOffset.leaderEpoch < fetchEpoch || epochEndOffset.endOffset < fetchOffset) {
           Some(new FetchResponseData.EpochEndOffset()
@@ -1224,7 +1202,7 @@ class AbstractFetcherThreadTest {
       }
     }
 
-    private def lookupEndOffsetForEpoch(topicPartition: TopicPartition,
+    def lookupEndOffsetForEpoch(topicPartition: TopicPartition,
                                         epochData: EpochData,
                                         partitionState: PartitionState): EpochEndOffset = {
       checkExpectedLeaderEpoch(epochData.currentLeaderEpoch, partitionState).foreach { error =>
@@ -1256,81 +1234,156 @@ class AbstractFetcherThreadTest {
         .setPartition(topicPartition.partition)
         .setErrorCode(Errors.NONE.code)
     }
+  }
 
-    override def fetchEpochEndOffsets(partitions: Map[TopicPartition, EpochData]): Map[TopicPartition, EpochEndOffset] = {
-      val endOffsets = mutable.Map[TopicPartition, EpochEndOffset]()
-      partitions.forKeyValue { (partition, epochData) =>
-        assert(partition.partition == epochData.partition,
-          "Partition must be consistent between TopicPartition and EpochData")
-        val leaderState = leaderPartitionState(partition)
-        val epochEndOffset = lookupEndOffsetForEpoch(partition, epochData, leaderState)
-        endOffsets.put(partition, epochEndOffset)
-      }
-      endOffsets
+  class PartitionState(var log: mutable.Buffer[RecordBatch],
+                       var leaderEpoch: Int,
+                       var logStartOffset: Long,
+                       var logEndOffset: Long,
+                       var highWatermark: Long)
+
+  object PartitionState {
+    def apply(log: Seq[RecordBatch], leaderEpoch: Int, highWatermark: Long): PartitionState = {
+      val logStartOffset = log.headOption.map(_.baseOffset).getOrElse(0L)
+      val logEndOffset = log.lastOption.map(_.nextOffset).getOrElse(0L)
+      new PartitionState(log.toBuffer, leaderEpoch, logStartOffset, logEndOffset, highWatermark)
     }
 
-    override protected val isOffsetForLeaderEpochSupported: Boolean = true
+    def apply(leaderEpoch: Int): PartitionState = {
+      apply(Seq(), leaderEpoch = leaderEpoch, highWatermark = 0L)
+    }
+  }
 
-    override protected val isTruncationOnFetchSupported: Boolean = truncateOnFetch
+  class MockFetcherThread(val mockLeader : MockLeaderEndPoint, val replicaId: Int = 0, val leaderId: Int = 1, fetchBackOffMs: Int = 0)
+    extends AbstractFetcherThread("mock-fetcher",
+      clientId = "mock-fetcher",
+      leader = mockLeader,
+      failedPartitions,
+      fetchBackOffMs = fetchBackOffMs,
+      brokerTopicStats = new BrokerTopicStats) {
 
-    override def fetchFromLeader(fetchRequest: FetchRequest.Builder): Map[TopicPartition, FetchData] = {
-      fetchRequest.fetchData.asScala.map { case (partition, fetchData) =>
-        val leaderState = leaderPartitionState(partition)
-        val epochCheckError = checkExpectedLeaderEpoch(fetchData.currentLeaderEpoch, leaderState)
-        val divergingEpoch = divergingEpochAndOffset(partition, fetchData.lastFetchedEpoch, fetchData.fetchOffset, leaderState)
+    private val replicaPartitionStates = mutable.Map[TopicPartition, PartitionState]()
+    private var latestEpochDefault: Option[Int] = Some(0)
 
-        val (error, records) = if (epochCheckError.isDefined) {
-          (epochCheckError.get, MemoryRecords.EMPTY)
-        } else if (fetchData.fetchOffset > leaderState.logEndOffset || fetchData.fetchOffset < leaderState.logStartOffset) {
-          (Errors.OFFSET_OUT_OF_RANGE, MemoryRecords.EMPTY)
-        } else if (divergingEpoch.nonEmpty) {
-          (Errors.NONE, MemoryRecords.EMPTY)
-        } else {
-          // for simplicity, we fetch only one batch at a time
-          val records = leaderState.log.find(_.baseOffset >= fetchData.fetchOffset) match {
-            case Some(batch) =>
-              val buffer = ByteBuffer.allocate(batch.sizeInBytes)
-              batch.writeTo(buffer)
-              buffer.flip()
-              MemoryRecords.readableRecords(buffer)
+    def setReplicaState(topicPartition: TopicPartition, state: PartitionState): Unit = {
+      replicaPartitionStates.put(topicPartition, state)
+    }
 
-            case None =>
-              MemoryRecords.EMPTY
-          }
+    def replicaPartitionState(topicPartition: TopicPartition): PartitionState = {
+      replicaPartitionStates.getOrElse(topicPartition,
+        throw new IllegalArgumentException(s"Unknown partition $topicPartition"))
+    }
 
-          (Errors.NONE, records)
+    def addPartitions(initialFetchStates: Map[TopicPartition, InitialFetchState], forceTruncation: Boolean): Set[TopicPartition] = {
+      latestEpochDefault = if (forceTruncation) None else Some(0)
+      val partitions = super.addPartitions(initialFetchStates)
+      latestEpochDefault = Some(0)
+      partitions
+    }
+
+    override def processPartitionData(topicPartition: TopicPartition,
+                                      fetchOffset: Long,
+                                      partitionData: FetchData): Option[LogAppendInfo] = {
+      val state = replicaPartitionState(topicPartition)
+
+      if (leader.isTruncationOnFetchSupported && FetchResponse.isDivergingEpoch(partitionData)) {
+        val divergingEpoch = partitionData.divergingEpoch
+        truncateOnFetchResponse(Map(topicPartition -> new EpochEndOffset()
+          .setPartition(topicPartition.partition)
+          .setErrorCode(Errors.NONE.code)
+          .setLeaderEpoch(divergingEpoch.epoch)
+          .setEndOffset(divergingEpoch.endOffset)))
+        return None
+      }
+
+      // Throw exception if the fetchOffset does not match the fetcherThread partition state
+      if (fetchOffset != state.logEndOffset)
+        throw new RuntimeException(s"Offset mismatch for partition $topicPartition: " +
+          s"fetched offset = $fetchOffset, log end offset = ${state.logEndOffset}.")
+
+      // Now check message's crc
+      val batches = FetchResponse.recordsOrFail(partitionData).batches.asScala
+      var maxTimestamp = RecordBatch.NO_TIMESTAMP
+      var offsetOfMaxTimestamp = -1L
+      var lastOffset = state.logEndOffset
+      var lastEpoch: Option[Int] = None
+
+      for (batch <- batches) {
+        batch.ensureValid()
+        if (batch.maxTimestamp > maxTimestamp) {
+          maxTimestamp = batch.maxTimestamp
+          offsetOfMaxTimestamp = batch.baseOffset
         }
-        val partitionData = new FetchData()
-          .setPartitionIndex(partition.partition)
-          .setErrorCode(error.code)
-          .setHighWatermark(leaderState.highWatermark)
-          .setLastStableOffset(leaderState.highWatermark)
-          .setLogStartOffset(leaderState.logStartOffset)
-          .setRecords(records)
-        divergingEpoch.foreach(partitionData.setDivergingEpoch)
+        state.log.append(batch)
+        state.logEndOffset = batch.nextOffset
+        lastOffset = batch.lastOffset
+        lastEpoch = Some(batch.partitionLeaderEpoch)
+      }
 
-        (partition, partitionData)
-      }.toMap
+      state.logStartOffset = partitionData.logStartOffset
+      state.highWatermark = partitionData.highWatermark
+
+      Some(LogAppendInfo(firstOffset = Some(LogOffsetMetadata(fetchOffset)),
+        lastOffset = lastOffset,
+        lastLeaderEpoch = lastEpoch,
+        maxTimestamp = maxTimestamp,
+        offsetOfMaxTimestamp = offsetOfMaxTimestamp,
+        logAppendTime = Time.SYSTEM.milliseconds(),
+        logStartOffset = state.logStartOffset,
+        recordConversionStats = RecordConversionStats.EMPTY,
+        sourceCodec = NoCompressionCodec,
+        targetCodec = NoCompressionCodec,
+        shallowCount = batches.size,
+        validBytes = FetchResponse.recordsSize(partitionData),
+        offsetsMonotonic = true,
+        lastOffsetOfFirstBatch = batches.headOption.map(_.lastOffset).getOrElse(-1)))
     }
 
-    private def checkLeaderEpochAndThrow(expectedEpoch: Int, partitionState: PartitionState): Unit = {
-      checkExpectedLeaderEpoch(expectedEpoch, partitionState).foreach { error =>
-        throw error.exception()
+    override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {
+      val state = replicaPartitionState(topicPartition)
+      state.log = state.log.takeWhile { batch =>
+        batch.lastOffset < truncationState.offset
       }
+      state.logEndOffset = state.log.lastOption.map(_.lastOffset + 1).getOrElse(state.logStartOffset)
+      state.highWatermark = math.min(state.highWatermark, state.logEndOffset)
     }
 
-    override protected def fetchEarliestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
-      val leaderState = leaderPartitionState(topicPartition)
-      checkLeaderEpochAndThrow(leaderEpoch, leaderState)
-      leaderState.logStartOffset
+    override def truncateFullyAndStartAt(topicPartition: TopicPartition, offset: Long): Unit = {
+      val state = replicaPartitionState(topicPartition)
+      state.log.clear()
+      state.logStartOffset = offset
+      state.logEndOffset = offset
+      state.highWatermark = offset
     }
 
-    override protected def fetchLatestOffsetFromLeader(topicPartition: TopicPartition, leaderEpoch: Int): Long = {
-      val leaderState = leaderPartitionState(topicPartition)
-      checkLeaderEpochAndThrow(leaderEpoch, leaderState)
-      leaderState.logEndOffset
+    override def latestEpoch(topicPartition: TopicPartition): Option[Int] = {
+      val state = replicaPartitionState(topicPartition)
+      state.log.lastOption.map(_.partitionLeaderEpoch).orElse(latestEpochDefault)
+    }
+
+    override def logStartOffset(topicPartition: TopicPartition): Long = replicaPartitionState(topicPartition).logStartOffset
+
+    override def logEndOffset(topicPartition: TopicPartition): Long = replicaPartitionState(topicPartition).logEndOffset
+
+    override def endOffsetForEpoch(topicPartition: TopicPartition, epoch: Int): Option[OffsetAndEpoch] = {
+      val epochData = new EpochData()
+        .setPartition(topicPartition.partition)
+        .setLeaderEpoch(epoch)
+      val result = mockLeader.lookupEndOffsetForEpoch(topicPartition, epochData, replicaPartitionState(topicPartition))
+      if (result.endOffset == UNDEFINED_EPOCH_OFFSET)
+        None
+      else
+        Some(OffsetAndEpoch(result.endOffset, result.leaderEpoch))
     }
 
+    def verifyLastFetchedEpoch(partition: TopicPartition, expectedEpoch: Option[Int]): Unit = {
+      if (leader.isTruncationOnFetchSupported) {
+        assertEquals(Some(Fetching), fetchState(partition).map(_.state))
+        assertEquals(expectedEpoch, fetchState(partition).flatMap(_.lastFetchedEpoch))
+      }
+    }
+
+    override protected val isOffsetForLeaderEpochSupported: Boolean = true
   }
 
 }
diff --git a/core/src/test/scala/unit/kafka/server/AddPartitionsToTxnRequestServerTest.scala b/core/src/test/scala/unit/kafka/server/AddPartitionsToTxnRequestServerTest.scala
index 0a98d2626cd23..74320e62b49a1 100644
--- a/core/src/test/scala/unit/kafka/server/AddPartitionsToTxnRequestServerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/AddPartitionsToTxnRequestServerTest.scala
@@ -17,13 +17,16 @@
 
 package kafka.server
 
-import java.util.Properties
+import kafka.utils.TestInfoUtils
 
+import java.util.Properties
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.{AddPartitionsToTxnRequest, AddPartitionsToTxnResponse}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.jdk.CollectionConverters._
 
@@ -37,11 +40,12 @@ class AddPartitionsToTxnRequestServerTest extends BaseRequestTest {
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
-    createTopic(topic1, numPartitions, servers.size, new Properties())
+    createTopic(topic1, numPartitions, brokers.size, new Properties())
   }
 
-  @Test
-  def shouldReceiveOperationNotAttemptedWhenOtherPartitionHasError(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def shouldReceiveOperationNotAttemptedWhenOtherPartitionHasError(quorum: String): Unit = {
     // The basic idea is that we have one unknown topic and one created topic. We should get the 'UNKNOWN_TOPIC_OR_PARTITION'
     // error for the unknown topic and the 'OPERATION_NOT_ATTEMPTED' error for the known and authorized topic.
     val nonExistentTopic = new TopicPartition("unknownTopic", 0)
@@ -58,7 +62,7 @@ class AddPartitionsToTxnRequestServerTest extends BaseRequestTest {
       List(createdTopicPartition, nonExistentTopic).asJava)
       .build()
 
-    val leaderId = servers.head.config.brokerId
+    val leaderId = brokers.head.config.brokerId
     val response = connectAndReceive[AddPartitionsToTxnResponse](request, brokerSocketServer(leaderId))
 
     assertEquals(2, response.errors.size)
diff --git a/core/src/test/scala/unit/kafka/server/AlterIsrManagerTest.scala b/core/src/test/scala/unit/kafka/server/AlterIsrManagerTest.scala
deleted file mode 100644
index 40b1b5933a20e..0000000000000
--- a/core/src/test/scala/unit/kafka/server/AlterIsrManagerTest.scala
+++ /dev/null
@@ -1,386 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package kafka.server
-
-import java.util.Collections
-
-import kafka.api.LeaderAndIsr
-import kafka.utils.{MockScheduler, MockTime}
-import kafka.zk.KafkaZkClient
-import org.apache.kafka.clients.ClientResponse
-import org.apache.kafka.common.TopicPartition
-import org.apache.kafka.common.errors.{AuthenticationException, InvalidUpdateVersionException, OperationNotAttemptedException, UnknownServerException, UnsupportedVersionException}
-import org.apache.kafka.common.message.AlterIsrResponseData
-import org.apache.kafka.common.metrics.Metrics
-import org.apache.kafka.common.protocol.{ApiKeys, Errors}
-import org.apache.kafka.common.requests.{AbstractRequest, AlterIsrRequest, AlterIsrResponse}
-import org.apache.kafka.test.TestUtils.assertFutureThrows
-import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{BeforeEach, Test}
-import org.mockito.ArgumentMatchers.{any, anyString}
-import org.mockito.Mockito.{mock, reset, times, verify}
-import org.mockito.{ArgumentCaptor, ArgumentMatchers, Mockito}
-
-import scala.jdk.CollectionConverters._
-
-class AlterIsrManagerTest {
-
-  val topic = "test-topic"
-  val time = new MockTime
-  val metrics = new Metrics
-  val brokerId = 1
-
-  var brokerToController: BrokerToControllerChannelManager = _
-
-  val tp0 = new TopicPartition(topic, 0)
-  val tp1 = new TopicPartition(topic, 1)
-  val tp2 = new TopicPartition(topic, 2)
-
-  @BeforeEach
-  def setup(): Unit = {
-    brokerToController = mock(classOf[BrokerToControllerChannelManager])
-  }
-
-  @Test
-  def testBasic(): Unit = {
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-    alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    verify(brokerToController).start()
-    verify(brokerToController).sendRequest(any(), any())
-
-  }
-
-  @Test
-  def testOverwriteWithinBatch(): Unit = {
-    val capture: ArgumentCaptor[AbstractRequest.Builder[AlterIsrRequest]] = ArgumentCaptor.forClass(classOf[AbstractRequest.Builder[AlterIsrRequest]])
-    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-
-    // Only send one ISR update for a given topic+partition
-    val firstSubmitFuture = alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    assertFalse(firstSubmitFuture.isDone)
-
-    val failedSubmitFuture = alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2), 10), 0)
-    assertTrue(failedSubmitFuture.isCompletedExceptionally)
-    assertFutureThrows(failedSubmitFuture, classOf[OperationNotAttemptedException])
-
-    // Simulate response
-    val alterIsrResp = partitionResponse(tp0, Errors.NONE)
-    val resp = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, alterIsrResp)
-    verify(brokerToController).sendRequest(capture.capture(), callbackCapture.capture())
-    callbackCapture.getValue.onComplete(resp)
-
-    // Now we can submit this partition again
-    val newSubmitFuture = alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1), 10), 0)
-    assertFalse(newSubmitFuture.isDone)
-
-    verify(brokerToController).start()
-    verify(brokerToController, times(2)).sendRequest(capture.capture(), callbackCapture.capture())
-
-    // Make sure we sent the right request ISR={1}
-    val request = capture.getValue.build()
-    assertEquals(request.data().topics().size(), 1)
-    assertEquals(request.data().topics().get(0).partitions().get(0).newIsr().size(), 1)
-  }
-
-  @Test
-  def testSingleBatch(): Unit = {
-    val capture: ArgumentCaptor[AbstractRequest.Builder[AlterIsrRequest]] = ArgumentCaptor.forClass(classOf[AbstractRequest.Builder[AlterIsrRequest]])
-    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-
-    // First request will send batch of one
-    alterIsrManager.submit(new TopicPartition(topic, 0),
-      new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-
-    // Other submissions will queue up until a response
-    for (i <- 1 to 9) {
-      alterIsrManager.submit(new TopicPartition(topic, i),
-        new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    }
-
-    // Simulate response, omitting partition 0 will allow it to stay in unsent queue
-    val alterIsrResp = new AlterIsrResponse(new AlterIsrResponseData())
-    val resp = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, alterIsrResp)
-
-    // On the callback, we check for unsent items and send another request
-    verify(brokerToController).sendRequest(capture.capture(), callbackCapture.capture())
-    callbackCapture.getValue.onComplete(resp)
-
-    verify(brokerToController).start()
-    verify(brokerToController, times(2)).sendRequest(capture.capture(), callbackCapture.capture())
-
-    // Verify the last request sent had all 10 items
-    val request = capture.getValue.build()
-    assertEquals(request.data().topics().size(), 1)
-    assertEquals(request.data().topics().get(0).partitions().size(), 10)
-  }
-
-  @Test
-  def testAuthorizationFailed(): Unit = {
-    testRetryOnTopLevelError(Errors.CLUSTER_AUTHORIZATION_FAILED)
-  }
-
-  @Test
-  def testStaleBrokerEpoch(): Unit = {
-    testRetryOnTopLevelError(Errors.STALE_BROKER_EPOCH)
-  }
-
-  @Test
-  def testUnknownServer(): Unit = {
-    testRetryOnTopLevelError(Errors.UNKNOWN_SERVER_ERROR)
-  }
-
-  @Test
-  def testRetryOnAuthenticationFailure(): Unit = {
-    testRetryOnErrorResponse(new ClientResponse(null, null, "", 0L, 0L,
-      false, null, new AuthenticationException("authentication failed"), null))
-  }
-
-  @Test
-  def testRetryOnUnsupportedVersionError(): Unit = {
-    testRetryOnErrorResponse(new ClientResponse(null, null, "", 0L, 0L,
-      false, new UnsupportedVersionException("unsupported version"), null, null))
-  }
-
-  private def testRetryOnTopLevelError(error: Errors): Unit = {
-    val alterIsrResp = new AlterIsrResponse(new AlterIsrResponseData().setErrorCode(error.code))
-    val response = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, alterIsrResp)
-    testRetryOnErrorResponse(response)
-  }
-
-  private def testRetryOnErrorResponse(response: ClientResponse): Unit = {
-    val leaderAndIsr = new LeaderAndIsr(1, 1, List(1,2,3), 10)
-    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-    alterIsrManager.submit(tp0, leaderAndIsr, 0)
-
-    verify(brokerToController).start()
-    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
-    callbackCapture.getValue.onComplete(response)
-
-    // Any top-level error, we want to retry, so we don't clear items from the pending map
-    assertTrue(alterIsrManager.unsentIsrUpdates.containsKey(tp0))
-
-    reset(brokerToController)
-
-    // After some time, we will retry failed requests
-    time.sleep(100)
-    scheduler.tick()
-
-    // After a successful response, we can submit another AlterIsrItem
-    val retryAlterIsrResponse = partitionResponse(tp0, Errors.NONE)
-    val retryResponse = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, retryAlterIsrResponse)
-
-    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
-    callbackCapture.getValue.onComplete(retryResponse)
-
-    assertFalse(alterIsrManager.unsentIsrUpdates.containsKey(tp0))
-  }
-
-  @Test
-  def testInvalidUpdateVersion(): Unit = {
-    checkPartitionError(Errors.INVALID_UPDATE_VERSION)
-  }
-
-  @Test
-  def testUnknownTopicPartition(): Unit = {
-    checkPartitionError(Errors.UNKNOWN_TOPIC_OR_PARTITION)
-  }
-
-  @Test
-  def testNotLeaderOrFollower(): Unit = {
-    checkPartitionError(Errors.NOT_LEADER_OR_FOLLOWER)
-  }
-
-  private def checkPartitionError(error: Errors): Unit = {
-    val alterIsrManager = testPartitionError(tp0, error)
-    // Any partition-level error should clear the item from the pending queue allowing for future updates
-    val future = alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    assertFalse(future.isDone)
-  }
-
-  private def testPartitionError(tp: TopicPartition, error: Errors): AlterIsrManager = {
-    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-    reset(brokerToController)
-
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-
-    val future = alterIsrManager.submit(tp, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-
-    verify(brokerToController).start()
-    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
-    reset(brokerToController)
-
-    val alterIsrResp = partitionResponse(tp, error)
-    val resp = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, alterIsrResp)
-    callbackCapture.getValue.onComplete(resp)
-    assertTrue(future.isCompletedExceptionally)
-    assertFutureThrows(future, error.exception.getClass)
-    alterIsrManager
-  }
-
-  @Test
-  def testOneInFlight(): Unit = {
-    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => 2)
-    alterIsrManager.start()
-
-    // First submit will send the request
-    alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-
-    // These will become pending unsent items
-    alterIsrManager.submit(tp1, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    alterIsrManager.submit(tp2, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-
-    verify(brokerToController).start()
-    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
-
-    // Once the callback runs, another request will be sent
-    reset(brokerToController)
-
-    val alterIsrResp = new AlterIsrResponse(new AlterIsrResponseData())
-    val resp = new ClientResponse(null, null, "", 0L, 0L,
-      false, null, null, alterIsrResp)
-    callbackCapture.getValue.onComplete(resp)
-  }
-
-  @Test
-  def testPartitionMissingInResponse(): Unit = {
-    brokerToController = Mockito.mock(classOf[BrokerToControllerChannelManager])
-
-    val brokerEpoch = 2
-    val scheduler = new MockScheduler(time)
-    val alterIsrManager = new DefaultAlterIsrManager(brokerToController, scheduler, time, brokerId, () => brokerEpoch)
-    alterIsrManager.start()
-
-    def matchesAlterIsr(topicPartitions: Set[TopicPartition]): AbstractRequest.Builder[_ <: AbstractRequest] = {
-      ArgumentMatchers.argThat[AbstractRequest.Builder[_ <: AbstractRequest]] { request =>
-        assertEquals(ApiKeys.ALTER_ISR, request.apiKey())
-        val alterIsrRequest = request.asInstanceOf[AlterIsrRequest.Builder].build()
-
-        val requestTopicPartitions = alterIsrRequest.data.topics.asScala.flatMap { topicData =>
-          val topic = topicData.name
-          topicData.partitions.asScala.map(partitionData => new TopicPartition(topic, partitionData.partitionIndex))
-        }.toSet
-
-        topicPartitions == requestTopicPartitions
-      }
-    }
-
-    def verifySendAlterIsr(topicPartitions: Set[TopicPartition]): ControllerRequestCompletionHandler = {
-      val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] =
-        ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
-      Mockito.verify(brokerToController).sendRequest(
-        matchesAlterIsr(topicPartitions),
-        callbackCapture.capture()
-      )
-      Mockito.reset(brokerToController)
-      callbackCapture.getValue
-    }
-
-    def clientResponse(topicPartition: TopicPartition, error: Errors): ClientResponse = {
-      val alterIsrResponse = partitionResponse(topicPartition, error)
-      new ClientResponse(null, null, "", 0L, 0L,
-        false, null, null, alterIsrResponse)
-    }
-
-    // The first `submit` will send the `AlterIsr` request
-    val future1 = alterIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    val callback1 = verifySendAlterIsr(Set(tp0))
-
-    // Additional calls while the `AlterIsr` request is inflight will be queued
-    val future2 = alterIsrManager.submit(tp1, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-    val future3 = alterIsrManager.submit(tp2, new LeaderAndIsr(1, 1, List(1,2,3), 10), 0)
-
-    // Respond to the first request, which will also allow the next request to get sent
-    callback1.onComplete(clientResponse(tp0, Errors.UNKNOWN_SERVER_ERROR))
-    assertFutureThrows(future1, classOf[UnknownServerException])
-    assertFalse(future2.isDone)
-    assertFalse(future3.isDone)
-
-    // Verify the second request includes both expected partitions, but only respond with one of them
-    val callback2 = verifySendAlterIsr(Set(tp1, tp2))
-    callback2.onComplete(clientResponse(tp2, Errors.UNKNOWN_SERVER_ERROR))
-    assertFutureThrows(future3, classOf[UnknownServerException])
-    assertFalse(future2.isDone)
-
-    // The missing partition should be retried
-    val callback3 = verifySendAlterIsr(Set(tp1))
-    callback3.onComplete(clientResponse(tp1, Errors.UNKNOWN_SERVER_ERROR))
-    assertFutureThrows(future2, classOf[UnknownServerException])
-  }
-
-  @Test
-  def testZkBasic(): Unit = {
-    val scheduler = new MockScheduler(time)
-    scheduler.startup()
-
-    val kafkaZkClient = Mockito.mock(classOf[KafkaZkClient])
-    Mockito.doAnswer(_ => (true, 2))
-      .when(kafkaZkClient)
-      .conditionalUpdatePath(anyString(), any(), ArgumentMatchers.eq(1), any())
-    Mockito.doAnswer(_ => (false, 2))
-      .when(kafkaZkClient)
-      .conditionalUpdatePath(anyString(), any(), ArgumentMatchers.eq(3), any())
-
-    val zkIsrManager = new ZkIsrManager(scheduler, time, kafkaZkClient)
-    zkIsrManager.start()
-
-    // Correct ZK version
-    val future1 = zkIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 1), 0)
-    assertTrue(future1.isDone)
-    assertEquals(new LeaderAndIsr(1, 1, List(1,2,3), 2), future1.get)
-
-    // Wrong ZK version
-    val future2 = zkIsrManager.submit(tp0, new LeaderAndIsr(1, 1, List(1,2,3), 3), 0)
-    assertTrue(future2.isCompletedExceptionally)
-    assertFutureThrows(future2, classOf[InvalidUpdateVersionException])
-  }
-
-  private def partitionResponse(tp: TopicPartition, error: Errors): AlterIsrResponse = {
-    new AlterIsrResponse(new AlterIsrResponseData()
-      .setTopics(Collections.singletonList(
-        new AlterIsrResponseData.TopicData()
-          .setName(tp.topic())
-          .setPartitions(Collections.singletonList(
-            new AlterIsrResponseData.PartitionData()
-              .setPartitionIndex(tp.partition())
-              .setErrorCode(error.code))))))
-  }
-}
diff --git a/core/src/test/scala/unit/kafka/server/AlterPartitionManagerTest.scala b/core/src/test/scala/unit/kafka/server/AlterPartitionManagerTest.scala
new file mode 100644
index 0000000000000..29deb9bb2b6f5
--- /dev/null
+++ b/core/src/test/scala/unit/kafka/server/AlterPartitionManagerTest.scala
@@ -0,0 +1,652 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import java.util.Collections
+import java.util.stream.{Stream => JStream}
+import kafka.api.LeaderAndIsr
+import kafka.utils.{MockScheduler, MockTime}
+import kafka.zk.KafkaZkClient
+import org.apache.kafka.clients.ClientResponse
+import org.apache.kafka.common.TopicIdPartition
+import org.apache.kafka.common.Uuid
+import org.apache.kafka.common.errors.{AuthenticationException, InvalidUpdateVersionException, OperationNotAttemptedException, UnknownServerException, UnsupportedVersionException}
+import org.apache.kafka.common.message.AlterPartitionResponseData
+import org.apache.kafka.common.metrics.Metrics
+import org.apache.kafka.common.protocol.MessageUtil
+import org.apache.kafka.common.protocol.{ApiKeys, Errors}
+import org.apache.kafka.common.requests.RequestHeader
+import org.apache.kafka.common.requests.{AbstractRequest, AlterPartitionRequest, AlterPartitionResponse}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_7_IV2, IBP_3_2_IV0}
+import org.apache.kafka.test.TestUtils.assertFutureThrows
+import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.Arguments
+import org.junit.jupiter.params.provider.MethodSource
+import org.mockito.ArgumentMatcher
+import org.mockito.ArgumentMatchers.{any, anyString}
+import org.mockito.Mockito.{mock, reset, times, verify}
+import org.mockito.{ArgumentCaptor, ArgumentMatchers, Mockito}
+
+import java.util.concurrent.{CompletableFuture, TimeUnit}
+import scala.jdk.CollectionConverters._
+
+class AlterPartitionManagerTest {
+
+  val topic = "test-topic"
+  val topicId = Uuid.randomUuid()
+  val time = new MockTime
+  val metrics = new Metrics
+  val brokerId = 1
+
+  var brokerToController: BrokerToControllerChannelManager = _
+
+  val tp0 = new TopicIdPartition(topicId, 0, topic)
+  val tp1 = new TopicIdPartition(topicId, 1, topic)
+  val tp2 = new TopicIdPartition(topicId, 2, topic)
+
+  @BeforeEach
+  def setup(): Unit = {
+    brokerToController = mock(classOf[BrokerToControllerChannelManager])
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testBasic(metadataVersion: MetadataVersion): Unit = {
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => metadataVersion)
+    alterPartitionManager.start()
+    alterPartitionManager.submit(tp0, new LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(any(), any())
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideLeaderRecoveryState"))
+  def testBasicSentLeaderRecoveryState(
+    metadataVersion: MetadataVersion,
+    leaderRecoveryState: LeaderRecoveryState
+  ): Unit = {
+    val requestCapture = ArgumentCaptor.forClass(classOf[AbstractRequest.Builder[AlterPartitionRequest]])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => metadataVersion)
+    alterPartitionManager.start()
+    alterPartitionManager.submit(tp0, new LeaderAndIsr(1, 1, List(1), leaderRecoveryState, 10), 0)
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(requestCapture.capture(), any())
+
+    val request = requestCapture.getValue.build()
+    val expectedLeaderRecoveryState = if (metadataVersion.isAtLeast(IBP_3_2_IV0)) leaderRecoveryState else LeaderRecoveryState.RECOVERED
+    assertEquals(expectedLeaderRecoveryState.value, request.data.topics.get(0).partitions.get(0).leaderRecoveryState())
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testOverwriteWithinBatch(metadataVersion: MetadataVersion): Unit = {
+    val canUseTopicIds = metadataVersion.isAtLeast(MetadataVersion.IBP_2_8_IV0)
+    val capture: ArgumentCaptor[AbstractRequest.Builder[AlterPartitionRequest]] = ArgumentCaptor.forClass(classOf[AbstractRequest.Builder[AlterPartitionRequest]])
+    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => metadataVersion)
+    alterPartitionManager.start()
+
+    // Only send one ISR update for a given topic+partition
+    val firstSubmitFuture = alterPartitionManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+    assertFalse(firstSubmitFuture.isDone)
+
+    val failedSubmitFuture = alterPartitionManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2), LeaderRecoveryState.RECOVERED, 10), 0)
+    assertTrue(failedSubmitFuture.isCompletedExceptionally)
+    assertFutureThrows(failedSubmitFuture, classOf[OperationNotAttemptedException])
+
+    // Simulate response
+    val alterPartitionResp = partitionResponse()
+    val resp = makeClientResponse(
+      response = alterPartitionResp,
+      version = if (canUseTopicIds) ApiKeys.ALTER_PARTITION.latestVersion else 1
+    )
+    verify(brokerToController).sendRequest(capture.capture(), callbackCapture.capture())
+    callbackCapture.getValue.onComplete(resp)
+
+    // Now we can submit this partition again
+    val newSubmitFuture = alterPartitionManager.submit(tp0, LeaderAndIsr(1, 1, List(1), LeaderRecoveryState.RECOVERED, 10), 0)
+    assertFalse(newSubmitFuture.isDone)
+
+    verify(brokerToController).start()
+    verify(brokerToController, times(2)).sendRequest(capture.capture(), callbackCapture.capture())
+
+    // Make sure we sent the right request ISR={1}
+    val request = capture.getValue.build()
+    assertEquals(request.data().topics().size(), 1)
+    assertEquals(request.data().topics().get(0).partitions().get(0).newIsr().size(), 1)
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testSingleBatch(metadataVersion: MetadataVersion): Unit = {
+    val capture: ArgumentCaptor[AbstractRequest.Builder[AlterPartitionRequest]] = ArgumentCaptor.forClass(classOf[AbstractRequest.Builder[AlterPartitionRequest]])
+    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => metadataVersion)
+    alterPartitionManager.start()
+
+    // First request will send batch of one
+    alterPartitionManager.submit(new TopicIdPartition(topicId, 0, topic),
+      LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+
+    // Other submissions will queue up until a response
+    for (i <- 1 to 9) {
+      alterPartitionManager.submit(new TopicIdPartition(topicId, i, topic),
+        LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+    }
+
+    // Simulate response, omitting partition 0 will allow it to stay in unsent queue
+    val alterPartitionResp = new AlterPartitionResponse(new AlterPartitionResponseData())
+    val resp = new ClientResponse(null, null, "", 0L, 0L,
+      false, null, null, alterPartitionResp)
+
+    // On the callback, we check for unsent items and send another request
+    verify(brokerToController).sendRequest(capture.capture(), callbackCapture.capture())
+    callbackCapture.getValue.onComplete(resp)
+
+    verify(brokerToController).start()
+    verify(brokerToController, times(2)).sendRequest(capture.capture(), callbackCapture.capture())
+
+    // Verify the last request sent had all 10 items
+    val request = capture.getValue.build()
+    assertEquals(request.data().topics().size(), 1)
+    assertEquals(request.data().topics().get(0).partitions().size(), 10)
+  }
+
+  @Test
+  def testSubmitFromCallback(): Unit = {
+    // prepare a partition level retriable error response
+    val alterPartitionRespWithPartitionError = partitionResponse(tp0, Errors.UNKNOWN_SERVER_ERROR)
+    val errorResponse = makeClientResponse(alterPartitionRespWithPartitionError, ApiKeys.ALTER_PARTITION.latestVersion)
+
+    val leaderId = 1
+    val leaderEpoch = 1
+    val partitionEpoch = 10
+    val isr = List(1, 2, 3)
+    val leaderAndIsr = new LeaderAndIsr(leaderId, leaderEpoch, isr, LeaderRecoveryState.RECOVERED, partitionEpoch)
+    val callbackCapture = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => IBP_3_2_IV0)
+    alterPartitionManager.start()
+    val future = alterPartitionManager.submit(tp0, leaderAndIsr, 0)
+    val finalFuture = new CompletableFuture[LeaderAndIsr]()
+    future.whenComplete { (_, e) =>
+      if (e != null) {
+        // Retry when error.
+        alterPartitionManager.submit(tp0, leaderAndIsr, 0).whenComplete { (result, e) =>
+          if (e != null) {
+            finalFuture.completeExceptionally(e)
+          } else {
+            finalFuture.complete(result)
+          }
+        }
+      } else {
+        finalFuture.completeExceptionally(new AssertionError("Expected the future to be failed"))
+      }
+    }
+
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+    reset(brokerToController)
+    callbackCapture.getValue.onComplete(errorResponse)
+
+    // Complete the retry request
+    val retryAlterPartitionResponse = partitionResponse(tp0, Errors.NONE, partitionEpoch, leaderId, leaderEpoch, isr)
+    val retryResponse = makeClientResponse(retryAlterPartitionResponse, ApiKeys.ALTER_PARTITION.latestVersion)
+
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+    callbackCapture.getValue.onComplete(retryResponse)
+
+    assertEquals(leaderAndIsr, finalFuture.get(200, TimeUnit.MILLISECONDS))
+    // No more items in unsentIsrUpdates
+    assertFalse(alterPartitionManager.unsentIsrUpdates.containsKey(tp0.topicPartition))
+  }
+
+  @Test
+  def testAuthorizationFailed(): Unit = {
+    testRetryOnTopLevelError(Errors.CLUSTER_AUTHORIZATION_FAILED)
+  }
+
+  @Test
+  def testStaleBrokerEpoch(): Unit = {
+    testRetryOnTopLevelError(Errors.STALE_BROKER_EPOCH)
+  }
+
+  @Test
+  def testUnknownServer(): Unit = {
+    testRetryOnTopLevelError(Errors.UNKNOWN_SERVER_ERROR)
+  }
+
+  @Test
+  def testRetryOnAuthenticationFailure(): Unit = {
+    testRetryOnErrorResponse(new ClientResponse(null, null, "", 0L, 0L,
+      false, null, new AuthenticationException("authentication failed"), null))
+  }
+
+  @Test
+  def testRetryOnUnsupportedVersionError(): Unit = {
+    testRetryOnErrorResponse(new ClientResponse(null, null, "", 0L, 0L,
+      false, new UnsupportedVersionException("unsupported version"), null, null))
+  }
+
+  private def testRetryOnTopLevelError(error: Errors): Unit = {
+    val alterPartitionResp = new AlterPartitionResponse(new AlterPartitionResponseData().setErrorCode(error.code))
+    val response = makeClientResponse(alterPartitionResp, ApiKeys.ALTER_PARTITION.latestVersion)
+    testRetryOnErrorResponse(response)
+  }
+
+  private def testRetryOnErrorResponse(response: ClientResponse): Unit = {
+    val leaderAndIsr = new LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10)
+    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => IBP_3_2_IV0)
+    alterPartitionManager.start()
+    alterPartitionManager.submit(tp0, leaderAndIsr, 0)
+
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+    callbackCapture.getValue.onComplete(response)
+
+    // Any top-level error, we want to retry, so we don't clear items from the pending map
+    assertTrue(alterPartitionManager.unsentIsrUpdates.containsKey(tp0.topicPartition))
+
+    reset(brokerToController)
+
+    // After some time, we will retry failed requests
+    time.sleep(100)
+    scheduler.tick()
+
+    // After a successful response, we can submit another AlterIsrItem
+    val retryAlterPartitionResponse = partitionResponse()
+    val retryResponse = makeClientResponse(retryAlterPartitionResponse, ApiKeys.ALTER_PARTITION.latestVersion)
+
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+    callbackCapture.getValue.onComplete(retryResponse)
+
+    assertFalse(alterPartitionManager.unsentIsrUpdates.containsKey(tp0.topicPartition))
+  }
+
+  @Test
+  def testInvalidUpdateVersion(): Unit = {
+    checkPartitionError(Errors.INVALID_UPDATE_VERSION)
+  }
+
+  @Test
+  def testUnknownTopicPartition(): Unit = {
+    checkPartitionError(Errors.UNKNOWN_TOPIC_OR_PARTITION)
+  }
+
+  @Test
+  def testNotLeaderOrFollower(): Unit = {
+    checkPartitionError(Errors.NOT_LEADER_OR_FOLLOWER)
+  }
+
+  @Test
+  def testInvalidRequest(): Unit = {
+    checkPartitionError(Errors.INVALID_REQUEST)
+  }
+
+  private def checkPartitionError(error: Errors): Unit = {
+    val alterPartitionManager = testPartitionError(tp0, error)
+    // Any partition-level error should clear the item from the pending queue allowing for future updates
+    val future = alterPartitionManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+    assertFalse(future.isDone)
+  }
+
+  private def testPartitionError(tp: TopicIdPartition, error: Errors): AlterPartitionManager = {
+    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+    reset(brokerToController)
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => IBP_3_2_IV0)
+    alterPartitionManager.start()
+
+    val future = alterPartitionManager.submit(tp, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+    reset(brokerToController)
+
+    val alterPartitionResp = partitionResponse(tp, error)
+    val resp = makeClientResponse(alterPartitionResp, ApiKeys.ALTER_PARTITION.latestVersion)
+    callbackCapture.getValue.onComplete(resp)
+    assertTrue(future.isCompletedExceptionally)
+    assertFutureThrows(future, error.exception.getClass)
+    alterPartitionManager
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testOneInFlight(metadataVersion: MetadataVersion): Unit = {
+    val callbackCapture: ArgumentCaptor[ControllerRequestCompletionHandler] = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    val scheduler = new MockScheduler(time)
+    val alterPartitionManager = new DefaultAlterPartitionManager(brokerToController, scheduler, time, brokerId, () => 2, () => metadataVersion)
+    alterPartitionManager.start()
+
+    // First submit will send the request
+    alterPartitionManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+
+    // These will become pending unsent items
+    alterPartitionManager.submit(tp1, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+    alterPartitionManager.submit(tp2, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10), 0)
+
+    verify(brokerToController).start()
+    verify(brokerToController).sendRequest(any(), callbackCapture.capture())
+
+    // Once the callback runs, another request will be sent
+    reset(brokerToController)
+
+    val alterPartitionResp = new AlterPartitionResponse(new AlterPartitionResponseData())
+    val resp = makeClientResponse(alterPartitionResp, ApiKeys.ALTER_PARTITION.latestVersion)
+    callbackCapture.getValue.onComplete(resp)
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testPartitionMissingInResponse(metadataVersion: MetadataVersion): Unit = {
+    val expectedVersion = if (metadataVersion.isTopicIdsSupported) {
+      ApiKeys.ALTER_PARTITION.latestVersion
+    } else {
+      1.toShort
+    }
+    val leaderAndIsr = LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10)
+    val controlledEpoch = 0
+    val brokerEpoch = 2
+    val scheduler = new MockScheduler(time)
+    val brokerToController = Mockito.mock(classOf[BrokerToControllerChannelManager])
+    val alterPartitionManager = new DefaultAlterPartitionManager(
+      brokerToController,
+      scheduler,
+      time,
+      brokerId,
+      () => brokerEpoch,
+      () => metadataVersion
+    )
+    alterPartitionManager.start()
+
+    // The first `submit` will send the `AlterIsr` request
+    val future1 = alterPartitionManager.submit(tp0, leaderAndIsr, controlledEpoch)
+    val callback1 = verifySendRequest(brokerToController, alterPartitionRequestMatcher(
+      expectedTopicPartitions = Set(tp0),
+      expectedVersion = expectedVersion
+    ))
+
+    // Additional calls while the `AlterIsr` request is inflight will be queued
+    val future2 = alterPartitionManager.submit(tp1, leaderAndIsr, controlledEpoch)
+    val future3 = alterPartitionManager.submit(tp2, leaderAndIsr, controlledEpoch)
+
+    // Respond to the first request, which will also allow the next request to get sent
+    callback1.onComplete(makeClientResponse(
+      response = partitionResponse(tp0, Errors.UNKNOWN_SERVER_ERROR),
+      version = expectedVersion
+    ))
+    assertFutureThrows(future1, classOf[UnknownServerException])
+    assertFalse(future2.isDone)
+    assertFalse(future3.isDone)
+
+    // Verify the second request includes both expected partitions, but only respond with one of them
+    val callback2 = verifySendRequest(brokerToController, alterPartitionRequestMatcher(
+      expectedTopicPartitions = Set(tp1, tp2),
+      expectedVersion = expectedVersion
+    ))
+    callback2.onComplete(makeClientResponse(
+      response = partitionResponse(tp2, Errors.UNKNOWN_SERVER_ERROR),
+      version = expectedVersion
+    ))
+    assertFutureThrows(future3, classOf[UnknownServerException])
+    assertFalse(future2.isDone)
+
+    // The missing partition should be retried
+    val callback3 = verifySendRequest(brokerToController, alterPartitionRequestMatcher(
+      expectedTopicPartitions = Set(tp1),
+      expectedVersion = expectedVersion
+    ))
+    callback3.onComplete(makeClientResponse(
+      response = partitionResponse(tp1, Errors.UNKNOWN_SERVER_ERROR),
+      version = expectedVersion
+    ))
+    assertFutureThrows(future2, classOf[UnknownServerException])
+  }
+
+  @ParameterizedTest
+  @MethodSource(Array("provideMetadataVersions"))
+  def testPartialTopicIds(metadataVersion: MetadataVersion): Unit = {
+    val canUseTopicIds = metadataVersion.isAtLeast(MetadataVersion.IBP_2_8_IV0)
+    val foo = new TopicIdPartition(Uuid.ZERO_UUID, 0, "foo")
+    val bar = new TopicIdPartition(Uuid.randomUuid(), 0, "bar")
+    val zar = new TopicIdPartition(Uuid.randomUuid(), 0, "zar")
+
+    val leaderAndIsr = LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 10)
+    val controlledEpoch = 0
+    val brokerEpoch = 2
+    val scheduler = new MockScheduler(time)
+    val brokerToController = Mockito.mock(classOf[BrokerToControllerChannelManager])
+    val alterPartitionManager = new DefaultAlterPartitionManager(
+      brokerToController,
+      scheduler,
+      time,
+      brokerId,
+      () => brokerEpoch,
+      () => metadataVersion
+    )
+    alterPartitionManager.start()
+
+    // Submits an alter isr update with zar, which has a topic id.
+    val future1 = alterPartitionManager.submit(zar, leaderAndIsr, controlledEpoch)
+
+    // The latest version is expected if all the submitted partitions
+    // have topic ids and IBP >= 2.8; version 1 should be used otherwise.
+    val callback1 = verifySendRequest(brokerToController, alterPartitionRequestMatcher(
+      expectedTopicPartitions = Set(zar),
+      expectedVersion = if (canUseTopicIds) ApiKeys.ALTER_PARTITION.latestVersion else 1
+    ))
+
+    // Submits two additional alter isr changes with foo and bar while the previous one
+    // is still inflight. foo has no topic id, bar has one.
+    val future2 = alterPartitionManager.submit(foo, leaderAndIsr, controlledEpoch)
+    val future3 = alterPartitionManager.submit(bar, leaderAndIsr, controlledEpoch)
+
+    // Completes the first request. That triggers the next one.
+    callback1.onComplete(makeClientResponse(
+      response = makeAlterPartition(Seq(makeAlterPartitionTopicData(zar, Errors.NONE))),
+      version = if (canUseTopicIds) ApiKeys.ALTER_PARTITION.latestVersion else 1
+    ))
+
+    assertTrue(future1.isDone)
+    assertFalse(future2.isDone)
+    assertFalse(future3.isDone)
+
+    // Version 1 is expected because foo does not have a topic id.
+    val callback2 = verifySendRequest(brokerToController, alterPartitionRequestMatcher(
+      expectedTopicPartitions = Set(foo, bar),
+      expectedVersion = 1
+    ))
+
+    // Completes the second request.
+    callback2.onComplete(makeClientResponse(
+      response = makeAlterPartition(Seq(
+        makeAlterPartitionTopicData(foo, Errors.NONE),
+        makeAlterPartitionTopicData(bar, Errors.NONE),
+      )),
+      version = 1
+    ))
+
+    assertTrue(future1.isDone)
+    assertTrue(future2.isDone)
+    assertTrue(future3.isDone)
+  }
+
+  private def verifySendRequest(
+    brokerToController: BrokerToControllerChannelManager,
+    expectedRequest: ArgumentMatcher[AbstractRequest.Builder[_ <: AbstractRequest]]
+  ): ControllerRequestCompletionHandler = {
+    val callbackCapture = ArgumentCaptor.forClass(classOf[ControllerRequestCompletionHandler])
+
+    Mockito.verify(brokerToController).sendRequest(
+      ArgumentMatchers.argThat(expectedRequest),
+      callbackCapture.capture()
+    )
+
+    Mockito.reset(brokerToController)
+
+    callbackCapture.getValue
+  }
+
+  private def alterPartitionRequestMatcher(
+    expectedTopicPartitions: Set[TopicIdPartition],
+    expectedVersion: Short
+  ): ArgumentMatcher[AbstractRequest.Builder[_ <: AbstractRequest]] = {
+    request => {
+      assertEquals(ApiKeys.ALTER_PARTITION, request.apiKey)
+
+      val alterPartitionRequest = request.asInstanceOf[AlterPartitionRequest.Builder].build()
+      assertEquals(expectedVersion, alterPartitionRequest.version)
+
+      val requestTopicPartitions = alterPartitionRequest.data.topics.asScala.flatMap { topicData =>
+        topicData.partitions.asScala.map { partitionData =>
+          new TopicIdPartition(topicData.topicId, partitionData.partitionIndex, topicData.topicName)
+        }
+      }.toSet
+
+      expectedTopicPartitions == requestTopicPartitions
+    }
+  }
+
+  private def makeClientResponse(
+    response: AlterPartitionResponse,
+    version: Short
+  ): ClientResponse = {
+    new ClientResponse(
+      new RequestHeader(response.apiKey, version, "", 0),
+      null,
+      "",
+      0L,
+      0L,
+      false,
+      null,
+      null,
+      // Response is serialized and deserialized to ensure that its does
+      // not contain ignorable fields used by other versions.
+      AlterPartitionResponse.parse(MessageUtil.toByteBuffer(response.data, version), version)
+    )
+  }
+
+  private def makeAlterPartition(
+    topics: Seq[AlterPartitionResponseData.TopicData]
+  ): AlterPartitionResponse = {
+    new AlterPartitionResponse(new AlterPartitionResponseData().setTopics(topics.asJava))
+  }
+
+  private def makeAlterPartitionTopicData(
+    topicIdPartition: TopicIdPartition,
+    error: Errors
+  ): AlterPartitionResponseData.TopicData = {
+    new AlterPartitionResponseData.TopicData()
+      .setTopicName(topicIdPartition.topic)
+      .setTopicId(topicIdPartition.topicId)
+      .setPartitions(Collections.singletonList(
+        new AlterPartitionResponseData.PartitionData()
+          .setPartitionIndex(topicIdPartition.partition)
+          .setErrorCode(error.code)))
+  }
+
+  @Test
+  def testZkBasic(): Unit = {
+    val scheduler = new MockScheduler(time)
+    scheduler.startup()
+
+    val kafkaZkClient = Mockito.mock(classOf[KafkaZkClient])
+    Mockito.doAnswer(_ => (true, 2))
+      .when(kafkaZkClient)
+      .conditionalUpdatePath(anyString(), any(), ArgumentMatchers.eq(1), any())
+    Mockito.doAnswer(_ => (false, 2))
+      .when(kafkaZkClient)
+      .conditionalUpdatePath(anyString(), any(), ArgumentMatchers.eq(3), any())
+
+    val zkIsrManager = new ZkAlterPartitionManager(scheduler, time, kafkaZkClient)
+    zkIsrManager.start()
+
+    // Correct ZK version
+    val future1 = zkIsrManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 1), 0)
+    assertTrue(future1.isDone)
+    assertEquals(LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 2), future1.get)
+
+    // Wrong ZK version
+    val future2 = zkIsrManager.submit(tp0, LeaderAndIsr(1, 1, List(1, 2, 3), LeaderRecoveryState.RECOVERED, 3), 0)
+    assertTrue(future2.isCompletedExceptionally)
+    assertFutureThrows(future2, classOf[InvalidUpdateVersionException])
+  }
+
+  private def partitionResponse(
+    tp: TopicIdPartition = tp0,
+    error: Errors = Errors.NONE,
+    partitionEpoch: Int = 0,
+    leaderId: Int = 0,
+    leaderEpoch: Int = 0,
+    isr: List[Int] = List.empty
+  ): AlterPartitionResponse = {
+    new AlterPartitionResponse(new AlterPartitionResponseData()
+      .setTopics(Collections.singletonList(
+        new AlterPartitionResponseData.TopicData()
+          .setTopicName(tp.topic)
+          .setTopicId(tp.topicId)
+          .setPartitions(Collections.singletonList(
+            new AlterPartitionResponseData.PartitionData()
+              .setPartitionIndex(tp.partition)
+              .setPartitionEpoch(partitionEpoch)
+              .setLeaderEpoch(leaderEpoch)
+              .setLeaderId(leaderId)
+              .setIsr(isr.map(Integer.valueOf).asJava)
+              .setErrorCode(error.code))))))
+  }
+}
+
+object AlterPartitionManagerTest {
+  def provideMetadataVersions(): JStream[MetadataVersion] = {
+    JStream.of(
+      // Supports KIP-704: unclean leader recovery
+      IBP_3_2_IV0,
+      // Supports KIP-497: alter partition
+      IBP_2_7_IV2
+    )
+  }
+
+  def provideLeaderRecoveryState(): JStream[Arguments] = {
+    // Multiply metadataVersions by leaderRecoveryState
+    provideMetadataVersions().flatMap { metadataVersion =>
+      JStream.of(
+        Arguments.of(metadataVersion, LeaderRecoveryState.RECOVERED),
+        Arguments.of(metadataVersion, LeaderRecoveryState.RECOVERING)
+      )
+    }
+  }
+}
diff --git a/core/src/test/scala/unit/kafka/server/ApiVersionManagerTest.scala b/core/src/test/scala/unit/kafka/server/ApiVersionManagerTest.scala
index 8f8a7a0e3ca53..9936f8deaeda8 100644
--- a/core/src/test/scala/unit/kafka/server/ApiVersionManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ApiVersionManagerTest.scala
@@ -16,10 +16,11 @@
  */
 package kafka.server
 
-import kafka.api.ApiVersion
+import kafka.server.metadata.ZkMetadataCache
 import org.apache.kafka.clients.NodeApiVersions
 import org.apache.kafka.common.message.ApiMessageType.ListenerType
 import org.apache.kafka.common.protocol.ApiKeys
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.{Disabled, Test}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.params.ParameterizedTest
@@ -30,17 +31,16 @@ import scala.jdk.CollectionConverters._
 
 class ApiVersionManagerTest {
   private val brokerFeatures = BrokerFeatures.createDefault()
-  private val featureCache = new FinalizedFeatureCache(brokerFeatures)
+  private val metadataCache = new ZkMetadataCache(1, MetadataVersion.latest(), brokerFeatures)
 
   @ParameterizedTest
   @EnumSource(classOf[ListenerType])
   def testApiScope(apiScope: ListenerType): Unit = {
     val versionManager = new DefaultApiVersionManager(
       listenerType = apiScope,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
       forwardingManager = None,
       features = brokerFeatures,
-      featureCache = featureCache
+      metadataCache = metadataCache
     )
     assertEquals(ApiKeys.apisForListener(apiScope).asScala, versionManager.enabledApis)
     assertTrue(ApiKeys.apisForListener(apiScope).asScala.forall(versionManager.isApiEnabled))
@@ -61,10 +61,9 @@ class ApiVersionManagerTest {
 
     val versionManager = new DefaultApiVersionManager(
       listenerType = ListenerType.ZK_BROKER,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
       forwardingManager = Some(forwardingManager),
       features = brokerFeatures,
-      featureCache = featureCache
+      metadataCache = metadataCache
     )
 
     val apiVersionsResponse = versionManager.apiVersionResponse(throttleTimeMs = 0)
@@ -82,10 +81,9 @@ class ApiVersionManagerTest {
     for (forwardingManagerOpt <- Seq(Some(forwardingManager), None)) {
       val versionManager = new DefaultApiVersionManager(
         listenerType = ListenerType.BROKER,
-        interBrokerProtocolVersion = ApiVersion.latestVersion,
         forwardingManager = forwardingManagerOpt,
         features = brokerFeatures,
-        featureCache = featureCache
+        metadataCache = metadataCache
       )
       assertFalse(versionManager.isApiEnabled(ApiKeys.ENVELOPE))
       assertFalse(versionManager.enabledApis.contains(ApiKeys.ENVELOPE))
@@ -104,10 +102,9 @@ class ApiVersionManagerTest {
 
     val versionManager = new DefaultApiVersionManager(
       listenerType = ListenerType.ZK_BROKER,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
       forwardingManager = Some(forwardingManager),
       features = brokerFeatures,
-      featureCache = featureCache
+      metadataCache = metadataCache
     )
     assertTrue(versionManager.isApiEnabled(ApiKeys.ENVELOPE))
     assertTrue(versionManager.enabledApis.contains(ApiKeys.ENVELOPE))
@@ -123,10 +120,9 @@ class ApiVersionManagerTest {
   def testEnvelopeDisabledWhenForwardingManagerEmpty(): Unit = {
     val versionManager = new DefaultApiVersionManager(
       listenerType = ListenerType.ZK_BROKER,
-      interBrokerProtocolVersion = ApiVersion.latestVersion,
       forwardingManager = None,
       features = brokerFeatures,
-      featureCache = featureCache
+      metadataCache = metadataCache
     )
     assertFalse(versionManager.isApiEnabled(ApiKeys.ENVELOPE))
     assertFalse(versionManager.enabledApis.contains(ApiKeys.ENVELOPE))
diff --git a/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala b/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala
index b1778ba7dfb38..bc45b72077da0 100644
--- a/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ApiVersionsRequestTest.scala
@@ -29,7 +29,7 @@ import org.junit.jupiter.api.extension.ExtendWith
 
 
 @ExtendWith(value = Array(classOf[ClusterTestExtensions]))
-@ClusterTestDefaults(clusterType = Type.BOTH, brokers = 1)
+@ClusterTestDefaults(clusterType = Type.ALL, brokers = 1)
 class ApiVersionsRequestTest(cluster: ClusterInstance) extends AbstractApiVersionsRequestTest(cluster) {
 
   @BeforeEach
diff --git a/core/src/test/scala/unit/kafka/server/BaseRequestTest.scala b/core/src/test/scala/unit/kafka/server/BaseRequestTest.scala
index eee4608f74c7f..7d1f3eca185a7 100644
--- a/core/src/test/scala/unit/kafka/server/BaseRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/BaseRequestTest.scala
@@ -83,6 +83,20 @@ abstract class BaseRequestTest extends IntegrationTestHarness {
     }.map(_.socketServer).getOrElse(throw new IllegalStateException(s"Could not find broker with id $brokerId"))
   }
 
+  /**
+   * Return the socket server where admin request to be sent.
+   *
+   * For KRaft clusters that is any broker as the broker will forward the request to the active
+   * controller. For Legacy clusters that is the controller broker.
+   */
+  def adminSocketServer: SocketServer = {
+    if (isKRaftTest()) {
+      anySocketServer
+    } else {
+      controllerSocketServer
+    }
+  }
+
   def connect(socketServer: SocketServer = anySocketServer,
               listenerName: ListenerName = listenerName): Socket = {
     new Socket("localhost", socketServer.boundPort(listenerName))
diff --git a/core/src/test/scala/unit/kafka/server/BrokerEpochIntegrationTest.scala b/core/src/test/scala/unit/kafka/server/BrokerEpochIntegrationTest.scala
index 0e1b148141293..250f87cbd7330 100755
--- a/core/src/test/scala/unit/kafka/server/BrokerEpochIntegrationTest.scala
+++ b/core/src/test/scala/unit/kafka/server/BrokerEpochIntegrationTest.scala
@@ -147,9 +147,9 @@ class BrokerEpochIntegrationTest extends QuorumTestHarness {
             .setPartitionIndex(tp.partition)
             .setControllerEpoch(controllerEpoch)
             .setLeader(brokerId2)
-            .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 1)
+            .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 1)
             .setIsr(Seq(brokerId1, brokerId2).map(Integer.valueOf).asJava)
-            .setZkVersion(LeaderAndIsr.initialZKVersion)
+            .setPartitionEpoch(LeaderAndIsr.InitialPartitionEpoch)
             .setReplicas(Seq(0, 1).map(Integer.valueOf).asJava)
             .setIsNew(false)
         )
@@ -177,9 +177,9 @@ class BrokerEpochIntegrationTest extends QuorumTestHarness {
             .setPartitionIndex(tp.partition)
             .setControllerEpoch(controllerEpoch)
             .setLeader(brokerId2)
-            .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 1)
+            .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 1)
             .setIsr(Seq(brokerId1, brokerId2).map(Integer.valueOf).asJava)
-            .setZkVersion(LeaderAndIsr.initialZKVersion)
+            .setZkVersion(LeaderAndIsr.InitialPartitionEpoch)
             .setReplicas(Seq(0, 1).map(Integer.valueOf).asJava))
         val liveBrokers = brokerAndEpochs.map { case (broker, _) =>
           val securityProtocol = SecurityProtocol.PLAINTEXT
@@ -220,7 +220,7 @@ class BrokerEpochIntegrationTest extends QuorumTestHarness {
             .setTopicName(tp.topic())
             .setPartitionStates(Seq(new StopReplicaPartitionState()
               .setPartitionIndex(tp.partition())
-              .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 2)
+              .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 2)
               .setDeletePartition(true)).asJava)
         ).asJava
         val requestBuilder = new StopReplicaRequest.Builder(
diff --git a/core/src/test/scala/unit/kafka/server/BrokerFeaturesTest.scala b/core/src/test/scala/unit/kafka/server/BrokerFeaturesTest.scala
index c4cc52c27c9c4..ad8786c919d32 100644
--- a/core/src/test/scala/unit/kafka/server/BrokerFeaturesTest.scala
+++ b/core/src/test/scala/unit/kafka/server/BrokerFeaturesTest.scala
@@ -17,7 +17,8 @@
 
 package kafka.server
 
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
 import org.junit.jupiter.api.Test
 
@@ -27,7 +28,7 @@ class BrokerFeaturesTest {
 
   @Test
   def testEmpty(): Unit = {
-    assertTrue(BrokerFeatures.createDefault().supportedFeatures.empty)
+    assertTrue(BrokerFeatures.createEmpty().supportedFeatures.empty)
   }
 
   @Test
@@ -38,15 +39,12 @@ class BrokerFeaturesTest {
       "test_feature_2" -> new SupportedVersionRange(1, 3)).asJava)
     brokerFeatures.setSupportedFeatures(supportedFeatures)
 
-    val compatibleFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_1" -> new FinalizedVersionRange(2, 3))
-    val inCompatibleFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_3" -> new FinalizedVersionRange(3, 4))
+    val compatibleFeatures = Map[String, Short]("test_feature_1" -> 4)
+    val inCompatibleFeatures = Map[String, Short]("test_feature_3" -> 4)
     val features = compatibleFeatures++inCompatibleFeatures
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = features
 
-    assertEquals(
-      Features.finalizedFeatures(inCompatibleFeatures.asJava),
+    assertEquals(inCompatibleFeatures,
       brokerFeatures.incompatibleFeatures(finalizedFeatures))
     assertTrue(BrokerFeatures.hasIncompatibleFeatures(supportedFeatures, finalizedFeatures))
   }
@@ -59,15 +57,13 @@ class BrokerFeaturesTest {
       "test_feature_2" -> new SupportedVersionRange(1, 3)).asJava)
     brokerFeatures.setSupportedFeatures(supportedFeatures)
 
-    val compatibleFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_1" -> new FinalizedVersionRange(2, 3))
-    val inCompatibleFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_2" -> new FinalizedVersionRange(1, 4))
+    val compatibleFeatures = Map[String, Short]("test_feature_1" -> 3)
+    val inCompatibleFeatures = Map[String, Short]("test_feature_2" -> 4)
     val features = compatibleFeatures++inCompatibleFeatures
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = features
 
     assertEquals(
-      Features.finalizedFeatures(inCompatibleFeatures.asJava),
+      inCompatibleFeatures,
       brokerFeatures.incompatibleFeatures(finalizedFeatures))
     assertTrue(BrokerFeatures.hasIncompatibleFeatures(supportedFeatures, finalizedFeatures))
   }
@@ -80,11 +76,11 @@ class BrokerFeaturesTest {
       "test_feature_2" -> new SupportedVersionRange(1, 3)).asJava)
     brokerFeatures.setSupportedFeatures(supportedFeatures)
 
-    val compatibleFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_1" -> new FinalizedVersionRange(2, 3),
-      "test_feature_2" -> new FinalizedVersionRange(1, 3))
-    val finalizedFeatures = Features.finalizedFeatures(compatibleFeatures.asJava)
-    assertTrue(brokerFeatures.incompatibleFeatures(finalizedFeatures).empty())
+    val compatibleFeatures = Map[String, Short](
+      "test_feature_1" -> 3,
+      "test_feature_2" -> 3)
+    val finalizedFeatures = compatibleFeatures
+    assertTrue(brokerFeatures.incompatibleFeatures(finalizedFeatures).isEmpty)
     assertFalse(BrokerFeatures.hasIncompatibleFeatures(supportedFeatures, finalizedFeatures))
   }
 
@@ -97,10 +93,11 @@ class BrokerFeaturesTest {
       "test_feature_3" -> new SupportedVersionRange(3, 7)).asJava)
     brokerFeatures.setSupportedFeatures(supportedFeatures)
 
-    val expectedFeatures = Map[String, FinalizedVersionRange](
-      "test_feature_1" -> new FinalizedVersionRange(1, 4),
-      "test_feature_2" -> new FinalizedVersionRange(1, 3),
-      "test_feature_3" -> new FinalizedVersionRange(3, 7))
-    assertEquals(Features.finalizedFeatures(expectedFeatures.asJava), brokerFeatures.defaultFinalizedFeatures)
+    val expectedFeatures = Map[String, Short](
+      MetadataVersion.FEATURE_NAME -> MetadataVersion.latest().featureLevel(),
+      "test_feature_1" -> 4,
+      "test_feature_2" -> 3,
+      "test_feature_3" -> 7)
+    assertEquals(expectedFeatures, brokerFeatures.defaultFinalizedFeatures)
   }
 }
diff --git a/core/src/test/scala/unit/kafka/server/BrokerLifecycleManagerTest.scala b/core/src/test/scala/unit/kafka/server/BrokerLifecycleManagerTest.scala
index dd3e49d4d16f0..1a0fac443c0e1 100644
--- a/core/src/test/scala/unit/kafka/server/BrokerLifecycleManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/BrokerLifecycleManagerTest.scala
@@ -73,7 +73,7 @@ class BrokerLifecycleManagerTest {
     val metadata = new Metadata(1000, 1000, new LogContext(), new ClusterResourceListeners())
     val mockClient = new MockClient(time, metadata)
     val controllerNodeProvider = new SimpleControllerNodeProvider()
-    val nodeApiVersions = new NodeApiVersions(Seq(BROKER_REGISTRATION, BROKER_HEARTBEAT).map {
+    val nodeApiVersions = NodeApiVersions.create(Seq(BROKER_REGISTRATION, BROKER_HEARTBEAT).map {
       apiKey => new ApiVersion().setApiKey(apiKey.id).
         setMinVersion(apiKey.oldestVersion()).setMaxVersion(apiKey.latestVersion())
     }.toList.asJava)
diff --git a/core/src/test/scala/unit/kafka/server/BrokerMetricNamesTest.scala b/core/src/test/scala/unit/kafka/server/BrokerMetricNamesTest.scala
index 3bd9c6da61021..dc69076619d5b 100644
--- a/core/src/test/scala/unit/kafka/server/BrokerMetricNamesTest.scala
+++ b/core/src/test/scala/unit/kafka/server/BrokerMetricNamesTest.scala
@@ -17,18 +17,18 @@
 
 package kafka.server
 
-import kafka.metrics.KafkaYammerMetrics
 import kafka.test.ClusterInstance
 import kafka.test.annotation.{ClusterTest, ClusterTestDefaults, Type}
 import kafka.test.junit.ClusterTestExtensions
 import kafka.utils.TestUtils
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.AfterEach
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.extension.ExtendWith
 
 import scala.jdk.CollectionConverters._
 
-@ClusterTestDefaults(clusterType = Type.BOTH)
+@ClusterTestDefaults(clusterType = Type.ALL)
 @ExtendWith(value = Array(classOf[ClusterTestExtensions]))
 class BrokerMetricNamesTest(cluster: ClusterInstance) {
   @AfterEach
diff --git a/core/src/test/scala/unit/kafka/server/ClientQuotasRequestTest.scala b/core/src/test/scala/unit/kafka/server/ClientQuotasRequestTest.scala
index 573bd95b96b3b..904fbbc21654e 100644
--- a/core/src/test/scala/unit/kafka/server/ClientQuotasRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ClientQuotasRequestTest.scala
@@ -37,7 +37,7 @@ import org.junit.jupiter.api.extension.ExtendWith
 
 import scala.jdk.CollectionConverters._
 
-@ClusterTestDefaults(clusterType = Type.BOTH)
+@ClusterTestDefaults(clusterType = Type.ALL)
 @ExtendWith(value = Array(classOf[ClusterTestExtensions]))
 @Tag("integration")
 class ClientQuotasRequestTest(cluster: ClusterInstance) {
@@ -294,8 +294,8 @@ class ClientQuotasRequestTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testAlterClientQuotasBadIp(): Unit = {
-    val invalidHostPatternEntity = new ClientQuotaEntity(Map(ClientQuotaEntity.IP -> "abc-123").asJava)
-    val unresolvableHostEntity = new ClientQuotaEntity(Map(ClientQuotaEntity.IP -> "ip").asJava)
+    val invalidHostPatternEntity = new ClientQuotaEntity(Map(ClientQuotaEntity.IP -> "not a valid host because it has spaces").asJava)
+    val unresolvableHostEntity = new ClientQuotaEntity(Map(ClientQuotaEntity.IP ->  "RFC2606.invalid").asJava)
     val expectedExceptionMessage = "not a valid IP"
     expectInvalidRequestWithMessage(alterEntityQuotas(invalidHostPatternEntity, Map(IpConnectionRateProp -> Some(50.0)),
       validateOnly = true), expectedExceptionMessage)
diff --git a/core/src/test/scala/unit/kafka/server/ControllerApisTest.scala b/core/src/test/scala/unit/kafka/server/ControllerApisTest.scala
index 479bf8e83155a..0fc961145273b 100644
--- a/core/src/test/scala/unit/kafka/server/ControllerApisTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ControllerApisTest.scala
@@ -17,12 +17,6 @@
 
 package kafka.server
 
-import java.net.InetAddress
-import java.util
-import java.util.Collections.singletonList
-import java.util.Properties
-import java.util.concurrent.{CompletableFuture, ExecutionException}
-
 import kafka.network.RequestChannel
 import kafka.raft.RaftManager
 import kafka.server.QuotaFactory.QuotaManagers
@@ -45,7 +39,7 @@ import org.apache.kafka.common.message.DeleteTopicsRequestData.DeleteTopicState
 import org.apache.kafka.common.message.DeleteTopicsResponseData.DeletableTopicResult
 import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.{AlterConfigsResource, AlterConfigsResourceCollection, AlterableConfig, AlterableConfigCollection}
 import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResponse
-import org.apache.kafka.common.message.{CreateTopicsRequestData, _}
+import org.apache.kafka.common.message._
 import org.apache.kafka.common.network.{ClientInformation, ListenerName}
 import org.apache.kafka.common.protocol.Errors._
 import org.apache.kafka.common.protocol.{ApiKeys, ApiMessage, Errors}
@@ -53,15 +47,24 @@ import org.apache.kafka.common.requests._
 import org.apache.kafka.common.resource.{PatternType, Resource, ResourcePattern, ResourceType}
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
 import org.apache.kafka.common.{ElectionType, Uuid}
-import org.apache.kafka.controller.Controller
+import org.apache.kafka.controller.ControllerRequestContextUtil.ANONYMOUS_CONTEXT
+import org.apache.kafka.controller.{Controller, ControllerRequestContext}
 import org.apache.kafka.server.authorizer.{Action, AuthorizableRequestContext, AuthorizationResult, Authorizer}
 import org.apache.kafka.server.common.ApiMessageAndVersion
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import org.mockito.ArgumentMatchers._
 import org.mockito.Mockito._
 import org.mockito.{ArgumentCaptor, ArgumentMatchers}
 
+import java.net.InetAddress
+import java.util
+import java.util.Collections.singletonList
+import java.util.concurrent.{CompletableFuture, ExecutionException, TimeUnit}
+import java.util.concurrent.atomic.AtomicReference
+import java.util.{Collections, Properties}
 import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
 import scala.reflect.ClassTag
@@ -101,7 +104,6 @@ class ControllerApisTest {
       authorizer,
       quotas,
       time,
-      Map.empty,
       controller,
       raftManager,
       new KafkaConfig(props),
@@ -173,6 +175,41 @@ class ControllerApisTest {
     )
   }
 
+  @Test
+  def testFetchLocalTimeComputedCorrectly(): Unit = {
+    val localTimeDurationMs = 5
+    val initialTimeNanos = time.nanoseconds()
+    val initialTimeMs = time.milliseconds()
+
+    when(
+      raftManager.handleRequest(
+        any(classOf[RequestHeader]),
+        any(classOf[ApiMessage]),
+        any(classOf[Long])
+      )
+    ).thenAnswer { _ =>
+      time.sleep(localTimeDurationMs)
+      new CompletableFuture[ApiMessage]()
+    }
+
+    // Local time should be updated when `ControllerApis.handle` returns
+    val fetchRequestData = new FetchRequestData()
+    val request = buildRequest(new FetchRequest(fetchRequestData, ApiKeys.FETCH.latestVersion))
+    createControllerApis(None, new MockController.Builder().build())
+      .handle(request, RequestLocal.NoCaching)
+
+    verify(raftManager).handleRequest(
+      ArgumentMatchers.eq(request.header),
+      ArgumentMatchers.eq(fetchRequestData),
+      ArgumentMatchers.eq(initialTimeMs)
+    )
+
+    assertEquals(localTimeDurationMs, TimeUnit.MILLISECONDS.convert(
+      request.apiLocalCompleteTimeNanos - initialTimeNanos,
+      TimeUnit.NANOSECONDS
+    ))
+  }
+
   @Test
   def testUnauthorizedFetchSnapshot(): Unit = {
     assertThrows(classOf[ClusterAuthorizationException], () => createControllerApis(
@@ -293,11 +330,11 @@ class ControllerApisTest {
   }
 
   @Test
-  def testUnauthorizedHandleAlterIsrRequest(): Unit = {
+  def testUnauthorizedHandleAlterPartitionRequest(): Unit = {
     assertThrows(classOf[ClusterAuthorizationException], () => createControllerApis(
       Some(createDenyAllAuthorizer()), new MockController.Builder().build()).
-        handleAlterIsrRequest(buildRequest(new AlterIsrRequest.Builder(
-          new AlterIsrRequestData()).build(0))))
+        handleAlterPartitionRequest(buildRequest(new AlterPartitionRequest.Builder(
+          new AlterPartitionRequestData(), false).build(0))))
   }
 
   @Test
@@ -497,6 +534,7 @@ class ControllerApisTest {
         new CreatableTopic().setName("bar").setNumPartitions(2).setReplicationFactor(3),
         new CreatableTopic().setName("bar").setNumPartitions(2).setReplicationFactor(3),
         new CreatableTopic().setName("baz").setNumPartitions(2).setReplicationFactor(3),
+        new CreatableTopic().setName("indescribable").setNumPartitions(2).setReplicationFactor(3),
         new CreatableTopic().setName("quux").setNumPartitions(2).setReplicationFactor(3),
     ).iterator()))
     val expectedResponse = Set(new CreatableTopicResult().setName("foo").
@@ -507,11 +545,19 @@ class ControllerApisTest {
         setErrorMessage("Duplicate topic name."),
       new CreatableTopicResult().setName("baz").
         setErrorCode(NONE.code()).
-        setTopicId(new Uuid(0L, 1L)),
+        setTopicId(new Uuid(0L, 1L)).
+        setNumPartitions(2).
+        setReplicationFactor(3).
+        setTopicConfigErrorCode(NONE.code()),
+      new CreatableTopicResult().setName("indescribable").
+        setErrorCode(NONE.code()).
+        setTopicId(new Uuid(0L, 2L)).
+        setTopicConfigErrorCode(TOPIC_AUTHORIZATION_FAILED.code()),
       new CreatableTopicResult().setName("quux").
         setErrorCode(TOPIC_AUTHORIZATION_FAILED.code()))
-    assertEquals(expectedResponse, controllerApis.createTopics(request,
+    assertEquals(expectedResponse, controllerApis.createTopics(ANONYMOUS_CONTEXT, request,
       false,
+      _ => Set("baz", "indescribable"),
       _ => Set("baz")).get().topics().asScala.toSet)
   }
 
@@ -529,7 +575,7 @@ class ControllerApisTest {
         setErrorCode(UNKNOWN_TOPIC_OR_PARTITION.code()).
         setErrorMessage("This server does not host this topic-partition."),
       new DeletableTopicResult().setName("foo").setTopicId(fooId))
-    assertEquals(expectedResponse, controllerApis.deleteTopics(request,
+    assertEquals(expectedResponse, controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
       ApiKeys.DELETE_TOPICS.latestVersion().toInt,
       true,
       _ => Set.empty,
@@ -555,7 +601,7 @@ class ControllerApisTest {
         setErrorCode(UNKNOWN_TOPIC_ID.code()).
         setErrorMessage("This server does not host this topic ID."),
       new DeletableTopicResult().setName("foo").setTopicId(fooId))
-    assertEquals(response, controllerApis.deleteTopics(request,
+    assertEquals(response, controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
       ApiKeys.DELETE_TOPICS.latestVersion().toInt,
       true,
       _ => Set.empty,
@@ -597,7 +643,7 @@ class ControllerApisTest {
       new DeletableTopicResult().setName(null).setTopicId(bazId).
         setErrorCode(INVALID_REQUEST.code()).
         setErrorMessage("Duplicate topic id."))
-    assertEquals(response, controllerApis.deleteTopics(request,
+    assertEquals(response, controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
       ApiKeys.DELETE_TOPICS.latestVersion().toInt,
       false,
       names => names.toSet,
@@ -633,7 +679,7 @@ class ControllerApisTest {
       new DeletableTopicResult().setName("foo").setTopicId(fooId).
         setErrorCode(TOPIC_AUTHORIZATION_FAILED.code).
         setErrorMessage(TOPIC_AUTHORIZATION_FAILED.message))
-    assertEquals(response, controllerApis.deleteTopics(request,
+    assertEquals(response, controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
       ApiKeys.DELETE_TOPICS.latestVersion().toInt,
       false,
       _ => Set("foo", "baz"),
@@ -658,7 +704,7 @@ class ControllerApisTest {
       new DeletableTopicResult().setName(null).setTopicId(barId).
         setErrorCode(UNKNOWN_TOPIC_ID.code).
         setErrorMessage(UNKNOWN_TOPIC_ID.message))
-    assertEquals(expectedResponse, controllerApis.deleteTopics(request,
+    assertEquals(expectedResponse, controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
       ApiKeys.DELETE_TOPICS.latestVersion().toInt,
       false,
       _ => Set("foo"),
@@ -677,7 +723,7 @@ class ControllerApisTest {
     request.topics().add(new DeleteTopicState().setName(null).setTopicId(fooId))
     request.topics().add(new DeleteTopicState().setName(null).setTopicId(barId))
     assertEquals(classOf[NotControllerException], assertThrows(
-      classOf[ExecutionException], () => controllerApis.deleteTopics(request,
+      classOf[ExecutionException], () => controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
         ApiKeys.DELETE_TOPICS.latestVersion().toInt,
         false,
         _ => Set("foo", "bar"),
@@ -694,23 +740,24 @@ class ControllerApisTest {
     val controllerApis = createControllerApis(None, controller, props)
     val request = new DeleteTopicsRequestData()
     request.topics().add(new DeleteTopicState().setName("foo").setTopicId(ZERO_UUID))
-    assertThrows(classOf[TopicDeletionDisabledException], () => controllerApis.deleteTopics(request,
+    assertThrows(classOf[TopicDeletionDisabledException],
+      () => controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
         ApiKeys.DELETE_TOPICS.latestVersion().toInt,
         false,
         _ => Set("foo", "bar"),
         _ => Set("foo", "bar")))
-    assertThrows(classOf[InvalidRequestException], () => controllerApis.deleteTopics(request,
+    assertThrows(classOf[InvalidRequestException],
+      () => controllerApis.deleteTopics(ANONYMOUS_CONTEXT, request,
         1,
         false,
         _ => Set("foo", "bar"),
         _ => Set("foo", "bar")))
   }
 
-  @Test
-  def testCreatePartitionsRequest(): Unit = {
-    val controller = new MockController.Builder().
-      newInitialTopic("foo", Uuid.fromString("vZKYST0pSA2HO5x_6hoO2Q")).
-      newInitialTopic("bar", Uuid.fromString("VlFu5c51ToiNx64wtwkhQw")).build()
+  @ParameterizedTest
+  @ValueSource(booleans = Array(true, false))
+  def testCreatePartitionsRequest(validateOnly: Boolean): Unit = {
+    val controller = mock(classOf[Controller])
     val controllerApis = createControllerApis(None, controller)
     val request = new CreatePartitionsRequestData()
     request.topics().add(new CreatePartitionsTopic().setName("foo").setAssignments(null).setCount(5))
@@ -718,16 +765,31 @@ class ControllerApisTest {
     request.topics().add(new CreatePartitionsTopic().setName("bar").setAssignments(null).setCount(5))
     request.topics().add(new CreatePartitionsTopic().setName("bar").setAssignments(null).setCount(5))
     request.topics().add(new CreatePartitionsTopic().setName("baz").setAssignments(null).setCount(5))
+    request.setValidateOnly(validateOnly)
+
+    // Check if the controller is called correctly with the 'validateOnly' field set appropriately.
+    when(controller.createPartitions(
+      any(),
+      ArgumentMatchers.eq(
+        Collections.singletonList(
+          new CreatePartitionsTopic().setName("foo").setAssignments(null).setCount(5))),
+      ArgumentMatchers.eq(validateOnly))).thenReturn(CompletableFuture
+      .completedFuture(Collections.singletonList(
+        new CreatePartitionsTopicResult().setName("foo").
+          setErrorCode(NONE.code()).
+          setErrorMessage(null)
+      )))
     assertEquals(Set(new CreatePartitionsTopicResult().setName("foo").
-        setErrorCode(NONE.code()).
-        setErrorMessage(null),
+      setErrorCode(NONE.code()).
+      setErrorMessage(null),
       new CreatePartitionsTopicResult().setName("bar").
         setErrorCode(INVALID_REQUEST.code()).
         setErrorMessage("Duplicate topic name."),
       new CreatePartitionsTopicResult().setName("baz").
         setErrorCode(TOPIC_AUTHORIZATION_FAILED.code()).
         setErrorMessage(null)),
-      controllerApis.createPartitions(request, _ => Set("foo", "bar")).get().asScala.toSet)
+      controllerApis.createPartitions(ANONYMOUS_CONTEXT, request,
+        _ => Set("foo", "bar")).get().asScala.toSet)
   }
 
   @Test
@@ -806,8 +868,8 @@ class ControllerApisTest {
     val responseData = new ElectLeadersResponseData()
         .setErrorCode(Errors.NOT_CONTROLLER.code)
 
-    when(controller.electLeaders(
-      request.data
+    when(controller.electLeaders(any[ControllerRequestContext],
+      ArgumentMatchers.eq(request.data)
     )).thenReturn(CompletableFuture.completedFuture(responseData))
 
     val response = handleRequest[ElectLeadersResponse](request, controllerApis)
@@ -841,6 +903,35 @@ class ControllerApisTest {
     }
   }
 
+  @Test
+  def testCompletableFutureExceptions(): Unit = {
+    // This test simulates an error in a completable future as we return from the controller. We need to ensure
+    // that any exception throw in the completion phase is properly captured and translated to an error response.
+    val request = buildRequest(new FetchRequest(new FetchRequestData(), 12))
+    val response = new FetchResponseData()
+    val responseFuture = new CompletableFuture[ApiMessage]()
+    val errorResponseFuture = new AtomicReference[AbstractResponse]()
+    when(raftManager.handleRequest(any(), any(), any())).thenReturn(responseFuture)
+    when(requestChannel.sendResponse(any(), any(), any())).thenAnswer { _ =>
+      // Simulate an encoding failure in the initial fetch response
+      throw new UnsupportedVersionException("Something went wrong")
+    }.thenAnswer { invocation =>
+      val resp = invocation.getArgument(1, classOf[AbstractResponse])
+      errorResponseFuture.set(resp)
+    }
+
+    // Calling handle does not block since we do not call get() in ControllerApis
+    createControllerApis(None,
+      new MockController.Builder().build()).handle(request, null)
+
+    // When we complete this future, the completion stages will fire (including the error handler in ControllerApis#request)
+    responseFuture.complete(response)
+
+    // Now we should get an error response with UNSUPPORTED_VERSION
+    val errorResponse = errorResponseFuture.get()
+    assertEquals(1, errorResponse.errorCounts().getOrDefault(Errors.UNSUPPORTED_VERSION, 0))
+  }
+
   @AfterEach
   def tearDown(): Unit = {
     quotas.shutdown()
diff --git a/core/src/test/scala/unit/kafka/server/ControllerConfigurationValidatorTest.scala b/core/src/test/scala/unit/kafka/server/ControllerConfigurationValidatorTest.scala
index bece00354c675..c89910ed23138 100644
--- a/core/src/test/scala/unit/kafka/server/ControllerConfigurationValidatorTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ControllerConfigurationValidatorTest.scala
@@ -28,9 +28,10 @@ import org.apache.kafka.common.errors.{InvalidConfigurationException, InvalidReq
 import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows}
 
 class ControllerConfigurationValidatorTest {
+  val validator = new ControllerConfigurationValidator()
+
   @Test
   def testDefaultTopicResourceIsRejected(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     assertEquals("Default topic resources are not allowed.",
         assertThrows(classOf[InvalidRequestException], () => validator.validate(
         new ConfigResource(TOPIC, ""), emptyMap())). getMessage())
@@ -38,7 +39,6 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testInvalidTopicNameRejected(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     assertEquals("Topic name \"(<-invalid->)\" is illegal, it contains a character " +
       "other than ASCII alphanumerics, '.', '_' and '-'",
         assertThrows(classOf[InvalidTopicException], () => validator.validate(
@@ -47,7 +47,6 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testUnknownResourceType(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     assertEquals("Unknown resource type BROKER_LOGGER",
       assertThrows(classOf[InvalidRequestException], () => validator.validate(
         new ConfigResource(BROKER_LOGGER, "foo"), emptyMap())). getMessage())
@@ -55,19 +54,17 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testNullTopicConfigValue(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     val config = new TreeMap[String, String]()
     config.put(SEGMENT_JITTER_MS_CONFIG, "10")
     config.put(SEGMENT_BYTES_CONFIG, null)
     config.put(SEGMENT_MS_CONFIG, null)
-    assertEquals("Null value not supported for topic configs : segment.bytes,segment.ms",
-      assertThrows(classOf[InvalidRequestException], () => validator.validate(
+    assertEquals("Null value not supported for topic configs: segment.bytes,segment.ms",
+      assertThrows(classOf[InvalidConfigurationException], () => validator.validate(
         new ConfigResource(TOPIC, "foo"), config)). getMessage())
   }
 
   @Test
   def testValidTopicConfig(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     val config = new TreeMap[String, String]()
     config.put(SEGMENT_JITTER_MS_CONFIG, "1000")
     config.put(SEGMENT_BYTES_CONFIG, "67108864")
@@ -76,7 +73,6 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testInvalidTopicConfig(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     val config = new TreeMap[String, String]()
     config.put(SEGMENT_JITTER_MS_CONFIG, "1000")
     config.put(SEGMENT_BYTES_CONFIG, "67108864")
@@ -88,7 +84,6 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testInvalidBrokerEntity(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     val config = new TreeMap[String, String]()
     config.put(SEGMENT_JITTER_MS_CONFIG, "1000")
     assertEquals("Unable to parse broker name as a base 10 number.",
@@ -98,7 +93,6 @@ class ControllerConfigurationValidatorTest {
 
   @Test
   def testInvalidNegativeBrokerId(): Unit = {
-    val validator = new ControllerConfigurationValidator()
     val config = new TreeMap[String, String]()
     config.put(SEGMENT_JITTER_MS_CONFIG, "1000")
     assertEquals("Invalid negative broker ID.",
diff --git a/core/src/test/scala/unit/kafka/server/CreateTopicsRequestTest.scala b/core/src/test/scala/unit/kafka/server/CreateTopicsRequestTest.scala
index 94eb213446cd4..57834234cc101 100644
--- a/core/src/test/scala/unit/kafka/server/CreateTopicsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/CreateTopicsRequestTest.scala
@@ -31,7 +31,7 @@ import org.junit.jupiter.params.provider.ValueSource
 import scala.jdk.CollectionConverters._
 
 class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testValidCreateTopicsRequests(quorum: String): Unit = {
     // Generated assignments
@@ -61,7 +61,7 @@ class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
       topicReq("topic14", replicationFactor = -1, numPartitions = 2))))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testErrorCreateTopicsRequests(quorum: String): Unit = {
     val existingTopic = "existing-topic"
@@ -102,7 +102,7 @@ class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
     validateTopicExists("partial-none")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testCreateTopicsWithVeryShortTimeouts(quorum: String): Unit = {
     // When using ZooKeeper, we don't expect a request to ever complete within 1ms.
@@ -132,7 +132,7 @@ class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
   }
 
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testInvalidCreateTopicsRequests(quorum: String): Unit = {
     // Partitions/ReplicationFactor and ReplicaAssignment
@@ -147,7 +147,7 @@ class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
       Map("bad-args-topic" -> error(Errors.INVALID_REQUEST)), checkErrorMessage = false)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testNotController(quorum: String): Unit = {
     // Note: we don't run this test when in KRaft mode, because KRaft doesn't have this
@@ -157,7 +157,7 @@ class CreateTopicsRequestTest extends AbstractCreateTopicsRequestTest {
     assertEquals(1, response.errorCounts().get(Errors.NOT_CONTROLLER))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testCreateTopicsRequestVersions(quorum: String): Unit = {
     // Note: we don't run this test when in KRaft mode, because kraft does not yet support returning topic
diff --git a/core/src/test/scala/unit/kafka/server/CreateTopicsRequestWithPolicyTest.scala b/core/src/test/scala/unit/kafka/server/CreateTopicsRequestWithPolicyTest.scala
index d480c7b472f85..fc46640c1b65f 100644
--- a/core/src/test/scala/unit/kafka/server/CreateTopicsRequestWithPolicyTest.scala
+++ b/core/src/test/scala/unit/kafka/server/CreateTopicsRequestWithPolicyTest.scala
@@ -20,6 +20,7 @@ package kafka.server
 import java.util
 import java.util.Properties
 import kafka.log.LogConfig
+import kafka.utils.TestInfoUtils
 import org.apache.kafka.common.errors.PolicyViolationException
 import org.apache.kafka.common.internals.Topic
 import org.apache.kafka.common.protocol.Errors
@@ -44,7 +45,7 @@ class CreateTopicsRequestWithPolicyTest extends AbstractCreateTopicsRequestTest
     Seq(properties)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testValidCreateTopicsRequests(quorum: String): Unit = {
     validateValidCreateTopicsRequests(topicsReq(Seq(topicReq("topic1",
@@ -63,7 +64,7 @@ class CreateTopicsRequestWithPolicyTest extends AbstractCreateTopicsRequestTest
       assignment = Map(0 -> List(1, 0), 1 -> List(0, 1))))))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testErrorCreateTopicsRequests(quorum: String): Unit = {
     val existingTopic = "existing-topic"
diff --git a/core/src/test/scala/unit/kafka/server/DeleteRecordsRequestTest.scala b/core/src/test/scala/unit/kafka/server/DeleteRecordsRequestTest.scala
new file mode 100644
index 0000000000000..d43c5c7dfa4b3
--- /dev/null
+++ b/core/src/test/scala/unit/kafka/server/DeleteRecordsRequestTest.scala
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package kafka.server
+
+import kafka.utils.TestInfoUtils
+import org.apache.kafka.clients.producer.{ProducerRecord, RecordMetadata}
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.message.DeleteRecordsRequestData
+import org.apache.kafka.common.message.DeleteRecordsRequestData.{DeleteRecordsPartition, DeleteRecordsTopic}
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.requests.{DeleteRecordsRequest, DeleteRecordsResponse}
+import org.apache.kafka.common.serialization.StringSerializer
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
+
+import java.util.Collections
+import java.util.concurrent.TimeUnit
+import scala.collection.Seq
+
+class DeleteRecordsRequestTest extends BaseRequestTest {
+  private val TIMEOUT_MS = 1000
+  private val MESSAGES_PRODUCED_PER_PARTITION = 10
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testDeleteRecordsHappyCase(quorum: String): Unit = {
+    val (topicPartition: TopicPartition, leaderId: Int) = createTopicAndSendRecords
+
+    // Create the DeleteRecord request requesting deletion of offset which is not present
+    val offsetToDelete = Math.max(MESSAGES_PRODUCED_PER_PARTITION - 8, 0)
+    val request: DeleteRecordsRequest = createDeleteRecordsRequestForTopicPartition(topicPartition, offsetToDelete)
+
+    // call the API
+    val response = sendDeleteRecordsRequest(request, leaderId)
+    val partitionResult = response.data.topics.find(topicPartition.topic).partitions.find(topicPartition.partition)
+
+    // Validate the expected error code in the response
+    assertEquals(Errors.NONE.code(), partitionResult.errorCode(),
+      s"Unexpected error code received: ${Errors.forCode(partitionResult.errorCode).name()}")
+
+    // Validate the expected lowWaterMark in the response
+    assertEquals(offsetToDelete, partitionResult.lowWatermark(),
+      s"Unexpected lowWatermark received: ${partitionResult.lowWatermark}")
+
+    // Validate that the records have actually deleted
+    validateLogStartOffsetForTopic(topicPartition, offsetToDelete)
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testErrorWhenDeletingRecordsWithInvalidOffset(quorum: String): Unit = {
+    val (topicPartition: TopicPartition, leaderId: Int) = createTopicAndSendRecords
+
+    // Create the DeleteRecord request requesting deletion of offset which is not present
+    val offsetToDelete = MESSAGES_PRODUCED_PER_PARTITION + 5
+    val request: DeleteRecordsRequest = createDeleteRecordsRequestForTopicPartition(topicPartition, offsetToDelete)
+
+    // call the API
+    val response = sendDeleteRecordsRequest(request, leaderId)
+    val partitionResult = response.data.topics.find(topicPartition.topic).partitions.find(topicPartition.partition)
+
+    // Validate the expected error code in the response
+    assertEquals(Errors.OFFSET_OUT_OF_RANGE.code(), partitionResult.errorCode(),
+      s"Unexpected error code received: ${Errors.forCode(partitionResult.errorCode()).name()}")
+
+    // Validate the expected value for low watermark
+    assertEquals(DeleteRecordsResponse.INVALID_LOW_WATERMARK, partitionResult.lowWatermark())
+
+    // After error, the offset of the topic should have been the original i.e. delete record should not have deleted
+    // records.
+    validateLogStartOffsetForTopic(topicPartition, 0)
+  }
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testErrorWhenDeletingRecordsWithInvalidTopic(quorum: String): Unit = {
+    val invalidTopicPartition = new TopicPartition("invalid-topic", 0)
+    // Create the DeleteRecord request requesting deletion of offset which is not present
+    val offsetToDelete = 1
+    val request: DeleteRecordsRequest = createDeleteRecordsRequestForTopicPartition(invalidTopicPartition, offsetToDelete)
+
+    // call the API
+    val response = sendDeleteRecordsRequest(request)
+    val partitionResult = response.data.topics.find(invalidTopicPartition.topic).partitions.find(invalidTopicPartition.partition)
+
+    // Validate the expected error code in the response
+    assertEquals(Errors.UNKNOWN_TOPIC_OR_PARTITION.code(), partitionResult.errorCode(),
+      s"Unexpected error code received: ${Errors.forCode(partitionResult.errorCode()).name()}")
+
+    // Validate the expected value for low watermark
+    assertEquals(DeleteRecordsResponse.INVALID_LOW_WATERMARK, partitionResult.lowWatermark())
+  }
+
+  private def createTopicAndSendRecords = {
+    // Single topic
+    val topic1 = "topic-1"
+    val topicPartition = new TopicPartition(topic1, 0)
+    val partitionToLeader = createTopic(topic1)
+    assertTrue(partitionToLeader.contains(topicPartition.partition), "Topic creation did not succeed.")
+    // Write records
+    produceData(Seq(topicPartition), MESSAGES_PRODUCED_PER_PARTITION)
+    (topicPartition, partitionToLeader(topicPartition.partition))
+  }
+
+  private def createDeleteRecordsRequestForTopicPartition(topicPartition: TopicPartition, offsetToDelete: Int) = {
+    val requestData = new DeleteRecordsRequestData()
+      .setTopics(Collections.singletonList(new DeleteRecordsTopic()
+        .setName(topicPartition.topic())
+        .setPartitions(Collections.singletonList(new DeleteRecordsPartition()
+          .setOffset(offsetToDelete)
+          .setPartitionIndex(topicPartition.partition())))))
+      .setTimeoutMs(TIMEOUT_MS)
+    val request = new DeleteRecordsRequest.Builder(requestData).build()
+    request
+  }
+
+  private def sendDeleteRecordsRequest(request: DeleteRecordsRequest): DeleteRecordsResponse = {
+    connectAndReceive[DeleteRecordsResponse](request, destination = anySocketServer)
+  }
+
+  private def sendDeleteRecordsRequest(request: DeleteRecordsRequest, leaderId: Int): DeleteRecordsResponse = {
+    connectAndReceive[DeleteRecordsResponse](request, destination = brokerSocketServer(leaderId))
+  }
+
+  private def produceData(topicPartitions: Iterable[TopicPartition], numMessagesPerPartition: Int): Seq[RecordMetadata] = {
+    val producer = createProducer(keySerializer = new StringSerializer, valueSerializer = new StringSerializer)
+    val records = for {
+      tp <- topicPartitions.toSeq
+      messageIndex <- 0 until numMessagesPerPartition
+    } yield {
+      val suffix = s"$tp-$messageIndex"
+      new ProducerRecord(tp.topic, tp.partition, s"key $suffix", s"value $suffix")
+    }
+
+    val sendfutureList = records.map(producer.send)
+
+    // ensure that records are flushed to server
+    producer.flush()
+
+    val recordMetadataList = sendfutureList.map(_.get(10, TimeUnit.SECONDS))
+    recordMetadataList
+      .foreach(recordMetadata => assertTrue(recordMetadata.offset >= 0, s"Invalid offset $recordMetadata"))
+
+    recordMetadataList
+  }
+
+  private def validateLogStartOffsetForTopic(topicPartition: TopicPartition, expectedStartOffset: Long): Unit = {
+    val logForTopicPartition = brokers.flatMap(_.replicaManager.logManager.getLog(topicPartition)).headOption
+    // logManager should exist for the provided partition
+    assertTrue(logForTopicPartition.isDefined)
+    // assert that log start offset is equal to the expectedStartOffset after DeleteRecords has been called.
+    assertEquals(expectedStartOffset, logForTopicPartition.get.logStartOffset)
+  }
+}
diff --git a/core/src/test/scala/unit/kafka/server/DeleteTopicsRequestTest.scala b/core/src/test/scala/unit/kafka/server/DeleteTopicsRequestTest.scala
index a17612170d7a0..644f21ff3f648 100644
--- a/core/src/test/scala/unit/kafka/server/DeleteTopicsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/DeleteTopicsRequestTest.scala
@@ -17,24 +17,28 @@
 
 package kafka.server
 
-import java.util.{Arrays, Collections}
-
+import java.util.Arrays
+import java.util.Collections
 import kafka.network.SocketServer
 import kafka.utils._
 import org.apache.kafka.common.Uuid
 import org.apache.kafka.common.message.DeleteTopicsRequestData
 import org.apache.kafka.common.message.DeleteTopicsRequestData.DeleteTopicState
 import org.apache.kafka.common.protocol.Errors
-import org.apache.kafka.common.requests.{DeleteTopicsRequest, DeleteTopicsResponse, MetadataRequest, MetadataResponse}
+import org.apache.kafka.common.requests.DeleteTopicsRequest
+import org.apache.kafka.common.requests.DeleteTopicsResponse
+import org.apache.kafka.common.requests.MetadataRequest
+import org.apache.kafka.common.requests.MetadataResponse
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
-
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import scala.jdk.CollectionConverters._
 
-class DeleteTopicsRequestTest extends BaseRequestTest {
+class DeleteTopicsRequestTest extends BaseRequestTest with Logging {
 
-  @Test
-  def testValidDeleteTopicRequests(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testValidDeleteTopicRequests(quorum: String): Unit = {
     val timeout = 10000
     // Single topic
     createTopic("topic-1", 1, 1)
@@ -66,6 +70,9 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
     val response = sendDeleteTopicsRequest(request)
     val error = response.errorCounts.asScala.find(_._1 != Errors.NONE)
     assertTrue(error.isEmpty, s"There should be no errors, found ${response.data.responses.asScala}")
+
+    ensureConsistentKRaftMetadata()
+
     request.data.topicNames.forEach { topic =>
       validateTopicIsDeleted(topic)
     }
@@ -75,13 +82,20 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
     val response = sendDeleteTopicsRequest(request)
     val error = response.errorCounts.asScala.find(_._1 != Errors.NONE)
     assertTrue(error.isEmpty, s"There should be no errors, found ${response.data.responses.asScala}")
+
+    ensureConsistentKRaftMetadata()
+
     response.data.responses.forEach { response =>
       validateTopicIsDeleted(response.name())
     }
   }
 
-  @Test
-  def testErrorDeleteTopicRequests(): Unit = {
+  /*
+   * Only run this test against ZK cluster. The KRaft controller doesn't perform operations that have timed out.
+   */
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def testErrorDeleteTopicRequests(quorum: String): Unit = {
     val timeout = 30000
     val timeoutTopic = "invalid-timeout"
 
@@ -103,14 +117,14 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
         "partial-invalid-topic" -> Errors.UNKNOWN_TOPIC_OR_PARTITION
       )
     )
-    
+
     // Topic IDs
     createTopic("topic-id-1", 1, 1)
     val validId = getTopicIds()("topic-id-1")
     val invalidId = Uuid.randomUuid
     validateErrorDeleteTopicRequestsWithIds(new DeleteTopicsRequest.Builder(
       new DeleteTopicsRequestData()
-        .setTopics(Arrays.asList(new DeleteTopicState().setTopicId(invalidId), 
+        .setTopics(Arrays.asList(new DeleteTopicState().setTopicId(invalidId),
             new DeleteTopicState().setTopicId(validId)))
         .setTimeoutMs(timeout)).build(),
       Map(
@@ -128,7 +142,7 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
           .setTimeoutMs(0)).build(),
       Map(timeoutTopic -> Errors.REQUEST_TIMED_OUT))
     // The topic should still get deleted eventually
-    TestUtils.waitUntilTrue(() => !servers.head.metadataCache.contains(timeoutTopic), s"Topic $timeoutTopic is never deleted")
+    TestUtils.waitUntilTrue(() => !brokers.head.metadataCache.contains(timeoutTopic), s"Topic $timeoutTopic is never deleted")
     validateTopicIsDeleted(timeoutTopic)
   }
 
@@ -166,8 +180,13 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
     }
   }
 
-  @Test
-  def testNotController(): Unit = {
+  /*
+   * Only run this test against ZK clusters. KRaft doesn't have this behavior of returning NOT_CONTROLLER.
+   * Instead, the request is forwarded.
+   */
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def testNotController(quorum: String): Unit = {
     val request = new DeleteTopicsRequest.Builder(
         new DeleteTopicsRequestData()
           .setTopicNames(Collections.singletonList("not-controller"))
@@ -185,8 +204,36 @@ class DeleteTopicsRequestTest extends BaseRequestTest {
       s"The topic $topic should not exist")
   }
 
-  private def sendDeleteTopicsRequest(request: DeleteTopicsRequest, socketServer: SocketServer = controllerSocketServer): DeleteTopicsResponse = {
+  private def sendDeleteTopicsRequest(
+    request: DeleteTopicsRequest,
+    socketServer: SocketServer = adminSocketServer
+  ): DeleteTopicsResponse = {
     connectAndReceive[DeleteTopicsResponse](request, destination = socketServer)
   }
 
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def testDeleteTopicsVersions(quorum: String): Unit = {
+    // This test assumes that the current valid versions are 0-6 please adjust the test if there are changes.
+    assertEquals(0, DeleteTopicsRequestData.LOWEST_SUPPORTED_VERSION)
+    assertEquals(6, DeleteTopicsRequestData.HIGHEST_SUPPORTED_VERSION)
+
+    val timeout = 10000
+    (0 until DeleteTopicsRequestData.SCHEMAS.size).foreach { version =>
+      info(s"Creating and deleting tests for version $version")
+
+      val topicName = s"topic-$version"
+
+      createTopic(topicName, 1, 1)
+      val data = new DeleteTopicsRequestData().setTimeoutMs(timeout)
+
+      if (version < 6) {
+        data.setTopicNames(Arrays.asList(topicName))
+      } else {
+        data.setTopics(Arrays.asList(new DeleteTopicState().setName(topicName)))
+      }
+
+      validateValidDeleteTopicRequests(new DeleteTopicsRequest.Builder(data).build(version.toShort))
+    }
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/DescribeLogDirsRequestTest.scala b/core/src/test/scala/unit/kafka/server/DescribeLogDirsRequestTest.scala
index 9ab3f86d77090..0ad110dcdbfa8 100644
--- a/core/src/test/scala/unit/kafka/server/DescribeLogDirsRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/DescribeLogDirsRequestTest.scala
@@ -53,9 +53,13 @@ class DescribeLogDirsRequestTest extends BaseRequestTest {
     val offlineResult = response.data.results.asScala.find(logDirResult => logDirResult.logDir == offlineDir).get
     assertEquals(Errors.KAFKA_STORAGE_ERROR.code, offlineResult.errorCode)
     assertEquals(0, offlineResult.topics.asScala.map(t => t.partitions().size()).sum)
+    assertEquals(DescribeLogDirsResponse.UNKNOWN_VOLUME_BYTES, offlineResult.totalBytes)
+    assertEquals(DescribeLogDirsResponse.UNKNOWN_VOLUME_BYTES, offlineResult.usableBytes)
 
     val onlineResult = response.data.results.asScala.find(logDirResult => logDirResult.logDir == onlineDir).get
     assertEquals(Errors.NONE.code, onlineResult.errorCode)
+    assertTrue(onlineResult.totalBytes > 0)
+    assertTrue(onlineResult.usableBytes > 0)
     val onlinePartitionsMap = onlineResult.topics.asScala.flatMap { topic =>
       topic.partitions().asScala.map { partitionResult =>
         new TopicPartition(topic.name, partitionResult.partitionIndex) -> partitionResult
diff --git a/core/src/test/scala/unit/kafka/server/DescribeQuorumRequestTest.scala b/core/src/test/scala/unit/kafka/server/DescribeQuorumRequestTest.scala
index 55b9fe92c3c3a..b53004b2eaffd 100644
--- a/core/src/test/scala/unit/kafka/server/DescribeQuorumRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/DescribeQuorumRequestTest.scala
@@ -17,7 +17,6 @@
 package kafka.server
 
 import java.io.IOException
-
 import kafka.test.ClusterInstance
 import kafka.test.annotation.{ClusterTest, ClusterTestDefaults, Type}
 import kafka.test.junit.ClusterTestExtensions
@@ -26,12 +25,13 @@ import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.DescribeQuorumRequest.singletonRequest
 import org.apache.kafka.common.requests.{AbstractRequest, AbstractResponse, ApiVersionsRequest, ApiVersionsResponse, DescribeQuorumRequest, DescribeQuorumResponse}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Tag
+import org.junit.jupiter.api.{Tag, Timeout}
 import org.junit.jupiter.api.extension.ExtendWith
 
 import scala.jdk.CollectionConverters._
 import scala.reflect.ClassTag
 
+@Timeout(120)
 @ExtendWith(value = Array(classOf[ClusterTestExtensions]))
 @ClusterTestDefaults(clusterType = Type.KRAFT)
 @Tag("integration")
@@ -54,30 +54,43 @@ class DescribeQuorumRequestTest(cluster: ClusterInstance) {
 
   @ClusterTest
   def testDescribeQuorum(): Unit = {
-    val request = new DescribeQuorumRequest.Builder(
-      singletonRequest(KafkaRaftServer.MetadataPartition)
-    ).build()
-
-    val response = connectAndReceive[DescribeQuorumResponse](request)
-
-    assertEquals(Errors.NONE, Errors.forCode(response.data.errorCode))
-    assertEquals(1, response.data.topics.size)
-
-    val topicData = response.data.topics.get(0)
-    assertEquals(KafkaRaftServer.MetadataTopic, topicData.topicName)
-    assertEquals(1, topicData.partitions.size)
-
-    val partitionData = topicData.partitions.get(0)
-    assertEquals(KafkaRaftServer.MetadataPartition.partition, partitionData.partitionIndex)
-    assertEquals(Errors.NONE, Errors.forCode(partitionData.errorCode))
-    assertTrue(partitionData.leaderEpoch > 0)
-
-    val leaderId = partitionData.leaderId
-    assertTrue(leaderId > 0)
-
-    val leaderState = partitionData.currentVoters.asScala.find(_.replicaId == leaderId)
-      .getOrElse(throw new AssertionError("Failed to find leader among current voter states"))
-    assertTrue(leaderState.logEndOffset > 0)
+    for (version <- ApiKeys.DESCRIBE_QUORUM.allVersions.asScala) {
+      val request = new DescribeQuorumRequest.Builder(
+        singletonRequest(KafkaRaftServer.MetadataPartition)
+      ).build(version.toShort)
+      val response = connectAndReceive[DescribeQuorumResponse](request)
+
+      assertEquals(Errors.NONE, Errors.forCode(response.data.errorCode))
+      assertEquals(1, response.data.topics.size)
+
+      val topicData = response.data.topics.get(0)
+      assertEquals(KafkaRaftServer.MetadataTopic, topicData.topicName)
+      assertEquals(1, topicData.partitions.size)
+
+      val partitionData = topicData.partitions.get(0)
+      assertEquals(KafkaRaftServer.MetadataPartition.partition, partitionData.partitionIndex)
+      assertEquals(Errors.NONE, Errors.forCode(partitionData.errorCode))
+      assertTrue(partitionData.leaderEpoch > 0)
+
+      val leaderId = partitionData.leaderId
+      assertTrue(leaderId > 0)
+
+      val leaderState = partitionData.currentVoters.asScala.find(_.replicaId == leaderId)
+        .getOrElse(throw new AssertionError("Failed to find leader among current voter states"))
+      assertTrue(leaderState.logEndOffset > 0)
+
+      val voterData = partitionData.currentVoters.asScala
+      assertEquals(cluster.controllerIds().asScala, voterData.map(_.replicaId).toSet);
+
+      val observerData = partitionData.observers.asScala
+      assertEquals(cluster.brokerIds().asScala, observerData.map(_.replicaId).toSet);
+
+      (voterData ++ observerData).foreach { state =>
+        assertTrue(0 < state.logEndOffset)
+        assertEquals(-1, state.lastFetchTimestamp)
+        assertEquals(-1, state.lastCaughtUpTimestamp)
+      }
+    }
   }
 
   private def connectAndReceive[T <: AbstractResponse](
diff --git a/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala b/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala
index 3a3725976b2d0..84d6f5a2ef93d 100644
--- a/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala
+++ b/core/src/test/scala/unit/kafka/server/DynamicConfigChangeTest.scala
@@ -16,12 +16,11 @@
   */
 package kafka.server
 
-import kafka.api.KAFKA_3_0_IV1
 import java.net.InetAddress
 import java.nio.charset.StandardCharsets
 import java.util
 import java.util.Collections.{singletonList, singletonMap}
-import java.util.Properties
+import java.util.{Collections, Properties}
 import java.util.concurrent.ExecutionException
 
 import kafka.integration.KafkaServerTestHarness
@@ -31,7 +30,7 @@ import kafka.server.Constants._
 import kafka.zk.ConfigEntityChangeNotificationZNode
 import org.apache.kafka.clients.CommonClientConfigs
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType.SET
-import org.apache.kafka.clients.admin.{Admin, AlterConfigOp, ConfigEntry}
+import org.apache.kafka.clients.admin.{Admin, AlterConfigOp, Config, ConfigEntry}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.config.ConfigResource
 import org.apache.kafka.common.config.internals.QuotaConfigs
@@ -42,6 +41,7 @@ import org.apache.kafka.common.quota.ClientQuotaEntity.{CLIENT_ID, IP, USER}
 import org.apache.kafka.common.quota.{ClientQuotaAlteration, ClientQuotaEntity}
 import org.apache.kafka.common.record.{CompressionType, RecordVersion}
 import org.apache.kafka.common.security.auth.KafkaPrincipal
+import org.apache.kafka.server.common.MetadataVersion.IBP_3_0_IV1
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.params.ParameterizedTest
@@ -56,7 +56,7 @@ import scala.jdk.CollectionConverters._
 class DynamicConfigChangeTest extends KafkaServerTestHarness {
   def generateConfigs = List(KafkaConfig.fromProps(TestUtils.createBrokerConfig(0, zkConnectOrNull)))
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConfigChange(quorum: String): Unit = {
     if (!isKRaftTest()) {
@@ -80,7 +80,13 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
         val resource = new ConfigResource(ConfigResource.Type.TOPIC, tp.topic())
         val op = new AlterConfigOp(new ConfigEntry(FlushMessagesProp, newVal.toString()),
           SET)
-        admin.incrementalAlterConfigs(Map(resource -> List(op).asJavaCollection).asJava).all.get
+        val resource2 = new ConfigResource(ConfigResource.Type.BROKER, "")
+        val op2 = new AlterConfigOp(new ConfigEntry(KafkaConfig.LogFlushIntervalMsProp, newVal.toString()),
+          SET)
+        admin.incrementalAlterConfigs(Map(
+          resource -> List(op).asJavaCollection,
+          resource2 -> List(op2).asJavaCollection,
+        ).asJava).all.get
       } finally {
         admin.close()
       }
@@ -94,7 +100,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDynamicTopicConfigChange(quorum: String): Unit = {
     val tp = new TopicPartition("test", 0)
@@ -136,7 +142,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
   }
 
   @nowarn("cat=deprecation")
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testMessageFormatVersionChange(quorum: String): Unit = {
     val tp = new TopicPartition("test", 0)
@@ -148,7 +154,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
       "Topic metadata propagation failed")
     val log = server.logManager.getLog(tp).get
     // message format version should always be 3.0 if inter-broker protocol is 3.0 or higher
-    assertEquals(KAFKA_3_0_IV1, log.config.messageFormatVersion)
+    assertEquals(IBP_3_0_IV1, log.config.messageFormatVersion)
     assertEquals(RecordVersion.V2, log.config.recordVersion)
 
     val compressionType = CompressionType.LZ4.name
@@ -159,7 +165,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     TestUtils.waitUntilTrue(() =>
       server.logManager.getLog(tp).get.config.compressionType == compressionType,
       "Topic config change propagation failed")
-    assertEquals(KAFKA_3_0_IV1, log.config.messageFormatVersion)
+    assertEquals(IBP_3_0_IV1, log.config.messageFormatVersion)
     assertEquals(RecordVersion.V2, log.config.recordVersion)
   }
 
@@ -206,7 +212,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testClientIdQuotaConfigChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -214,7 +220,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUserQuotaConfigChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -222,7 +228,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testUserClientIdQuotaChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -231,7 +237,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDefaultClientIdQuotaConfigChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -239,7 +245,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDefaultUserQuotaConfigChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -247,7 +253,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testDefaultUserClientIdQuotaConfigChange(quorum: String): Unit = {
     val m = new util.HashMap[String, String]
@@ -256,7 +262,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     testQuotaConfigChange(new ClientQuotaEntity(m), KafkaPrincipal.ANONYMOUS, "testClient")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testQuotaInitialization(quorum: String): Unit = {
     val server = servers.head
@@ -288,7 +294,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     assertEquals(Quota.upperBound(200000),  quotaManagers.fetch.quota("ANONYMOUS", "overriddenUserClientId"))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIpQuotaInitialization(quorum: String): Unit = {
     val broker = brokers.head
@@ -328,7 +334,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIpQuotaConfigChange(quorum: String): Unit = {
     val admin = createAdminClient()
@@ -370,7 +376,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testConfigChangeOnNonExistingTopic(quorum: String): Unit = {
     val topic = TestUtils.tempTopic()
@@ -379,7 +385,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     assertThrows(classOf[UnknownTopicOrPartitionException], () => adminZkClient.changeTopicConfig(topic, logProps))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConfigChangeOnNonExistingTopicWithAdminClient(quorum: String): Unit = {
     val topic = TestUtils.tempTopic()
@@ -397,7 +403,7 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testProcessNotification(quorum: String): Unit = {
     val props = new Properties()
@@ -430,18 +436,30 @@ class DynamicConfigChangeTest extends KafkaServerTestHarness {
     verify(handler).processConfigChanges(anyString, any[Properties])
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
-  def testConfigureDefaultTopic(quorum: String): Unit = {
+  def testIncrementalAlterDefaultTopicConfig(quorum: String): Unit = {
     val admin = createAdminClient()
     try {
       val resource = new ConfigResource(ConfigResource.Type.TOPIC, "")
       val op = new AlterConfigOp(new ConfigEntry(FlushMessagesProp, "200000"), SET)
-      admin.incrementalAlterConfigs(Map(resource -> List(op).asJavaCollection).asJava).all.get
-      fail("Should fail with InvalidRequestException for topic doesn't exist")
-    } catch {
-      case e: ExecutionException =>
-        assertEquals(classOf[InvalidRequestException], e.getCause().getClass())
+      val future = admin.incrementalAlterConfigs(Map(resource -> List(op).asJavaCollection).asJava).all
+      TestUtils.assertFutureExceptionTypeEquals(future, classOf[InvalidRequestException])
+    } finally {
+      admin.close()
+    }
+  }
+
+  @nowarn("cat=deprecation")
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testAlterDefaultTopicConfig(quorum: String): Unit = {
+    val admin = createAdminClient()
+    try {
+      val resource = new ConfigResource(ConfigResource.Type.TOPIC, "")
+      val config = new Config(Collections.singleton(new ConfigEntry(FlushMessagesProp, "200000")))
+      val future = admin.alterConfigs(Map(resource -> config).asJava).all
+      TestUtils.assertFutureExceptionTypeEquals(future, classOf[InvalidRequestException])
     } finally {
       admin.close()
     }
diff --git a/core/src/test/scala/unit/kafka/server/EdgeCaseRequestTest.scala b/core/src/test/scala/unit/kafka/server/EdgeCaseRequestTest.scala
index 1a383a8fbcdf9..1bbde3ffb6b88 100755
--- a/core/src/test/scala/unit/kafka/server/EdgeCaseRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/EdgeCaseRequestTest.scala
@@ -35,19 +35,20 @@ import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.utils.ByteUtils
 import org.apache.kafka.common.{TopicPartition, requests}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.jdk.CollectionConverters._
 
 class EdgeCaseRequestTest extends KafkaServerTestHarness {
 
   def generateConfigs = {
-    val props = TestUtils.createBrokerConfig(1, zkConnect)
+    val props = TestUtils.createBrokerConfig(1, zkConnectOrNull)
     props.setProperty(KafkaConfig.AutoCreateTopicsEnableProp, "false")
     List(KafkaConfig.fromProps(props))
   }
 
-  private def socketServer = servers.head.socketServer
+  private def socketServer = brokers.head.socketServer
 
   private def connect(s: SocketServer = socketServer, protocol: SecurityProtocol = SecurityProtocol.PLAINTEXT): Socket = {
     new Socket("localhost", s.boundPort(ListenerName.forSecurityProtocol(protocol)))
@@ -116,8 +117,9 @@ class EdgeCaseRequestTest extends KafkaServerTestHarness {
     }
   }
 
-  @Test
-  def testProduceRequestWithNullClientId(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testProduceRequestWithNullClientId(quorum: String): Unit = {
     val topic = "topic"
     val topicPartition = new TopicPartition(topic, 0)
     val correlationId = -1
@@ -161,23 +163,27 @@ class EdgeCaseRequestTest extends KafkaServerTestHarness {
     assertEquals(Errors.NONE, Errors.forCode(partitionProduceResponse.errorCode), "There should be no error")
   }
 
-  @Test
-  def testHeaderOnlyRequest(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testHeaderOnlyRequest(quorum: String): Unit = {
     verifyDisconnect(requestHeaderBytes(ApiKeys.PRODUCE.id, 1))
   }
 
-  @Test
-  def testInvalidApiKeyRequest(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidApiKeyRequest(quorum: String): Unit = {
     verifyDisconnect(requestHeaderBytes(-1, 0))
   }
 
-  @Test
-  def testInvalidApiVersionRequest(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testInvalidApiVersionRequest(quorum: String): Unit = {
     verifyDisconnect(requestHeaderBytes(ApiKeys.PRODUCE.id, -1))
   }
 
-  @Test
-  def testMalformedHeaderRequest(): Unit = {
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMalformedHeaderRequest(quorum: String): Unit = {
     val serializedBytes = {
       // Only send apiKey and apiVersion
       val buffer = ByteBuffer.allocate(
diff --git a/core/src/test/scala/unit/kafka/server/FetchRequestDownConversionConfigTest.scala b/core/src/test/scala/unit/kafka/server/FetchRequestDownConversionConfigTest.scala
index 6efa37b11728a..0cf7c1d8e2ee4 100644
--- a/core/src/test/scala/unit/kafka/server/FetchRequestDownConversionConfigTest.scala
+++ b/core/src/test/scala/unit/kafka/server/FetchRequestDownConversionConfigTest.scala
@@ -18,21 +18,25 @@ package kafka.server
 
 import java.util
 import java.util.{Optional, Properties}
+
 import kafka.log.LogConfig
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
+import org.apache.kafka.common.message.FetchResponseData
 import org.apache.kafka.common.{TopicPartition, Uuid}
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.{FetchRequest, FetchResponse}
 import org.apache.kafka.common.serialization.StringSerializer
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 import scala.jdk.CollectionConverters._
 
 class FetchRequestDownConversionConfigTest extends BaseRequestTest {
   private var producer: KafkaProducer[String, String] = null
-  override def brokerCount: Int = 1
+  override def brokerCount: Int = 2
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
@@ -64,8 +68,12 @@ class FetchRequestDownConversionConfigTest extends BaseRequestTest {
     topicConfig.setProperty(LogConfig.MinInSyncReplicasProp, 1.toString)
     configs.foreach { case (k, v) => topicConfig.setProperty(k, v) }
     topics.flatMap { topic =>
-      val partitionToLeader = createTopic(topic, numPartitions = numPartitions, replicationFactor = 1,
-        topicConfig = topicConfig)
+      val partitionToLeader = createTopic(
+        topic,
+        numPartitions = numPartitions,
+        replicationFactor = 2,
+        topicConfig = topicConfig
+      )
       partitionToLeader.map { case (partition, leader) => new TopicPartition(topic, partition) -> leader }
     }.toMap
   }
@@ -140,56 +148,101 @@ class FetchRequestDownConversionConfigTest extends BaseRequestTest {
    * Tests that "message.downconversion.enable" can be set at topic level, and its configuration is obeyed for client
    * fetch requests.
    */
-  @Test
-  def testV1FetchWithTopicLevelOverrides(): Unit = {
-    // create topics with default down-conversion configuration (i.e. conversion disabled)
-    val conversionDisabledTopicsMap = createTopics(numTopics = 5, numPartitions = 1, topicSuffixStart = 0)
-    val conversionDisabledTopicPartitions = conversionDisabledTopicsMap.keySet.toSeq
-
-    // create topics with down-conversion configuration enabled
-    val topicConfig = Map(LogConfig.MessageDownConversionEnableProp -> "true")
-    val conversionEnabledTopicsMap = createTopics(numTopics = 5, numPartitions = 1, topicConfig, topicSuffixStart = 5)
-    val conversionEnabledTopicPartitions = conversionEnabledTopicsMap.keySet.toSeq
-
-    val allTopics = conversionDisabledTopicPartitions ++ conversionEnabledTopicPartitions
-    val leaderId = conversionDisabledTopicsMap.head._2
-    val topicIds = servers.head.kafkaController.controllerContext.topicIds
-    val topicNames = topicIds.map(_.swap)
-
-    allTopics.foreach(tp => producer.send(new ProducerRecord(tp.topic(), "key", "value")).get())
-    val fetchRequest = FetchRequest.Builder.forConsumer(1, Int.MaxValue, 0, createPartitionMap(1024,
-      allTopics, topicIds.toMap)).build(1)
-    val fetchResponse = sendFetchRequest(leaderId, fetchRequest)
-
-    val fetchResponseData = fetchResponse.responseData(topicNames.asJava, 1)
-    conversionDisabledTopicPartitions.foreach(tp => assertEquals(Errors.UNSUPPORTED_VERSION, Errors.forCode(fetchResponseData.get(tp).errorCode)))
-    conversionEnabledTopicPartitions.foreach(tp => assertEquals(Errors.NONE, Errors.forCode(fetchResponseData.get(tp).errorCode)))
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testV1FetchFromConsumer(quorum: String): Unit = {
+    testV1Fetch(isFollowerFetch = false)
   }
 
   /**
    * Tests that "message.downconversion.enable" has no effect on fetch requests from replicas.
    */
-  @Test
-  def testV1FetchFromReplica(): Unit = {
-    // create topics with default down-conversion configuration (i.e. conversion disabled)
-    val conversionDisabledTopicsMap = createTopics(numTopics = 5, numPartitions = 1, topicSuffixStart = 0)
-    val conversionDisabledTopicPartitions = conversionDisabledTopicsMap.keySet.toSeq
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testV1FetchFromReplica(quorum: String): Unit = {
+    testV1Fetch(isFollowerFetch = true)
+  }
 
-    // create topics with down-conversion configuration enabled
-    val topicConfig = Map(LogConfig.MessageDownConversionEnableProp -> "true")
-    val conversionEnabledTopicsMap = createTopics(numTopics = 5, numPartitions = 1, topicConfig, topicSuffixStart = 5)
-    val conversionEnabledTopicPartitions = conversionEnabledTopicsMap.keySet.toSeq
+  def testV1Fetch(isFollowerFetch: Boolean): Unit = {
+    val topicWithDownConversionEnabled = "foo"
+    val topicWithDownConversionDisabled = "bar"
+    val replicaIds = brokers.map(_.config.brokerId)
+    val leaderId = replicaIds.head
+    val followerId = replicaIds.last
 
-    val allTopicPartitions = conversionDisabledTopicPartitions ++ conversionEnabledTopicPartitions
-    val topicIds = servers.head.kafkaController.controllerContext.topicIds
-    val topicNames = topicIds.map(_.swap)
-    val leaderId = conversionDisabledTopicsMap.head._2
+    val admin = createAdminClient()
+
+    val topicWithDownConversionDisabledId = TestUtils.createTopicWithAdminRaw(
+      admin,
+      topicWithDownConversionDisabled,
+      replicaAssignment = Map(0 -> replicaIds)
+    )
+
+    val topicConfig = new Properties
+    topicConfig.put(LogConfig.MessageDownConversionEnableProp, "true")
+    val topicWithDownConversionEnabledId = TestUtils.createTopicWithAdminRaw(
+      admin,
+      topicWithDownConversionEnabled,
+      replicaAssignment = Map(0 -> replicaIds),
+      topicConfig = topicConfig
+    )
+
+    val partitionWithDownConversionEnabled = new TopicPartition(topicWithDownConversionEnabled, 0)
+    val partitionWithDownConversionDisabled = new TopicPartition(topicWithDownConversionDisabled, 0)
+
+    val allTopicPartitions = Seq(
+      partitionWithDownConversionEnabled,
+      partitionWithDownConversionDisabled
+    )
+
+    allTopicPartitions.foreach { tp =>
+      producer.send(new ProducerRecord(tp.topic, "key", "value")).get()
+    }
+
+    val topicIdMap = Map(
+      topicWithDownConversionEnabled -> topicWithDownConversionEnabledId,
+      topicWithDownConversionDisabled -> topicWithDownConversionDisabledId
+    )
+
+    val fetchResponseData = sendFetch(
+      leaderId,
+      allTopicPartitions,
+      topicIdMap,
+      fetchVersion = 1,
+      replicaIdOpt = if (isFollowerFetch) Some(followerId) else None
+    )
+
+    def error(tp: TopicPartition): Errors = {
+      Errors.forCode(fetchResponseData.get(tp).errorCode)
+    }
+
+    assertEquals(Errors.NONE, error(partitionWithDownConversionEnabled))
+    if (isFollowerFetch) {
+      assertEquals(Errors.NONE, error(partitionWithDownConversionDisabled))
+    } else {
+      assertEquals(Errors.UNSUPPORTED_VERSION, error(partitionWithDownConversionDisabled))
+    }
+  }
+
+  private def sendFetch(
+    leaderId: Int,
+    partitions: Seq[TopicPartition],
+    topicIdMap: Map[String, Uuid],
+    fetchVersion: Short,
+    replicaIdOpt: Option[Int]
+  ): util.LinkedHashMap[TopicPartition, FetchResponseData.PartitionData] = {
+    val topicNameMap = topicIdMap.map(_.swap)
+    val partitionMap = createPartitionMap(1024, partitions, topicIdMap)
+
+    val fetchRequest = replicaIdOpt.map { replicaId =>
+      FetchRequest.Builder.forReplica(fetchVersion, replicaId, Int.MaxValue, 0, partitionMap)
+        .build(fetchVersion)
+    }.getOrElse {
+      FetchRequest.Builder.forConsumer(fetchVersion, Int.MaxValue, 0, partitionMap)
+        .build(fetchVersion)
+    }
 
-    allTopicPartitions.foreach(tp => producer.send(new ProducerRecord(tp.topic, "key", "value")).get())
-    val fetchRequest = FetchRequest.Builder.forReplica(1, 1, Int.MaxValue, 0,
-      createPartitionMap(1024, allTopicPartitions, topicIds.toMap)).build()
     val fetchResponse = sendFetchRequest(leaderId, fetchRequest)
-    val fetchResponseData = fetchResponse.responseData(topicNames.asJava, 1)
-    allTopicPartitions.foreach(tp => assertEquals(Errors.NONE, Errors.forCode(fetchResponseData.get(tp).errorCode)))
+    fetchResponse.responseData(topicNameMap.asJava, fetchVersion)
   }
 }
diff --git a/core/src/test/scala/unit/kafka/server/FetchRequestWithLegacyMessageFormatTest.scala b/core/src/test/scala/unit/kafka/server/FetchRequestWithLegacyMessageFormatTest.scala
index 2f78b9d10ae55..82e0449c87b48 100644
--- a/core/src/test/scala/unit/kafka/server/FetchRequestWithLegacyMessageFormatTest.scala
+++ b/core/src/test/scala/unit/kafka/server/FetchRequestWithLegacyMessageFormatTest.scala
@@ -16,16 +16,16 @@
  */
 package kafka.server
 
-import kafka.api.KAFKA_0_10_2_IV0
 import kafka.log.LogConfig
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.{FetchRequest, FetchResponse}
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.Test
-
 import java.util.Properties
 
+import org.apache.kafka.server.common.MetadataVersion.IBP_0_10_2_IV0
+
 import scala.annotation.nowarn
 import scala.collection.Seq
 import scala.jdk.CollectionConverters._
@@ -49,7 +49,7 @@ class FetchRequestWithLegacyMessageFormatTest extends BaseFetchRequestTest {
     val maxPartitionBytes = 200
     // Fetch v2 down-converts if the message format is >= 0.11 and we want to avoid
     // that as it affects the size of the returned buffer
-    val topicConfig = Map(LogConfig.MessageFormatVersionProp -> KAFKA_0_10_2_IV0.version)
+    val topicConfig = Map(LogConfig.MessageFormatVersionProp -> IBP_0_10_2_IV0.version)
     val (topicPartition, leaderId) = createTopics(numTopics = 1, numPartitions = 1, topicConfig).head
     val topicIds = getTopicIds().asJava
     val topicNames = topicIds.asScala.map(_.swap).asJava
diff --git a/core/src/test/scala/unit/kafka/server/FinalizedFeatureCacheTest.scala b/core/src/test/scala/unit/kafka/server/FinalizedFeatureCacheTest.scala
index d0f4c0ab05f64..5eb562fb29fba 100644
--- a/core/src/test/scala/unit/kafka/server/FinalizedFeatureCacheTest.scala
+++ b/core/src/test/scala/unit/kafka/server/FinalizedFeatureCacheTest.scala
@@ -17,7 +17,9 @@
 
 package kafka.server
 
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
+import kafka.server.metadata.{FeatureCacheUpdateException, ZkMetadataCache}
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows, assertTrue}
 import org.junit.jupiter.api.Test
 
@@ -27,7 +29,7 @@ class FinalizedFeatureCacheTest {
 
   @Test
   def testEmpty(): Unit = {
-    assertTrue(new FinalizedFeatureCache(BrokerFeatures.createDefault()).get.isEmpty)
+    assertTrue(new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, BrokerFeatures.createDefault()).getFeatureOption.isEmpty)
   }
 
   @Test
@@ -37,22 +39,20 @@ class FinalizedFeatureCacheTest {
     val brokerFeatures = BrokerFeatures.createDefault()
     brokerFeatures.setSupportedFeatures(Features.supportedFeatures(supportedFeatures.asJava))
 
-    val features = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(1, 4))
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = Map[String, Short]("feature_1" -> 4)
 
-    val cache = new FinalizedFeatureCache(brokerFeatures)
-    cache.updateOrThrow(finalizedFeatures, 10)
-    assertTrue(cache.get.isDefined)
-    assertEquals(finalizedFeatures, cache.get.get.features)
-    assertEquals(10, cache.get.get.epoch)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
+    cache.updateFeaturesOrThrow(finalizedFeatures, 10)
+    assertTrue(cache.getFeatureOption.isDefined)
+    assertEquals(finalizedFeatures, cache.getFeatureOption.get.features)
+    assertEquals(10, cache.getFeatureOption.get.epoch)
 
-    assertThrows(classOf[FeatureCacheUpdateException], () => cache.updateOrThrow(finalizedFeatures, 9))
+    assertThrows(classOf[FeatureCacheUpdateException], () => cache.updateFeaturesOrThrow(finalizedFeatures, 9))
 
     // Check that the failed updateOrThrow call did not make any mutations.
-    assertTrue(cache.get.isDefined)
-    assertEquals(finalizedFeatures, cache.get.get.features)
-    assertEquals(10, cache.get.get.epoch)
+    assertTrue(cache.getFeatureOption.isDefined)
+    assertEquals(finalizedFeatures, cache.getFeatureOption.get.features)
+    assertEquals(10, cache.getFeatureOption.get.epoch)
   }
 
   @Test
@@ -62,15 +62,13 @@ class FinalizedFeatureCacheTest {
     val brokerFeatures = BrokerFeatures.createDefault()
     brokerFeatures.setSupportedFeatures(Features.supportedFeatures(supportedFeatures.asJava))
 
-    val features = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(1, 2))
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = Map[String, Short]("feature_1" -> 2)
 
-    val cache = new FinalizedFeatureCache(brokerFeatures)
-    assertThrows(classOf[FeatureCacheUpdateException], () => cache.updateOrThrow(finalizedFeatures, 12))
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
+    assertThrows(classOf[FeatureCacheUpdateException], () => cache.updateFeaturesOrThrow(finalizedFeatures, 12))
 
     // Check that the failed updateOrThrow call did not make any mutations.
-    assertTrue(cache.isEmpty)
+    assertTrue(cache.getFeatureOption.isEmpty)
   }
 
   @Test
@@ -80,15 +78,13 @@ class FinalizedFeatureCacheTest {
     val brokerFeatures = BrokerFeatures.createDefault()
     brokerFeatures.setSupportedFeatures(Features.supportedFeatures(supportedFeatures.asJava))
 
-    val features = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(2, 3))
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = Map[String, Short]("feature_1" -> 3)
 
-    val cache = new FinalizedFeatureCache(brokerFeatures)
-    cache.updateOrThrow(finalizedFeatures, 12)
-    assertTrue(cache.get.isDefined)
-    assertEquals(finalizedFeatures,  cache.get.get.features)
-    assertEquals(12, cache.get.get.epoch)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
+    cache.updateFeaturesOrThrow(finalizedFeatures, 12)
+    assertTrue(cache.getFeatureOption.isDefined)
+    assertEquals(finalizedFeatures,  cache.getFeatureOption.get.features)
+    assertEquals(12, cache.getFeatureOption.get.epoch)
   }
 
   @Test
@@ -98,17 +94,15 @@ class FinalizedFeatureCacheTest {
     val brokerFeatures = BrokerFeatures.createDefault()
     brokerFeatures.setSupportedFeatures(Features.supportedFeatures(supportedFeatures.asJava))
 
-    val features = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(2, 3))
-    val finalizedFeatures = Features.finalizedFeatures(features.asJava)
+    val finalizedFeatures = Map[String, Short]("feature_1" -> 3)
 
-    val cache = new FinalizedFeatureCache(brokerFeatures)
-    cache.updateOrThrow(finalizedFeatures, 12)
-    assertTrue(cache.get.isDefined)
-    assertEquals(finalizedFeatures, cache.get.get.features)
-    assertEquals(12, cache.get.get.epoch)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
+    cache.updateFeaturesOrThrow(finalizedFeatures, 12)
+    assertTrue(cache.getFeatureOption.isDefined)
+    assertEquals(finalizedFeatures, cache.getFeatureOption.get.features)
+    assertEquals(12, cache.getFeatureOption.get.epoch)
 
-    cache.clear()
-    assertTrue(cache.isEmpty)
+    cache.clearFeatures()
+    assertTrue(cache.getFeatureOption.isEmpty)
   }
 }
diff --git a/core/src/test/scala/unit/kafka/server/FinalizedFeatureChangeListenerTest.scala b/core/src/test/scala/unit/kafka/server/FinalizedFeatureChangeListenerTest.scala
index d59474efd4e4a..67313ba3c2679 100644
--- a/core/src/test/scala/unit/kafka/server/FinalizedFeatureChangeListenerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/FinalizedFeatureChangeListenerTest.scala
@@ -17,17 +17,18 @@
 
 package kafka.server
 
-import java.util.concurrent.{CountDownLatch, TimeoutException}
-
-import kafka.server.QuorumTestHarness
-import kafka.zk.{FeatureZNode, FeatureZNodeStatus, ZkVersion}
+import kafka.server.metadata.ZkMetadataCache
 import kafka.utils.TestUtils
+import kafka.zk.{FeatureZNode, FeatureZNodeStatus, ZkVersion}
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
 import org.apache.kafka.common.utils.Exit
-import org.apache.kafka.common.feature.{Features, FinalizedVersionRange, SupportedVersionRange}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_3_2_IV0
 import org.apache.kafka.test.{TestUtils => JTestUtils}
-import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotEquals, assertThrows, assertTrue}
+import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
+import java.util.concurrent.{CountDownLatch, TimeoutException}
 import scala.jdk.CollectionConverters._
 
 class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
@@ -42,33 +43,31 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   }
 
   private def createFinalizedFeatures(): FinalizedFeaturesAndEpoch = {
-    val finalizedFeaturesMap = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(2, 3))
-    val finalizedFeatures = Features.finalizedFeatures(finalizedFeaturesMap.asJava)
-    zkClient.createFeatureZNode(FeatureZNode(FeatureZNodeStatus.Enabled, finalizedFeatures))
+    val finalizedFeaturesMap = Map[String, Short]("feature_1" -> 3)
+    zkClient.createFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, finalizedFeaturesMap))
     val (mayBeFeatureZNodeBytes, version) = zkClient.getDataAndVersion(FeatureZNode.path)
     assertNotEquals(version, ZkVersion.UnknownVersion)
     assertFalse(mayBeFeatureZNodeBytes.isEmpty)
-    FinalizedFeaturesAndEpoch(finalizedFeatures, version)
+    FinalizedFeaturesAndEpoch(finalizedFeaturesMap, version)
   }
 
   private def createListener(
-    cache: FinalizedFeatureCache,
+    cache: ZkMetadataCache,
     expectedCacheContent: Option[FinalizedFeaturesAndEpoch]
   ): FinalizedFeatureChangeListener = {
     val listener = new FinalizedFeatureChangeListener(cache, zkClient)
     assertFalse(listener.isListenerInitiated)
-    assertTrue(cache.isEmpty)
+    assertTrue(cache.getFeatureOption.isEmpty)
     listener.initOrThrow(15000)
     assertTrue(listener.isListenerInitiated)
     if (expectedCacheContent.isDefined) {
-      val mayBeNewCacheContent = cache.get
+      val mayBeNewCacheContent = cache.getFeatureOption
       assertFalse(mayBeNewCacheContent.isEmpty)
       val newCacheContent = mayBeNewCacheContent.get
       assertEquals(expectedCacheContent.get.features, newCacheContent.features)
       assertEquals(expectedCacheContent.get.epoch, newCacheContent.epoch)
     } else {
-      val mayBeNewCacheContent = cache.get
+      val mayBeNewCacheContent = cache.getFeatureOption
       assertTrue(mayBeNewCacheContent.isEmpty)
     }
     listener
@@ -84,35 +83,31 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   def testInitSuccessAndNotificationSuccess(): Unit = {
     val initialFinalizedFeatures = createFinalizedFeatures()
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
     val listener = createListener(cache, Some(initialFinalizedFeatures))
 
-    def updateAndCheckCache(finalizedFeatures: Features[FinalizedVersionRange]): Unit = {
-      zkClient.updateFeatureZNode(FeatureZNode(FeatureZNodeStatus.Enabled, finalizedFeatures))
+    def updateAndCheckCache(finalizedFeatures: Map[String, Short]): Unit = {
+      zkClient.updateFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, finalizedFeatures))
       val (mayBeFeatureZNodeNewBytes, updatedVersion) = zkClient.getDataAndVersion(FeatureZNode.path)
       assertNotEquals(updatedVersion, ZkVersion.UnknownVersion)
       assertFalse(mayBeFeatureZNodeNewBytes.isEmpty)
       assertTrue(updatedVersion > initialFinalizedFeatures.epoch)
 
-      cache.waitUntilEpochOrThrow(updatedVersion, JTestUtils.DEFAULT_MAX_WAIT_MS)
-      assertEquals(FinalizedFeaturesAndEpoch(finalizedFeatures, updatedVersion), cache.get.get)
+      cache.waitUntilFeatureEpochOrThrow(updatedVersion, JTestUtils.DEFAULT_MAX_WAIT_MS)
+      assertEquals(FinalizedFeaturesAndEpoch(finalizedFeatures, updatedVersion), cache.getFeatureOption.get)
       assertTrue(listener.isListenerInitiated)
     }
 
     // Check if the write succeeds and a ZK notification is received that causes the feature cache
     // to be populated.
-    updateAndCheckCache(
-      Features.finalizedFeatures(
-        Map[String, FinalizedVersionRange](
-        "feature_1" -> new FinalizedVersionRange(2, 4)).asJava))
+    updateAndCheckCache(Map[String, Short]("feature_1" -> 4))
     // Check if second write succeeds and a ZK notification is again received that causes the cache
     // to be populated. This check is needed to verify that the watch on the FeatureZNode was
     // re-established after the notification was received due to the first write above.
     updateAndCheckCache(
-      Features.finalizedFeatures(
-        Map[String, FinalizedVersionRange](
-          "feature_1" -> new FinalizedVersionRange(2, 4),
-          "feature_2" -> new FinalizedVersionRange(1, 3)).asJava))
+      Map[String, Short](
+        "feature_1" -> 4,
+        "feature_2" -> 3))
   }
 
   /**
@@ -122,7 +117,7 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testFeatureZNodeDeleteNotificationProcessing(): Unit = {
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
     val initialFinalizedFeatures = createFinalizedFeatures()
     val listener = createListener(cache, Some(initialFinalizedFeatures))
 
@@ -131,7 +126,7 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
     assertEquals(deletedVersion, ZkVersion.UnknownVersion)
     assertTrue(mayBeFeatureZNodeDeletedBytes.isEmpty)
     TestUtils.waitUntilTrue(() => {
-      cache.isEmpty
+      cache.getFeatureOption.isEmpty
     }, "Timed out waiting for FinalizedFeatureCache to become empty")
     assertTrue(listener.isListenerInitiated)
   }
@@ -143,17 +138,16 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testFeatureZNodeDisablingNotificationProcessing(): Unit = {
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
     val initialFinalizedFeatures = createFinalizedFeatures()
 
-    val updatedFinalizedFeaturesMap = Map[String, FinalizedVersionRange]()
-    val updatedFinalizedFeatures = Features.finalizedFeatures(updatedFinalizedFeaturesMap.asJava)
-    zkClient.updateFeatureZNode(FeatureZNode(FeatureZNodeStatus.Disabled, updatedFinalizedFeatures))
+    val updatedFinalizedFeaturesMap = Map[String, Short]()
+    zkClient.updateFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, updatedFinalizedFeaturesMap))
     val (mayBeFeatureZNodeNewBytes, updatedVersion) = zkClient.getDataAndVersion(FeatureZNode.path)
     assertNotEquals(updatedVersion, ZkVersion.UnknownVersion)
     assertFalse(mayBeFeatureZNodeNewBytes.isEmpty)
     assertTrue(updatedVersion > initialFinalizedFeatures.epoch)
-    assertTrue(cache.get.isEmpty)
+    assertTrue(cache.getFeatureOption.isEmpty)
   }
 
   /**
@@ -164,21 +158,20 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testCacheUpdateWaitFailsForUnreachableVersion(): Unit = {
     val initialFinalizedFeatures = createFinalizedFeatures()
-    val cache = new FinalizedFeatureCache(createBrokerFeatures())
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, createBrokerFeatures())
     val listener = createListener(cache, Some(initialFinalizedFeatures))
 
-    assertThrows(classOf[TimeoutException], () => cache.waitUntilEpochOrThrow(initialFinalizedFeatures.epoch + 1, JTestUtils.DEFAULT_MAX_WAIT_MS))
+    assertThrows(classOf[TimeoutException], () => cache.waitUntilFeatureEpochOrThrow(initialFinalizedFeatures.epoch + 1, JTestUtils.DEFAULT_MAX_WAIT_MS))
 
-    val updatedFinalizedFeaturesMap = Map[String, FinalizedVersionRange]()
-    val updatedFinalizedFeatures = Features.finalizedFeatures(updatedFinalizedFeaturesMap.asJava)
-    zkClient.updateFeatureZNode(FeatureZNode(FeatureZNodeStatus.Disabled, updatedFinalizedFeatures))
+    val updatedFinalizedFeaturesMap = Map[String, Short]()
+    zkClient.updateFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Disabled, updatedFinalizedFeaturesMap))
     val (mayBeFeatureZNodeNewBytes, updatedVersion) = zkClient.getDataAndVersion(FeatureZNode.path)
     assertNotEquals(updatedVersion, ZkVersion.UnknownVersion)
     assertFalse(mayBeFeatureZNodeNewBytes.isEmpty)
     assertTrue(updatedVersion > initialFinalizedFeatures.epoch)
 
-    assertThrows(classOf[TimeoutException], () => cache.waitUntilEpochOrThrow(updatedVersion, JTestUtils.DEFAULT_MAX_WAIT_MS))
-    assertTrue(cache.get.isEmpty)
+    assertThrows(classOf[TimeoutException], () => cache.waitUntilFeatureEpochOrThrow(updatedVersion, JTestUtils.DEFAULT_MAX_WAIT_MS))
+    assertTrue(cache.getFeatureOption.isEmpty)
     assertTrue(listener.isListenerInitiated)
   }
 
@@ -189,12 +182,9 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testInitFailureDueToFeatureIncompatibility(): Unit = {
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
-
-    val incompatibleFinalizedFeaturesMap = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(2, 5))
-    val incompatibleFinalizedFeatures = Features.finalizedFeatures(incompatibleFinalizedFeaturesMap.asJava)
-    zkClient.createFeatureZNode(FeatureZNode(FeatureZNodeStatus.Enabled, incompatibleFinalizedFeatures))
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
+    val incompatibleFinalizedFeaturesMap = Map[String, Short]("feature_1" -> 5)
+    zkClient.createFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, incompatibleFinalizedFeaturesMap))
     val (mayBeFeatureZNodeBytes, initialVersion) = zkClient.getDataAndVersion(FeatureZNode.path)
     assertNotEquals(initialVersion, ZkVersion.UnknownVersion)
     assertFalse(mayBeFeatureZNodeBytes.isEmpty)
@@ -204,12 +194,12 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
     try {
       val listener = new FinalizedFeatureChangeListener(cache, zkClient)
       assertFalse(listener.isListenerInitiated)
-      assertTrue(cache.isEmpty)
+      assertTrue(cache.getFeatureOption.isEmpty)
       assertThrows(classOf[TimeoutException], () => listener.initOrThrow(5000))
       exitLatch.await()
       assertFalse(listener.isListenerInitiated)
       assertTrue(listener.isListenerDead)
-      assertTrue(cache.isEmpty)
+      assertTrue(cache.getFeatureOption.isEmpty)
     } finally {
       Exit.resetExitProcedure()
     }
@@ -221,7 +211,7 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testInitFailureDueToInvalidWaitTime(): Unit = {
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
     val listener = new FinalizedFeatureChangeListener(cache, zkClient)
     assertThrows(classOf[IllegalArgumentException], () => listener.initOrThrow(0))
     assertThrows(classOf[IllegalArgumentException], () => listener.initOrThrow(-1))
@@ -234,18 +224,15 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
   @Test
   def testNotificationFailureDueToFeatureIncompatibility(): Unit = {
     val brokerFeatures = createBrokerFeatures()
-    val cache = new FinalizedFeatureCache(brokerFeatures)
+    val cache = new ZkMetadataCache(1, MetadataVersion.IBP_2_8_IV1, brokerFeatures)
     val initialFinalizedFeatures = createFinalizedFeatures()
     val listener = createListener(cache, Some(initialFinalizedFeatures))
 
     val exitLatch = new CountDownLatch(1)
     Exit.setExitProcedure((_, _) => exitLatch.countDown())
-    val incompatibleFinalizedFeaturesMap = Map[String, FinalizedVersionRange](
-      "feature_1" -> new FinalizedVersionRange(
-        brokerFeatures.supportedFeatures.get("feature_1").min(),
-        (brokerFeatures.supportedFeatures.get("feature_1").max() + 1).asInstanceOf[Short]))
-    val incompatibleFinalizedFeatures = Features.finalizedFeatures(incompatibleFinalizedFeaturesMap.asJava)
-    zkClient.updateFeatureZNode(FeatureZNode(FeatureZNodeStatus.Enabled, incompatibleFinalizedFeatures))
+    val incompatibleFinalizedFeaturesMap = Map[String, Short](
+      "feature_1" -> (brokerFeatures.supportedFeatures.get("feature_1").max() + 1).asInstanceOf[Short])
+    zkClient.updateFeatureZNode(FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, incompatibleFinalizedFeaturesMap))
     val (mayBeFeatureZNodeIncompatibleBytes, updatedVersion) = zkClient.getDataAndVersion(FeatureZNode.path)
     assertNotEquals(updatedVersion, ZkVersion.UnknownVersion)
     assertFalse(mayBeFeatureZNodeIncompatibleBytes.isEmpty)
@@ -261,7 +248,7 @@ class FinalizedFeatureChangeListenerTest extends QuorumTestHarness {
         listener.isListenerDead &&
         // Make sure the cache contents are as expected, and, the incompatible features were not
         // applied.
-        cache.get.get.equals(initialFinalizedFeatures)
+        cache.getFeatureOption.get.equals(initialFinalizedFeatures)
       }, "Timed out waiting for listener death and FinalizedFeatureCache to be updated")
     } finally {
       Exit.resetExitProcedure()
diff --git a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala
index 221fd9a426565..b03427b1aa78b 100755
--- a/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala
+++ b/core/src/test/scala/unit/kafka/server/HighwatermarkPersistenceTest.scala
@@ -21,6 +21,7 @@ import java.io.File
 
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.junit.jupiter.api._
 import org.junit.jupiter.api.Assertions._
 import kafka.utils.{KafkaScheduler, MockTime, TestUtils}
@@ -69,9 +70,9 @@ class HighwatermarkPersistenceTest {
       scheduler = scheduler,
       logManager = logManagers.head,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId, configs.head.interBrokerProtocolVersion),
       logDirFailureChannel = logDirFailureChannels.head,
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
     replicaManager.startup()
     try {
       replicaManager.checkpointHighWatermarks()
@@ -84,10 +85,12 @@ class HighwatermarkPersistenceTest {
       partition0.setLog(log0, isFutureLog = false)
 
       partition0.updateAssignmentAndIsr(
-        assignment = Seq(configs.head.brokerId, configs.last.brokerId),
+        replicas = Seq(configs.head.brokerId, configs.last.brokerId),
+        isLeader = true,
         isr = Set(configs.head.brokerId),
         addingReplicas = Seq.empty,
-        removingReplicas = Seq.empty
+        removingReplicas = Seq.empty,
+        leaderRecoveryState = LeaderRecoveryState.RECOVERED
       )
 
       replicaManager.checkpointHighWatermarks()
@@ -125,9 +128,9 @@ class HighwatermarkPersistenceTest {
       scheduler = scheduler,
       logManager = logManagers.head,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId, configs.head.interBrokerProtocolVersion),
       logDirFailureChannel = logDirFailureChannels.head,
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
     replicaManager.startup()
     try {
       replicaManager.checkpointHighWatermarks()
diff --git a/core/src/test/scala/unit/kafka/server/IsrExpirationTest.scala b/core/src/test/scala/unit/kafka/server/IsrExpirationTest.scala
index f81e301db6cc8..bba439cbe7125 100644
--- a/core/src/test/scala/unit/kafka/server/IsrExpirationTest.scala
+++ b/core/src/test/scala/unit/kafka/server/IsrExpirationTest.scala
@@ -22,11 +22,12 @@ import java.util.Properties
 import kafka.cluster.Partition
 import kafka.log.{LogManager, UnifiedLog}
 import kafka.server.QuotaFactory.QuotaManagers
-import kafka.utils.TestUtils.MockAlterIsrManager
+import kafka.utils.TestUtils.MockAlterPartitionManager
 import kafka.utils._
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.mockito.Mockito.{atLeastOnce, mock, verify, when}
@@ -54,7 +55,7 @@ class IsrExpirationTest {
   var quotaManager: QuotaManagers = null
   var replicaManager: ReplicaManager = null
 
-  var alterIsrManager: MockAlterIsrManager = _
+  var alterIsrManager: MockAlterPartitionManager = _
 
   @BeforeEach
   def setUp(): Unit = {
@@ -70,9 +71,9 @@ class IsrExpirationTest {
       scheduler = null,
       logManager = logManager,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(configs.head.brokerId, configs.head.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(configs.head.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
   }
 
   @AfterEach
@@ -225,10 +226,12 @@ class IsrExpirationTest {
     partition.setLog(localLog, isFutureLog = false)
 
     partition.updateAssignmentAndIsr(
-      assignment = configs.map(_.brokerId),
+      replicas = configs.map(_.brokerId),
+      isLeader = true,
       isr = configs.map(_.brokerId).toSet,
       addingReplicas = Seq.empty,
-      removingReplicas = Seq.empty
+      removingReplicas = Seq.empty,
+      leaderRecoveryState = LeaderRecoveryState.RECOVERED
     )
 
     // set lastCaughtUpTime to current time
diff --git a/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala b/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala
index e0ea465ae696f..d176f369f8df4 100644
--- a/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala
+++ b/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala
@@ -23,7 +23,8 @@ import java.util
 import java.util.Arrays.asList
 import java.util.concurrent.TimeUnit
 import java.util.{Collections, Optional, Properties, Random}
-import kafka.api.{ApiVersion, KAFKA_0_10_2_IV0, KAFKA_2_2_IV1, LeaderAndIsr}
+
+import kafka.api.LeaderAndIsr
 import kafka.cluster.Broker
 import kafka.controller.{ControllerContext, KafkaController}
 import kafka.coordinator.group.GroupCoordinatorConcurrencyTest.{JoinGroupCallback, SyncGroupCallback}
@@ -70,7 +71,6 @@ import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.quota.{ClientQuotaAlteration, ClientQuotaEntity}
 import org.apache.kafka.common.record.FileRecords.TimestampAndOffset
 import org.apache.kafka.common.record._
-import org.apache.kafka.common.replica.ClientMetadata
 import org.apache.kafka.common.requests.FindCoordinatorRequest.CoordinatorType
 import org.apache.kafka.common.requests.MetadataResponse.TopicMetadata
 import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
@@ -91,8 +91,9 @@ import org.mockito.{ArgumentCaptor, ArgumentMatchers, Mockito}
 
 import scala.collection.{Map, Seq, mutable}
 import scala.jdk.CollectionConverters._
-
 import org.apache.kafka.common.message.CreatePartitionsRequestData.CreatePartitionsTopic
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_10_2_IV0, IBP_2_2_IV1}
 
 class KafkaApisTest {
   private val requestChannel: RequestChannel = mock(classOf[RequestChannel])
@@ -112,7 +113,8 @@ class KafkaApisTest {
   private val zkClient: KafkaZkClient = mock(classOf[KafkaZkClient])
   private val metrics = new Metrics()
   private val brokerId = 1
-  private var metadataCache: MetadataCache = MetadataCache.zkMetadataCache(brokerId)
+  // KRaft tests should override this with a KRaftMetadataCache
+  private var metadataCache: MetadataCache = MetadataCache.zkMetadataCache(brokerId, MetadataVersion.latest())
   private val clientQuotaManager: ClientQuotaManager = mock(classOf[ClientQuotaManager])
   private val clientRequestQuotaManager: ClientRequestQuotaManager = mock(classOf[ClientRequestQuotaManager])
   private val clientControllerQuotaManager: ControllerMutationQuotaManager = mock(classOf[ControllerMutationQuotaManager])
@@ -132,13 +134,12 @@ class KafkaApisTest {
     metrics.close()
   }
 
-  def createKafkaApis(interBrokerProtocolVersion: ApiVersion = ApiVersion.latestVersion,
+  def createKafkaApis(interBrokerProtocolVersion: MetadataVersion = MetadataVersion.latest,
                       authorizer: Option[Authorizer] = None,
                       enableForwarding: Boolean = false,
                       configRepository: ConfigRepository = new MockConfigRepository(),
                       raftSupport: Boolean = false,
                       overrideProperties: Map[String, String] = Map.empty): KafkaApis = {
-
     val properties = if (raftSupport) {
       val properties = TestUtils.createBrokerConfig(brokerId, "")
       properties.put(KafkaConfig.NodeIdProp, brokerId.toString)
@@ -306,8 +307,10 @@ class KafkaApisTest {
         Seq(new AlterConfigsRequest.ConfigEntry("foo", "bar")).asJava))
     val alterConfigsRequest = new AlterConfigsRequest.Builder(configs.asJava, false).build(requestHeader.apiVersion)
 
-    val request = TestUtils.buildRequestWithEnvelope(
-      alterConfigsRequest, kafkaPrincipalSerde, requestChannelMetrics, time.nanoseconds())
+    val startTimeNanos = time.nanoseconds()
+    val queueDurationNanos = 5 * 1000 * 1000
+    val request = TestUtils.buildEnvelopeRequest(
+      alterConfigsRequest, kafkaPrincipalSerde, requestChannelMetrics, startTimeNanos, startTimeNanos + queueDurationNanos)
 
     val capturedResponse: ArgumentCaptor[AlterConfigsResponse] = ArgumentCaptor.forClass(classOf[AlterConfigsResponse])
     val capturedRequest: ArgumentCaptor[RequestChannel.Request] = ArgumentCaptor.forClass(classOf[RequestChannel.Request])
@@ -320,6 +323,8 @@ class KafkaApisTest {
       any()
     )
     assertEquals(Some(request), capturedRequest.getValue.envelope)
+    // the dequeue time of forwarded request should equals to envelop request
+    assertEquals(request.requestDequeueTimeNanos, capturedRequest.getValue.requestDequeueTimeNanos)
     val innerResponse = capturedResponse.getValue
     val responseMap = innerResponse.data.responses().asScala.map { resourceResponse =>
       resourceResponse.resourceName() -> Errors.forCode(resourceResponse.errorCode)
@@ -340,7 +345,7 @@ class KafkaApisTest {
 
     when(controller.isActive).thenReturn(true)
 
-    val request = TestUtils.buildRequestWithEnvelope(
+    val request = TestUtils.buildEnvelopeRequest(
       leaveGroupRequest, kafkaPrincipalSerde, requestChannelMetrics, time.nanoseconds())
     when(clientRequestQuotaManager.maybeRecordAndGetThrottleTimeMs(any[RequestChannel.Request](),
       any[Long])).thenReturn(0)
@@ -395,8 +400,8 @@ class KafkaApisTest {
     val alterConfigsRequest = new AlterConfigsRequest.Builder(configs.asJava, false)
       .build(requestHeader.apiVersion)
 
-    val request = TestUtils.buildRequestWithEnvelope(
-      alterConfigsRequest, kafkaPrincipalSerde, requestChannelMetrics, time.nanoseconds(), fromPrivilegedListener)
+    val request = TestUtils.buildEnvelopeRequest(
+      alterConfigsRequest, kafkaPrincipalSerde, requestChannelMetrics, time.nanoseconds(), fromPrivilegedListener = fromPrivilegedListener)
 
     val capturedResponse: ArgumentCaptor[AbstractResponse] = ArgumentCaptor.forClass(classOf[AbstractResponse])
     createKafkaApis(authorizer = Some(authorizer), enableForwarding = true).handle(request, RequestLocal.withThreadConfinedCaching)
@@ -1613,7 +1618,7 @@ class KafkaApisTest {
 
       assertEquals(1, response.data.responses.size)
       val topicProduceResponse = response.data.responses.asScala.head
-      assertEquals(1, topicProduceResponse.partitionResponses.size)   
+      assertEquals(1, topicProduceResponse.partitionResponses.size)
       val partitionProduceResponse = topicProduceResponse.partitionResponses.asScala.head
       assertEquals(Errors.INVALID_PRODUCER_EPOCH, Errors.forCode(partitionProduceResponse.errorCode))
     }
@@ -1649,31 +1654,31 @@ class KafkaApisTest {
   @Test
   def shouldThrowUnsupportedVersionExceptionOnHandleAddOffsetToTxnRequestWhenInterBrokerProtocolNotSupported(): Unit = {
     assertThrows(classOf[UnsupportedVersionException],
-      () => createKafkaApis(KAFKA_0_10_2_IV0).handleAddOffsetsToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
+      () => createKafkaApis(IBP_0_10_2_IV0).handleAddOffsetsToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
   }
 
   @Test
   def shouldThrowUnsupportedVersionExceptionOnHandleAddPartitionsToTxnRequestWhenInterBrokerProtocolNotSupported(): Unit = {
     assertThrows(classOf[UnsupportedVersionException],
-      () => createKafkaApis(KAFKA_0_10_2_IV0).handleAddPartitionToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
+      () => createKafkaApis(IBP_0_10_2_IV0).handleAddPartitionToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
   }
 
   @Test
   def shouldThrowUnsupportedVersionExceptionOnHandleTxnOffsetCommitRequestWhenInterBrokerProtocolNotSupported(): Unit = {
     assertThrows(classOf[UnsupportedVersionException],
-      () => createKafkaApis(KAFKA_0_10_2_IV0).handleAddPartitionToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
+      () => createKafkaApis(IBP_0_10_2_IV0).handleAddPartitionToTxnRequest(null, RequestLocal.withThreadConfinedCaching))
   }
 
   @Test
   def shouldThrowUnsupportedVersionExceptionOnHandleEndTxnRequestWhenInterBrokerProtocolNotSupported(): Unit = {
     assertThrows(classOf[UnsupportedVersionException],
-      () => createKafkaApis(KAFKA_0_10_2_IV0).handleEndTxnRequest(null, RequestLocal.withThreadConfinedCaching))
+      () => createKafkaApis(IBP_0_10_2_IV0).handleEndTxnRequest(null, RequestLocal.withThreadConfinedCaching))
   }
 
   @Test
   def shouldThrowUnsupportedVersionExceptionOnHandleWriteTxnMarkersRequestWhenInterBrokerProtocolNotSupported(): Unit = {
     assertThrows(classOf[UnsupportedVersionException],
-      () => createKafkaApis(KAFKA_0_10_2_IV0).handleWriteTxnMarkersRequest(null, RequestLocal.withThreadConfinedCaching))
+      () => createKafkaApis(IBP_0_10_2_IV0).handleWriteTxnMarkersRequest(null, RequestLocal.withThreadConfinedCaching))
   }
 
   @Test
@@ -1758,7 +1763,7 @@ class KafkaApisTest {
   @Test
   def shouldResignCoordinatorsIfStopReplicaReceivedWithDeleteFlagAndLeaderEpoch(): Unit = {
     shouldResignCoordinatorsIfStopReplicaReceivedWithDeleteFlag(
-      LeaderAndIsr.initialLeaderEpoch + 2, deletePartition = true)
+      LeaderAndIsr.InitialLeaderEpoch + 2, deletePartition = true)
   }
 
   @Test
@@ -1776,7 +1781,7 @@ class KafkaApisTest {
   @Test
   def shouldNotResignCoordinatorsIfStopReplicaReceivedWithoutDeleteFlag(): Unit = {
     shouldResignCoordinatorsIfStopReplicaReceivedWithDeleteFlag(
-      LeaderAndIsr.initialLeaderEpoch + 2, deletePartition = false)
+      LeaderAndIsr.InitialLeaderEpoch + 2, deletePartition = false)
   }
 
   def shouldResignCoordinatorsIfStopReplicaReceivedWithDeleteFlag(leaderEpoch: Int,
@@ -2356,12 +2361,13 @@ class KafkaApisTest {
 
     when(replicaManager.getLogConfig(ArgumentMatchers.eq(tp))).thenReturn(None)
 
-    when(replicaManager.fetchMessages(anyLong, anyInt, anyInt, anyInt, anyBoolean,
-      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]], any[ReplicaQuota],
-      any[Seq[(TopicIdPartition, FetchPartitionData)] => Unit](), any[IsolationLevel],
-      any[Option[ClientMetadata]])
-    ).thenAnswer(invocation => {
-      val callback = invocation.getArgument(7).asInstanceOf[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]
+    when(replicaManager.fetchMessages(
+      any[FetchParams],
+      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]],
+      any[ReplicaQuota],
+      any[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]()
+    )).thenAnswer(invocation => {
+      val callback = invocation.getArgument(3).asInstanceOf[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]
       val records = MemoryRecords.withRecords(CompressionType.NONE,
         new SimpleRecord(timestamp, "foo".getBytes(StandardCharsets.UTF_8)))
       callback(Seq(tidp -> FetchPartitionData(Errors.NONE, hw, 0, records,
@@ -2782,7 +2788,7 @@ class KafkaApisTest {
 
     val requestChannelRequest = buildRequest(joinGroupRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleJoinGroupRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
+    createKafkaApis(IBP_2_2_IV1).handleJoinGroupRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
 
     val capturedResponse = verifyNoThrottling(requestChannelRequest)
     val response = capturedResponse.getValue.asInstanceOf[JoinGroupResponse]
@@ -2801,7 +2807,7 @@ class KafkaApisTest {
 
     val requestChannelRequest = buildRequest(syncGroupRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleSyncGroupRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
+    createKafkaApis(IBP_2_2_IV1).handleSyncGroupRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
 
     val capturedResponse = verifyNoThrottling(requestChannelRequest)
     val response = capturedResponse.getValue.asInstanceOf[SyncGroupResponse]
@@ -2819,7 +2825,7 @@ class KafkaApisTest {
     ).build()
     val requestChannelRequest = buildRequest(heartbeatRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleHeartbeatRequest(requestChannelRequest)
+    createKafkaApis(IBP_2_2_IV1).handleHeartbeatRequest(requestChannelRequest)
 
     val capturedResponse = verifyNoThrottling(requestChannelRequest)
     val response = capturedResponse.getValue.asInstanceOf[HeartbeatResponse]
@@ -2849,7 +2855,7 @@ class KafkaApisTest {
 
     val requestChannelRequest = buildRequest(offsetCommitRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleOffsetCommitRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
+    createKafkaApis(IBP_2_2_IV1).handleOffsetCommitRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
 
     val expectedTopicErrors = Collections.singletonList(
       new OffsetCommitResponseData.OffsetCommitResponseTopic()
@@ -2944,12 +2950,13 @@ class KafkaApisTest {
 
     val records = MemoryRecords.withRecords(CompressionType.NONE,
       new SimpleRecord(1000, "foo".getBytes(StandardCharsets.UTF_8)))
-    when(replicaManager.fetchMessages(anyLong, anyInt, anyInt, anyInt, anyBoolean,
-      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]], any[ReplicaQuota],
-      any[Seq[(TopicIdPartition, FetchPartitionData)] => Unit](), any[IsolationLevel],
-      any[Option[ClientMetadata]])
-    ).thenAnswer(invocation => {
-      val callback = invocation.getArgument(7).asInstanceOf[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]
+    when(replicaManager.fetchMessages(
+      any[FetchParams],
+      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]],
+      any[ReplicaQuota],
+      any[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]()
+    )).thenAnswer(invocation => {
+      val callback = invocation.getArgument(3).asInstanceOf[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]
       callback(Seq(tidp0 -> FetchPartitionData(Errors.NONE, hw, 0, records,
         None, None, None, Option.empty, isReassignmentFetch = isReassigning)))
     })
@@ -2976,7 +2983,6 @@ class KafkaApisTest {
     else
       assertEquals(0, brokerTopicStats.allTopicsStats.reassignmentBytesOutPerSec.get.count())
     assertEquals(records.sizeInBytes(), brokerTopicStats.allTopicsStats.replicationBytesOutRate.get.count())
-
   }
 
   @Test
@@ -2991,7 +2997,7 @@ class KafkaApisTest {
 
     val requestChannelRequest = buildRequest(initProducerIdRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleInitProducerIdRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
+    createKafkaApis(IBP_2_2_IV1).handleInitProducerIdRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
 
     val capturedResponse = verifyNoThrottling(requestChannelRequest)
     val response = capturedResponse.getValue.asInstanceOf[InitProducerIdResponse]
@@ -3009,7 +3015,7 @@ class KafkaApisTest {
     ).build()
     val requestChannelRequest = buildRequest(initProducerIdRequest)
 
-    createKafkaApis(KAFKA_2_2_IV1).handleInitProducerIdRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
+    createKafkaApis(IBP_2_2_IV1).handleInitProducerIdRequest(requestChannelRequest, RequestLocal.withThreadConfinedCaching)
 
     val capturedResponse = verifyNoThrottling(requestChannelRequest)
     val response = capturedResponse.getValue.asInstanceOf[InitProducerIdResponse]
@@ -3094,7 +3100,7 @@ class KafkaApisTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(asList(0, 1))
-        .setZkVersion(2)
+        .setPartitionEpoch(2)
         .setReplicas(asList(0, 1, 2))
         .setIsNew(false)
     ).asJava
@@ -3192,6 +3198,14 @@ class KafkaApisTest {
     )
     val stopReplicaResponse = capturedResponse.getValue
     assertEquals(expectedError, stopReplicaResponse.error())
+    if (expectedError != Errors.STALE_BROKER_EPOCH) {
+      verify(replicaManager).stopReplicas(
+        ArgumentMatchers.eq(request.context.correlationId),
+        ArgumentMatchers.eq(controllerId),
+        ArgumentMatchers.eq(controllerEpoch),
+        ArgumentMatchers.eq(stopReplicaRequest.partitionStates().asScala)
+      )
+    }
   }
 
   @Test
@@ -4003,9 +4017,9 @@ class KafkaApisTest {
   }
 
   @Test
-  def testRaftShouldNeverHandleAlterIsrRequest(): Unit = {
+  def testRaftShouldNeverHandleAlterPartitionRequest(): Unit = {
     metadataCache = MetadataCache.kRaftMetadataCache(brokerId)
-    verifyShouldNeverHandleErrorMessage(createKafkaApis(raftSupport = true).handleAlterIsrRequest)
+    verifyShouldNeverHandleErrorMessage(createKafkaApis(raftSupport = true).handleAlterPartitionRequest)
   }
 
   @Test
diff --git a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala
index a6597d881587c..ee638ba893d97 100755
--- a/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala
+++ b/core/src/test/scala/unit/kafka/server/KafkaConfigTest.scala
@@ -17,7 +17,6 @@
 
 package kafka.server
 
-import kafka.api.{ApiVersion, KAFKA_0_8_2, KAFKA_3_0_IV1}
 import kafka.cluster.EndPoint
 import kafka.log.LogConfig
 import kafka.message._
@@ -32,11 +31,13 @@ import org.apache.kafka.raft.RaftConfig
 import org.apache.kafka.raft.RaftConfig.{AddressSpec, InetAddressSpec, UNKNOWN_ADDRESS_SPEC_INSTANCE}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
-
 import java.net.InetSocketAddress
 import java.util
 import java.util.{Collections, Properties}
+
 import org.apache.kafka.common.Node
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.{IBP_0_8_2, IBP_3_0_IV1}
 import org.apache.kafka.server.log.remote.storage.RemoteLogManagerConfig
 import org.junit.jupiter.api.function.Executable
 
@@ -547,23 +548,23 @@ class KafkaConfigTest {
     props.put(KafkaConfig.BrokerIdProp, "1")
     props.put(KafkaConfig.ZkConnectProp, "localhost:2181")
     val conf = KafkaConfig.fromProps(props)
-    assertEquals(ApiVersion.latestVersion, conf.interBrokerProtocolVersion)
+    assertEquals(MetadataVersion.latest, conf.interBrokerProtocolVersion)
 
     props.put(KafkaConfig.InterBrokerProtocolVersionProp, "0.8.2.0")
     // We need to set the message format version to make the configuration valid.
     props.put(KafkaConfig.LogMessageFormatVersionProp, "0.8.2.0")
     val conf2 = KafkaConfig.fromProps(props)
-    assertEquals(KAFKA_0_8_2, conf2.interBrokerProtocolVersion)
+    assertEquals(IBP_0_8_2, conf2.interBrokerProtocolVersion)
 
     // check that 0.8.2.0 is the same as 0.8.2.1
     props.put(KafkaConfig.InterBrokerProtocolVersionProp, "0.8.2.1")
     // We need to set the message format version to make the configuration valid
     props.put(KafkaConfig.LogMessageFormatVersionProp, "0.8.2.1")
     val conf3 = KafkaConfig.fromProps(props)
-    assertEquals(KAFKA_0_8_2, conf3.interBrokerProtocolVersion)
+    assertEquals(IBP_0_8_2, conf3.interBrokerProtocolVersion)
 
     //check that latest is newer than 0.8.2
-    assertTrue(ApiVersion.latestVersion >= conf3.interBrokerProtocolVersion)
+    assertTrue(MetadataVersion.latest.isAtLeast(conf3.interBrokerProtocolVersion))
   }
 
   private def isValidKafkaConfig(props: Properties): Boolean = {
@@ -657,7 +658,7 @@ class KafkaConfigTest {
   def testInvalidCompressionType(): Unit = {
     val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 8181)
     props.put(KafkaConfig.CompressionTypeProp, "abc")
-    assertThrows(classOf[IllegalArgumentException], () => KafkaConfig.fromProps(props))
+    assertThrows(classOf[ConfigException], () => KafkaConfig.fromProps(props))
   }
 
   @Test
@@ -690,20 +691,20 @@ class KafkaConfigTest {
   @nowarn("cat=deprecation")
   @Test
   def testInterBrokerVersionMessageFormatCompatibility(): Unit = {
-    def buildConfig(interBrokerProtocol: ApiVersion, messageFormat: ApiVersion): KafkaConfig = {
+    def buildConfig(interBrokerProtocol: MetadataVersion, messageFormat: MetadataVersion): KafkaConfig = {
       val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 8181)
       props.put(KafkaConfig.InterBrokerProtocolVersionProp, interBrokerProtocol.version)
       props.put(KafkaConfig.LogMessageFormatVersionProp, messageFormat.version)
       KafkaConfig.fromProps(props)
     }
 
-    ApiVersion.allVersions.foreach { interBrokerVersion =>
-      ApiVersion.allVersions.foreach { messageFormatVersion =>
-        if (interBrokerVersion.recordVersion.value >= messageFormatVersion.recordVersion.value) {
+    MetadataVersion.VERSIONS.foreach { interBrokerVersion =>
+      MetadataVersion.VERSIONS.foreach { messageFormatVersion =>
+        if (interBrokerVersion.highestSupportedRecordVersion.value >= messageFormatVersion.highestSupportedRecordVersion.value) {
           val config = buildConfig(interBrokerVersion, messageFormatVersion)
           assertEquals(interBrokerVersion, config.interBrokerProtocolVersion)
-          if (interBrokerVersion >= KAFKA_3_0_IV1)
-            assertEquals(KAFKA_3_0_IV1, config.logMessageFormatVersion)
+          if (interBrokerVersion.isAtLeast(IBP_3_0_IV1))
+            assertEquals(IBP_3_0_IV1, config.logMessageFormatVersion)
           else
             assertEquals(messageFormatVersion, config.logMessageFormatVersion)
         } else {
@@ -768,6 +769,7 @@ class KafkaConfigTest {
         case KafkaConfig.MetadataMaxRetentionBytesProp => assertPropertyInvalid(baseProperties, name, "not_a_number")
         case KafkaConfig.MetadataMaxRetentionMillisProp => assertPropertyInvalid(baseProperties, name, "not_a_number")
         case KafkaConfig.ControllerListenerNamesProp => // ignore string
+        case KafkaConfig.MetadataMaxIdleIntervalMsProp  => assertPropertyInvalid(baseProperties, name, "not_a_number")
 
         case KafkaConfig.AuthorizerClassNameProp => //ignore string
         case KafkaConfig.CreateTopicPolicyClassNameProp => //ignore string
@@ -1485,13 +1487,19 @@ class KafkaConfigTest {
     assertEquals("3", originals.get(KafkaConfig.NodeIdProp))
   }
 
-  @Test
-  def testBrokerIdIsInferredByNodeIdWithKraft(): Unit = {
+  def kraftProps(): Properties = {
     val props = new Properties()
     props.setProperty(KafkaConfig.ProcessRolesProp, "broker")
-    props.put(KafkaConfig.ControllerListenerNamesProp, "SSL")
+    props.setProperty(KafkaConfig.ControllerListenerNamesProp, "CONTROLLER")
     props.setProperty(KafkaConfig.NodeIdProp, "3")
     props.setProperty(KafkaConfig.QuorumVotersProp, "1@localhost:9093")
+    props
+  }
+
+  @Test
+  def testBrokerIdIsInferredByNodeIdWithKraft(): Unit = {
+    val props = new Properties(kraftProps())
+    props.putAll(kraftProps())
     val config = KafkaConfig.fromProps(props)
     assertEquals(3, config.brokerId)
     assertEquals(3, config.nodeId)
@@ -1508,4 +1516,86 @@ class KafkaConfigTest {
     assertNotNull(config.getLong(KafkaConfig.SaslOAuthBearerJwksEndpointRetryBackoffMsProp))
     assertNotNull(config.getLong(KafkaConfig.SaslOAuthBearerJwksEndpointRetryBackoffMaxMsProp))
   }
+
+  @Test
+  def testInvalidAuthorizerClassName(): Unit = {
+    val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 8181)
+    val configs = new util.HashMap[Object, Object](props)
+    configs.put(KafkaConfig.AuthorizerClassNameProp, null)
+    val ce = assertThrows(classOf[ConfigException], () => KafkaConfig.apply(configs))
+    assertTrue(ce.getMessage.contains(KafkaConfig.AuthorizerClassNameProp))
+  }
+
+  @Test
+  def testInvalidSecurityInterBrokerProtocol(): Unit = {
+    val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect, port = 8181)
+    props.put(KafkaConfig.InterBrokerSecurityProtocolProp, "abc")
+    val ce = assertThrows(classOf[ConfigException], () => KafkaConfig.fromProps(props))
+    assertTrue(ce.getMessage.contains(KafkaConfig.InterBrokerSecurityProtocolProp))
+  }
+
+  @Test
+  def testEarlyStartListenersDefault(): Unit = {
+    val props = new Properties()
+    props.setProperty(KafkaConfig.ProcessRolesProp, "controller")
+    props.setProperty(KafkaConfig.ControllerListenerNamesProp, "CONTROLLER")
+    props.setProperty(KafkaConfig.ListenersProp, "CONTROLLER://:8092")
+    props.setProperty(KafkaConfig.NodeIdProp, "1")
+    props.setProperty(KafkaConfig.QuorumVotersProp, "1@localhost:9093")
+    val config = new KafkaConfig(props)
+    assertEquals(Set("CONTROLLER"), config.earlyStartListeners.map(_.value()))
+  }
+
+  @Test
+  def testEarlyStartListeners(): Unit = {
+    val props = new Properties()
+    props.putAll(kraftProps())
+    props.setProperty(KafkaConfig.EarlyStartListenersProp, "INTERNAL,INTERNAL2")
+    props.setProperty(KafkaConfig.InterBrokerListenerNameProp, "INTERNAL")
+    props.setProperty(KafkaConfig.ListenerSecurityProtocolMapProp,
+      "INTERNAL:PLAINTEXT,INTERNAL2:PLAINTEXT,CONTROLLER:PLAINTEXT")
+    props.setProperty(KafkaConfig.ListenersProp,
+      "INTERNAL://127.0.0.1:9092,INTERNAL2://127.0.0.1:9093")
+    val config = new KafkaConfig(props)
+    assertEquals(Set(new ListenerName("INTERNAL"), new ListenerName("INTERNAL2")),
+      config.earlyStartListeners)
+  }
+
+  @Test
+  def testEarlyStartListenersMustBeListeners(): Unit = {
+    val props = new Properties()
+    props.putAll(kraftProps())
+    props.setProperty(KafkaConfig.EarlyStartListenersProp, "INTERNAL")
+    assertEquals("early.start.listeners contains listener INTERNAL, but this is not " +
+      "contained in listeners or controller.listener.names",
+        assertThrows(classOf[ConfigException], () => new KafkaConfig(props)).getMessage)
+  }
+
+  @Test
+  def testIgnoreUserInterBrokerProtocolVersionKRaft(): Unit = {
+    for (ibp <- Seq("3.0", "3.1", "3.2")) {
+      val props = new Properties()
+      props.putAll(kraftProps())
+      props.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, ibp)
+      val config = new KafkaConfig(props)
+      assertEquals(config.interBrokerProtocolVersion, MetadataVersion.MINIMUM_KRAFT_VERSION)
+    }
+  }
+
+  @Test
+  def testInvalidInterBrokerProtocolVersionKRaft(): Unit = {
+    val props = new Properties()
+    props.putAll(kraftProps())
+    props.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, "2.8")
+    assertEquals("A non-KRaft version 2.8 given for inter.broker.protocol.version. The minimum version is 3.0-IV1",
+      assertThrows(classOf[ConfigException], () => new KafkaConfig(props)).getMessage)
+  }
+
+  @Test
+  def testDefaultInterBrokerProtocolVersionKRaft(): Unit = {
+    val props = new Properties()
+    props.putAll(kraftProps())
+    val config = new KafkaConfig(props)
+    assertEquals(config.interBrokerProtocolVersion, MetadataVersion.MINIMUM_KRAFT_VERSION)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/KafkaMetricsReporterTest.scala b/core/src/test/scala/unit/kafka/server/KafkaMetricsReporterTest.scala
index 7e5d791db252b..1adf544819fa3 100644
--- a/core/src/test/scala/unit/kafka/server/KafkaMetricsReporterTest.scala
+++ b/core/src/test/scala/unit/kafka/server/KafkaMetricsReporterTest.scala
@@ -17,15 +17,13 @@
 package kafka.server
 
 import java.util
-
 import java.util.concurrent.atomic.AtomicReference
-
-import kafka.utils.{CoreUtils, TestUtils}
-import kafka.server.QuorumTestHarness
+import kafka.utils.{CoreUtils, TestInfoUtils, TestUtils}
 import org.apache.kafka.common.metrics.{KafkaMetric, MetricsContext, MetricsReporter}
-import org.junit.jupiter.api.Assertions.{assertEquals}
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, TestInfo}
 import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
 
 object KafkaMetricsReporterTest {
@@ -43,52 +41,63 @@ object KafkaMetricsReporterTest {
     override def contextChange(metricsContext: MetricsContext): Unit = {
       //read jmxPrefix
 
-      MockMetricsReporter.JMXPREFIX.set(metricsContext.contextLabels().get("_namespace").toString)
-      MockMetricsReporter.CLUSTERID.set(metricsContext.contextLabels().get("kafka.cluster.id").toString)
-      MockMetricsReporter.BROKERID.set(metricsContext.contextLabels().get("kafka.broker.id").toString)
+      MockMetricsReporter.JMXPREFIX.set(contextLabelOrNull("_namespace", metricsContext))
+      MockMetricsReporter.CLUSTERID.set(contextLabelOrNull("kafka.cluster.id", metricsContext))
+      MockMetricsReporter.BROKERID.set(contextLabelOrNull("kafka.broker.id", metricsContext))
+      MockMetricsReporter.NODEID.set(contextLabelOrNull("kafka.node.id", metricsContext))
     }
 
-    override def configure(configs: util.Map[String, _]): Unit = {}
+    private def contextLabelOrNull(name: String, metricsContext: MetricsContext): String = {
+      Option(metricsContext.contextLabels().get(name)).flatMap(v => Option(v.toString())).getOrElse(null)
+    }
 
+    override def configure(configs: util.Map[String, _]): Unit = {}
   }
 
   object MockMetricsReporter {
     val JMXPREFIX: AtomicReference[String] = new AtomicReference[String]
     val BROKERID : AtomicReference[String] = new AtomicReference[String]
+    val NODEID : AtomicReference[String] = new AtomicReference[String]
     val CLUSTERID : AtomicReference[String] = new AtomicReference[String]
   }
 }
 
 class KafkaMetricsReporterTest extends QuorumTestHarness {
-  var server: KafkaServer = null
+  var broker: KafkaBroker = null
   var config: KafkaConfig = null
 
   @BeforeEach
   override def setUp(testInfo: TestInfo): Unit = {
     super.setUp(testInfo)
-    val props = TestUtils.createBrokerConfig(1, zkConnect)
+    val props = TestUtils.createBrokerConfig(1, zkConnectOrNull)
     props.setProperty(KafkaConfig.MetricReporterClassesProp, "kafka.server.KafkaMetricsReporterTest$MockMetricsReporter")
     props.setProperty(KafkaConfig.BrokerIdGenerationEnableProp, "true")
-    props.setProperty(KafkaConfig.BrokerIdProp, "-1")
+    props.setProperty(KafkaConfig.BrokerIdProp, "1")
     config = KafkaConfig.fromProps(props)
-    server = new KafkaServer(config, threadNamePrefix = Option(this.getClass.getName))
-    server.startup()
+    broker = createBroker(config, threadNamePrefix = Option(this.getClass.getName))
+    broker.startup()
   }
 
-  @Test
-  def testMetricsContextNamespacePresent(): Unit = {
-    assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.CLUSTERID)
-    assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.BROKERID)
-    assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.JMXPREFIX)
-    assertEquals("kafka.server", KafkaMetricsReporterTest.MockMetricsReporter.JMXPREFIX.get())
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testMetricsContextNamespacePresent(quorum: String): Unit = {
+    assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.CLUSTERID.get())
+    if (isKRaftTest()) {
+      assertNull(KafkaMetricsReporterTest.MockMetricsReporter.BROKERID.get())
+      assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.NODEID.get())
+    } else {
+      assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.BROKERID.get())
+      assertNull(KafkaMetricsReporterTest.MockMetricsReporter.NODEID.get())
+    }
+    assertNotNull(KafkaMetricsReporterTest.MockMetricsReporter.JMXPREFIX.get())
 
-    server.shutdown()
+    broker.shutdown()
     TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
   }
 
   @AfterEach
   override def tearDown(): Unit = {
-    server.shutdown()
+    broker.shutdown()
     CoreUtils.delete(config.logDirs)
     super.tearDown()
   }
diff --git a/core/src/test/scala/unit/kafka/server/KafkaRaftServerTest.scala b/core/src/test/scala/unit/kafka/server/KafkaRaftServerTest.scala
index 82ad5427a6a4e..17483e58a6a05 100644
--- a/core/src/test/scala/unit/kafka/server/KafkaRaftServerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/KafkaRaftServerTest.scala
@@ -19,10 +19,12 @@ package kafka.server
 import java.io.File
 import java.nio.file.Files
 import java.util.Properties
-import kafka.common.{InconsistentBrokerMetadataException, InconsistentNodeIdException, KafkaException}
+import kafka.common.{InconsistentBrokerMetadataException, InconsistentNodeIdException}
 import kafka.log.UnifiedLog
-import org.apache.kafka.common.Uuid
+import org.apache.kafka.common.{KafkaException, Uuid}
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.controller.BootstrapMetadata
+import org.apache.kafka.server.common.MetadataVersion
 import org.apache.kafka.test.TestUtils
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@@ -43,7 +45,7 @@ class KafkaRaftServerTest {
     configProperties.put(KafkaConfig.QuorumVotersProp, s"$nodeId@localhost:9093")
     configProperties.put(KafkaConfig.ControllerListenerNamesProp, "SSL")
 
-    val (loadedMetaProperties, offlineDirs) =
+    val (loadedMetaProperties, _, offlineDirs) =
       invokeLoadMetaProperties(metaProperties, configProperties)
 
     assertEquals(metaProperties, loadedMetaProperties)
@@ -70,12 +72,13 @@ class KafkaRaftServerTest {
 
   private def invokeLoadMetaProperties(
     metaProperties: MetaProperties,
-    configProperties: Properties
-  ): (MetaProperties, collection.Seq[String]) = {
+    configProperties: Properties,
+    metadataVersion: Option[MetadataVersion] = Some(MetadataVersion.latest())
+  ): (MetaProperties, BootstrapMetadata, collection.Seq[String]) = {
     val tempLogDir = TestUtils.tempDirectory()
     try {
       writeMetaProperties(tempLogDir, metaProperties)
-
+      metadataVersion.foreach(mv => writeBootstrapMetadata(tempLogDir, mv))
       configProperties.put(KafkaConfig.LogDirProp, tempLogDir.getAbsolutePath)
       val config = KafkaConfig.fromProps(configProperties)
       KafkaRaftServer.initializeLogDirs(config)
@@ -93,6 +96,11 @@ class KafkaRaftServerTest {
     checkpoint.write(metaProperties.toProperties)
   }
 
+  private def writeBootstrapMetadata(logDir: File, metadataVersion: MetadataVersion): Unit = {
+    val bootstrapMetadata = BootstrapMetadata.create(metadataVersion)
+    BootstrapMetadata.write(bootstrapMetadata, logDir.toPath)
+  }
+
   @Test
   def testStartupFailsIfMetaPropertiesMissingInSomeLogDir(): Unit = {
     val clusterId = clusterIdBase64
@@ -146,6 +154,7 @@ class KafkaRaftServerTest {
     // One log dir is online and has properly formatted `meta.properties`
     val validDir = TestUtils.tempDirectory()
     writeMetaProperties(validDir, MetaProperties(clusterId, nodeId))
+    writeBootstrapMetadata(validDir, MetadataVersion.latest())
 
     // Use a regular file as an invalid log dir to trigger an IO error
     val invalidDir = TestUtils.tempFile("blah")
@@ -158,7 +167,7 @@ class KafkaRaftServerTest {
     configProperties.put(KafkaConfig.ControllerListenerNamesProp, "SSL")
     val config = KafkaConfig.fromProps(configProperties)
 
-    val (loadedProperties, offlineDirs) = KafkaRaftServer.initializeLogDirs(config)
+    val (loadedProperties, _, offlineDirs) = KafkaRaftServer.initializeLogDirs(config)
     assertEquals(nodeId, loadedProperties.nodeId)
     assertEquals(Seq(invalidDir.getAbsolutePath), offlineDirs)
   }
@@ -214,4 +223,47 @@ class KafkaRaftServerTest {
       () => KafkaRaftServer.initializeLogDirs(config))
   }
 
+  @Test
+  def testKRaftUpdateWithIBP(): Unit = {
+    val clusterId = clusterIdBase64
+    val nodeId = 0
+    val metaProperties = MetaProperties(clusterId, nodeId)
+
+    val configProperties = new Properties
+    configProperties.put(KafkaConfig.ProcessRolesProp, "broker,controller")
+    configProperties.put(KafkaConfig.NodeIdProp, nodeId.toString)
+    configProperties.put(KafkaConfig.ListenersProp, "PLAINTEXT://127.0.0.1:9092,SSL://127.0.0.1:9093")
+    configProperties.put(KafkaConfig.QuorumVotersProp, s"$nodeId@localhost:9093")
+    configProperties.put(KafkaConfig.ControllerListenerNamesProp, "SSL")
+    configProperties.put(KafkaConfig.InterBrokerProtocolVersionProp, "3.2")
+
+    val (loadedMetaProperties, bootstrapMetadata, offlineDirs) =
+      invokeLoadMetaProperties(metaProperties, configProperties, None)
+
+    assertEquals(metaProperties, loadedMetaProperties)
+    assertEquals(Seq.empty, offlineDirs)
+    assertEquals(bootstrapMetadata.metadataVersion(), MetadataVersion.IBP_3_2_IV0)
+  }
+
+  @Test
+  def testKRaftUpdateWithoutIBP(): Unit = {
+    val clusterId = clusterIdBase64
+    val nodeId = 0
+    val metaProperties = MetaProperties(clusterId, nodeId)
+
+    val logDir = TestUtils.tempDirectory()
+    writeMetaProperties(logDir, metaProperties)
+
+    val configProperties = new Properties
+    configProperties.put(KafkaConfig.ProcessRolesProp, "broker,controller")
+    configProperties.put(KafkaConfig.NodeIdProp, nodeId.toString)
+    configProperties.put(KafkaConfig.ListenersProp, "PLAINTEXT://127.0.0.1:9092,SSL://127.0.0.1:9093")
+    configProperties.put(KafkaConfig.QuorumVotersProp, s"$nodeId@localhost:9093")
+    configProperties.put(KafkaConfig.ControllerListenerNamesProp, "SSL")
+    configProperties.put(KafkaConfig.LogDirProp, logDir.getAbsolutePath)
+
+    val config = KafkaConfig.fromProps(configProperties)
+    assertEquals("Cannot upgrade from KRaft version prior to 3.3 without first setting inter.broker.protocol.version on each broker.",
+      assertThrows(classOf[KafkaException], () => KafkaRaftServer.initializeLogDirs(config)).getMessage)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/KafkaServerTest.scala b/core/src/test/scala/unit/kafka/server/KafkaServerTest.scala
index 79bfc241fd67a..5a84820bf4bd8 100755
--- a/core/src/test/scala/unit/kafka/server/KafkaServerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/KafkaServerTest.scala
@@ -17,14 +17,14 @@
 
 package kafka.server
 
-import kafka.api.ApiVersion
 import kafka.utils.TestUtils
 import org.apache.kafka.common.security.JaasUtils
 import org.junit.jupiter.api.Assertions.{assertEquals, assertNull, assertThrows, fail}
 import org.junit.jupiter.api.Test
-
 import java.util.Properties
 
+import org.apache.kafka.server.common.MetadataVersion
+
 class KafkaServerTest extends QuorumTestHarness {
 
   @Test
@@ -116,8 +116,8 @@ class KafkaServerTest extends QuorumTestHarness {
     props.put(KafkaConfig.InterBrokerProtocolVersionProp, "2.7-IV1")
 
     val server = TestUtils.createServer(KafkaConfig.fromProps(props))
-    server.replicaManager.alterIsrManager match {
-      case _: ZkIsrManager =>
+    server.replicaManager.alterPartitionManager match {
+      case _: ZkAlterPartitionManager =>
       case _ => fail("Should use ZK for ISR manager in versions before 2.7-IV2")
     }
     server.shutdown()
@@ -126,11 +126,11 @@ class KafkaServerTest extends QuorumTestHarness {
   @Test
   def testAlterIsrManager(): Unit = {
     val props = TestUtils.createBrokerConfigs(1, zkConnect).head
-    props.put(KafkaConfig.InterBrokerProtocolVersionProp, ApiVersion.latestVersion.toString)
+    props.put(KafkaConfig.InterBrokerProtocolVersionProp, MetadataVersion.latest.toString)
 
     val server = TestUtils.createServer(KafkaConfig.fromProps(props))
-    server.replicaManager.alterIsrManager match {
-      case _: DefaultAlterIsrManager =>
+    server.replicaManager.alterPartitionManager match {
+      case _: DefaultAlterPartitionManager =>
       case _ => fail("Should use AlterIsr for ISR manager in versions after 2.7-IV2")
     }
     server.shutdown()
diff --git a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
index a1fb7cd4b5714..f0dea91335e65 100755
--- a/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
+++ b/core/src/test/scala/unit/kafka/server/LeaderElectionTest.scala
@@ -149,9 +149,9 @@ class LeaderElectionTest extends QuorumTestHarness {
           .setPartitionIndex(partitionId)
           .setControllerEpoch(2)
           .setLeader(brokerId2)
-          .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch)
+          .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch)
           .setIsr(Seq(brokerId1, brokerId2).map(Integer.valueOf).asJava)
-          .setZkVersion(LeaderAndIsr.initialZKVersion)
+          .setPartitionEpoch(LeaderAndIsr.InitialPartitionEpoch)
           .setReplicas(Seq(0, 1).map(Integer.valueOf).asJava)
           .setIsNew(false)
       )
diff --git a/core/src/test/scala/unit/kafka/server/LogDirFailureTest.scala b/core/src/test/scala/unit/kafka/server/LogDirFailureTest.scala
index bfbb14e1aaae7..c073f08d13190 100644
--- a/core/src/test/scala/unit/kafka/server/LogDirFailureTest.scala
+++ b/core/src/test/scala/unit/kafka/server/LogDirFailureTest.scala
@@ -31,6 +31,7 @@ import org.apache.kafka.common.utils.Utils
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
 
+import java.nio.file.Files
 import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
 
@@ -84,7 +85,7 @@ class LogDirFailureTest extends IntegrationTestHarness {
       val logDir = new File(kafkaConfig.logDirs.head)
       // Make log directory of the partition on the leader broker inaccessible by replacing it with a file
       CoreUtils.swallow(Utils.delete(logDir), this)
-      logDir.createNewFile()
+      Files.createFile(logDir.toPath)
       assertTrue(logDir.isFile)
 
       server = TestUtils.createServer(kafkaConfig)
diff --git a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala
index e143539fd4894..78f85d8f5469e 100755
--- a/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala
+++ b/core/src/test/scala/unit/kafka/server/LogOffsetTest.scala
@@ -18,15 +18,20 @@
 package kafka.server
 
 import kafka.log.{ClientRecordDeletion, LogSegment, UnifiedLog}
-import kafka.utils.{MockTime, TestUtils}
+import kafka.utils.TestUtils
 import org.apache.kafka.common.message.ListOffsetsRequestData.{ListOffsetsPartition, ListOffsetsTopic}
 import org.apache.kafka.common.message.ListOffsetsResponseData.{ListOffsetsPartitionResponse, ListOffsetsTopicResponse}
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.{FetchRequest, FetchResponse, ListOffsetsRequest, ListOffsetsResponse}
+import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{IsolationLevel, TopicPartition}
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.Timeout
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 import org.mockito.Mockito.{mock, when}
+import org.mockito.invocation.InvocationOnMock
+import org.mockito.stubbing.Answer
 
 import java.io.File
 import java.util.concurrent.atomic.AtomicInteger
@@ -34,14 +39,11 @@ import java.util.{Optional, Properties, Random}
 import scala.collection.mutable.Buffer
 import scala.jdk.CollectionConverters._
 
+@Timeout(300)
 class LogOffsetTest extends BaseRequestTest {
 
-  private lazy val time = new MockTime
-
   override def brokerCount = 1
 
-  protected override def brokerTime(brokerId: Int) = time
-
   protected override def brokerPropertyOverrides(props: Properties): Unit = {
     props.put("log.flush.interval.messages", "1")
     props.put("num.partitions", "20")
@@ -51,8 +53,9 @@ class LogOffsetTest extends BaseRequestTest {
   }
 
   @deprecated("ListOffsetsRequest V0", since = "")
-  @Test
-  def testGetOffsetsForUnknownTopic(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testGetOffsetsForUnknownTopic(quorum: String): Unit = {
     val topicPartition = new TopicPartition("foo", 0)
     val request = ListOffsetsRequest.Builder.forConsumer(false, IsolationLevel.READ_UNCOMMITTED, false)
       .setTargetTimes(buildTargetTimes(topicPartition, ListOffsetsRequest.LATEST_TIMESTAMP, 10).asJava).build(0)
@@ -61,8 +64,9 @@ class LogOffsetTest extends BaseRequestTest {
   }
 
   @deprecated("ListOffsetsRequest V0", since = "")
-  @Test
-  def testGetOffsetsAfterDeleteRecords(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testGetOffsetsAfterDeleteRecords(quorum: String): Unit = {
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, 0)
     val log = createTopicAndGetLog(topic, topicPartition)
@@ -78,7 +82,7 @@ class LogOffsetTest extends BaseRequestTest {
     val offsets = log.legacyFetchOffsetsBefore(ListOffsetsRequest.LATEST_TIMESTAMP, 15)
     assertEquals(Seq(20L, 18L, 16L, 14L, 12L, 10L, 8L, 6L, 4L, 3L), offsets)
 
-    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, server),
+    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, broker),
       "Leader should be elected")
     val request = ListOffsetsRequest.Builder.forReplica(0, 0)
       .setTargetTimes(buildTargetTimes(topicPartition, ListOffsetsRequest.LATEST_TIMESTAMP, 15).asJava).build()
@@ -86,8 +90,9 @@ class LogOffsetTest extends BaseRequestTest {
     assertEquals(Seq(20L, 18L, 16L, 14L, 12L, 10L, 8L, 6L, 4L, 3L), consumerOffsets)
   }
 
-  @Test
-  def testFetchOffsetByTimestampForMaxTimestampAfterTruncate(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFetchOffsetByTimestampForMaxTimestampAfterTruncate(quorum: String): Unit = {
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, 0)
     val log = createTopicAndGetLog(topic, topicPartition)
@@ -109,8 +114,9 @@ class LogOffsetTest extends BaseRequestTest {
     assertEquals(-1L, secondOffset.get.timestamp)
   }
 
-  @Test
-  def testFetchOffsetByTimestampForMaxTimestampWithUnorderedTimestamps(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFetchOffsetByTimestampForMaxTimestampWithUnorderedTimestamps(quorum: String): Unit = {
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, 0)
     val log = createTopicAndGetLog(topic, topicPartition)
@@ -127,13 +133,14 @@ class LogOffsetTest extends BaseRequestTest {
     assertEquals(6L, maxTimestampOffset.get.timestamp)
   }
 
-  @Test
-  def testGetOffsetsBeforeLatestTime(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testGetOffsetsBeforeLatestTime(quorum: String): Unit = {
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, 0)
     val log = createTopicAndGetLog(topic, topicPartition)
 
-    val topicIds = getTopicIds().asJava
+    val topicIds = getTopicIds(Seq("kafka-")).asJava
     val topicNames = topicIds.asScala.map(_.swap).asJava
     val topicId = topicIds.get(topic)
 
@@ -144,7 +151,7 @@ class LogOffsetTest extends BaseRequestTest {
     val offsets = log.legacyFetchOffsetsBefore(ListOffsetsRequest.LATEST_TIMESTAMP, 15)
     assertEquals(Seq(20L, 18L, 16L, 14L, 12L, 10L, 8L, 6L, 4L, 2L, 0L), offsets)
 
-    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, 0, server),
+    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, 0, broker),
       "Leader should be elected")
     val request = ListOffsetsRequest.Builder.forReplica(0, 0)
       .setTargetTimes(buildTargetTimes(topicPartition, ListOffsetsRequest.LATEST_TIMESTAMP, 15).asJava).build()
@@ -159,8 +166,9 @@ class LogOffsetTest extends BaseRequestTest {
     assertFalse(FetchResponse.recordsOrFail(fetchResponse.responseData(topicNames, ApiKeys.FETCH.latestVersion).get(topicPartition)).batches.iterator.hasNext)
   }
 
-  @Test
-  def testEmptyLogsGetOffsets(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testEmptyLogsGetOffsets(quorum: String): Unit = {
     val random = new Random
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, random.nextInt(10))
@@ -182,8 +190,9 @@ class LogOffsetTest extends BaseRequestTest {
     assertFalse(offsetChanged)
   }
 
-  @Test
-  def testFetchOffsetByTimestampForMaxTimestampWithEmptyLog(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFetchOffsetByTimestampForMaxTimestampWithEmptyLog(quorum: String): Unit = {
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, 0)
     val log = createTopicAndGetLog(topic, topicPartition)
@@ -197,27 +206,28 @@ class LogOffsetTest extends BaseRequestTest {
   }
 
   @deprecated("legacyFetchOffsetsBefore", since = "")
-  @Test
-  def testGetOffsetsBeforeNow(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testGetOffsetsBeforeNow(quorum: String): Unit = {
     val random = new Random
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, random.nextInt(3))
 
     createTopic(topic, 3, 1)
 
-    val logManager = server.getLogManager
+    val logManager = broker.logManager
     val log = logManager.getOrCreateLog(topicPartition, topicId = None)
 
     for (_ <- 0 until 20)
       log.appendAsLeader(TestUtils.singletonRecords(value = Integer.toString(42).getBytes()), leaderEpoch = 0)
     log.flush(false)
 
-    val now = time.milliseconds + 30000 // pretend it is the future to avoid race conditions with the fs
+    val now = Time.SYSTEM.milliseconds + 30000 // pretend it is the future to avoid race conditions with the fs
 
     val offsets = log.legacyFetchOffsetsBefore(now, 15)
     assertEquals(Seq(20L, 18L, 16L, 14L, 12L, 10L, 8L, 6L, 4L, 2L, 0L), offsets)
 
-    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, server),
+    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, broker),
       "Leader should be elected")
     val request = ListOffsetsRequest.Builder.forReplica(0, 0)
       .setTargetTimes(buildTargetTimes(topicPartition, now, 15).asJava).build()
@@ -226,15 +236,16 @@ class LogOffsetTest extends BaseRequestTest {
   }
 
   @deprecated("legacyFetchOffsetsBefore", since = "")
-  @Test
-  def testGetOffsetsBeforeEarliestTime(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testGetOffsetsBeforeEarliestTime(quorum: String): Unit = {
     val random = new Random
     val topic = "kafka-"
     val topicPartition = new TopicPartition(topic, random.nextInt(3))
 
     createTopic(topic, 3, 1)
 
-    val logManager = server.getLogManager
+    val logManager = broker.logManager
     val log = logManager.getOrCreateLog(topicPartition, topicId = None)
     for (_ <- 0 until 20)
       log.appendAsLeader(TestUtils.singletonRecords(value = Integer.toString(42).getBytes()), leaderEpoch = 0)
@@ -244,7 +255,7 @@ class LogOffsetTest extends BaseRequestTest {
 
     assertEquals(Seq(0L), offsets)
 
-    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, server),
+    TestUtils.waitUntilTrue(() => TestUtils.isLeaderLocalOnBroker(topic, topicPartition.partition, broker),
       "Leader should be elected")
     val request = ListOffsetsRequest.Builder.forReplica(0, 0)
       .setTargetTimes(buildTargetTimes(topicPartition, ListOffsetsRequest.EARLIEST_TIMESTAMP, 10).asJava).build()
@@ -254,14 +265,14 @@ class LogOffsetTest extends BaseRequestTest {
 
   /* We test that `fetchOffsetsBefore` works correctly if `LogSegment.size` changes after each invocation (simulating
    * a race condition) */
-  @Test
-  def testFetchOffsetsBeforeWithChangingSegmentSize(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFetchOffsetsBeforeWithChangingSegmentSize(quorum: String): Unit = {
     val log: UnifiedLog = mock(classOf[UnifiedLog])
     val logSegment: LogSegment = mock(classOf[LogSegment])
-    when(logSegment.size).thenAnswer(_ => {
-      val value = new AtomicInteger(0)
-      def answer: Int = value.getAndIncrement()
-      answer
+    when(logSegment.size).thenAnswer(new Answer[Int] {
+      private[this] val value = new AtomicInteger(0)
+      override def answer(invocation: InvocationOnMock): Int = value.getAndIncrement()
     })
     val logSegments = Seq(logSegment)
     when(log.logSegments).thenReturn(logSegments)
@@ -270,21 +281,21 @@ class LogOffsetTest extends BaseRequestTest {
 
   /* We test that `fetchOffsetsBefore` works correctly if `Log.logSegments` content and size are
    * different (simulating a race condition) */
-  @Test
-  def testFetchOffsetsBeforeWithChangingSegments(): Unit = {
+  @ParameterizedTest
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testFetchOffsetsBeforeWithChangingSegments(quorum: String): Unit = {
     val log: UnifiedLog = mock(classOf[UnifiedLog])
     val logSegment: LogSegment = mock(classOf[LogSegment])
-    when(log.logSegments).thenAnswer { _ =>
-      def answer = new Iterable[LogSegment] {
+    when(log.logSegments).thenReturn(
+      new Iterable[LogSegment] {
         override def size = 2
-        def iterator = Seq(logSegment).iterator
+        override def iterator = Seq(logSegment).iterator
       }
-      answer
-    }
+    )
     log.legacyFetchOffsetsBefore(System.currentTimeMillis, 100)
   }
 
-  private def server: KafkaServer = servers.head
+  private def broker: KafkaBroker = brokers.head
 
   private def sendListOffsetsRequest(request: ListOffsetsRequest): ListOffsetsResponse = {
     connectAndReceive[ListOffsetsResponse](request)
@@ -312,7 +323,7 @@ class LogOffsetTest extends BaseRequestTest {
   private def createTopicAndGetLog(topic: String, topicPartition: TopicPartition): UnifiedLog = {
     createTopic(topic, 1, 1)
 
-    val logManager = server.getLogManager
+    val logManager = broker.logManager
     TestUtils.waitUntilTrue(() => logManager.getLog(topicPartition).isDefined,
       "Log for partition [topic,0] should be created")
     logManager.getLog(topicPartition).get
diff --git a/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala b/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
index 856c0f64d6a83..d92c76f71189d 100644
--- a/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
+++ b/core/src/test/scala/unit/kafka/server/MetadataCacheTest.scala
@@ -19,24 +19,27 @@ package kafka.server
 import org.apache.kafka.common.{Node, TopicPartition, Uuid}
 
 import java.util
-import util.Arrays.asList
+import java.util.Arrays.asList
+import java.util.Collections
+
+import kafka.api.LeaderAndIsr
+import kafka.server.metadata.{KRaftMetadataCache, ZkMetadataCache}
 import org.apache.kafka.common.message.UpdateMetadataRequestData.{UpdateMetadataBroker, UpdateMetadataEndpoint, UpdateMetadataPartitionState, UpdateMetadataTopicState}
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.{ApiKeys, ApiMessage, Errors}
 import org.apache.kafka.common.record.RecordBatch
 import org.apache.kafka.common.requests.UpdateMetadataRequest
 import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.common.metadata.{BrokerRegistrationChangeRecord, PartitionRecord, RegisterBrokerRecord, RemoveTopicRecord, TopicRecord}
+import org.apache.kafka.common.metadata.RegisterBrokerRecord.{BrokerEndpoint, BrokerEndpointCollection}
+import org.apache.kafka.image.{ClusterImage, MetadataDelta, MetadataImage}
+import org.apache.kafka.server.common.MetadataVersion
 import org.apache.kafka.raft.{OffsetAndEpoch => RaftOffsetAndEpoch}
+
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.MethodSource
-
-import java.util.Collections
-import kafka.api.LeaderAndIsr
-import kafka.server.metadata.{KRaftMetadataCache, ZkMetadataCache}
-import org.apache.kafka.common.metadata.{PartitionRecord, RegisterBrokerRecord, RemoveTopicRecord, TopicRecord}
-import org.apache.kafka.common.metadata.RegisterBrokerRecord.{BrokerEndpoint, BrokerEndpointCollection}
-import org.apache.kafka.image.{ClusterImage, MetadataDelta, MetadataImage}
+import org.junit.jupiter.api.Test
 
 import scala.collection.{Seq, mutable}
 import scala.jdk.CollectionConverters._
@@ -44,12 +47,12 @@ import scala.jdk.CollectionConverters._
 object MetadataCacheTest {
   def zkCacheProvider(): util.stream.Stream[MetadataCache] =
     util.stream.Stream.of[MetadataCache](
-      MetadataCache.zkMetadataCache(1)
+      MetadataCache.zkMetadataCache(1, MetadataVersion.latest())
     )
 
   def cacheProvider(): util.stream.Stream[MetadataCache] =
     util.stream.Stream.of[MetadataCache](
-      MetadataCache.zkMetadataCache(1),
+      MetadataCache.zkMetadataCache(1, MetadataVersion.latest()),
       MetadataCache.kRaftMetadataCache(1)
     )
 
@@ -638,4 +641,48 @@ class MetadataCacheTest {
     assertEquals(Seq(expectedNode0, expectedNode1), partitionInfo.inSyncReplicas.toSeq)
     assertEquals(Seq(expectedNode1), partitionInfo.offlineReplicas.toSeq)
   }
+
+  @Test
+  def testIsBrokerFenced(): Unit = {
+    val metadataCache = MetadataCache.kRaftMetadataCache(0)
+
+    val delta = new MetadataDelta(MetadataImage.EMPTY)
+    delta.replay(new RegisterBrokerRecord()
+      .setBrokerId(0)
+      .setFenced(false))
+
+    metadataCache.setImage(delta.apply())
+
+    assertFalse(metadataCache.isBrokerFenced(0))
+
+    delta.replay(new BrokerRegistrationChangeRecord()
+      .setBrokerId(0)
+      .setFenced(1.toByte))
+
+    metadataCache.setImage(delta.apply())
+
+    assertTrue(metadataCache.isBrokerFenced(0))
+  }
+
+  @Test
+  def testIsBrokerInControlledShutdown(): Unit = {
+    val metadataCache = MetadataCache.kRaftMetadataCache(0)
+
+    val delta = new MetadataDelta(MetadataImage.EMPTY)
+    delta.replay(new RegisterBrokerRecord()
+      .setBrokerId(0)
+      .setInControlledShutdown(false))
+
+    metadataCache.setImage(delta.apply())
+
+    assertFalse(metadataCache.isBrokerShuttingDown(0))
+
+    delta.replay(new BrokerRegistrationChangeRecord()
+      .setBrokerId(0)
+      .setInControlledShutdown(1.toByte))
+
+    metadataCache.setImage(delta.apply())
+
+    assertTrue(metadataCache.isBrokerShuttingDown(0))
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/MetadataRequestTest.scala b/core/src/test/scala/unit/kafka/server/MetadataRequestTest.scala
index 27a9f6b059547..7b9576026c180 100644
--- a/core/src/test/scala/unit/kafka/server/MetadataRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/MetadataRequestTest.scala
@@ -18,8 +18,7 @@
 package kafka.server
 
 import java.util.Optional
-
-import kafka.utils.TestUtils
+import kafka.utils.{TestInfoUtils, TestUtils}
 import org.apache.kafka.common.Uuid
 import org.apache.kafka.common.errors.UnsupportedVersionException
 import org.apache.kafka.common.internals.Topic
@@ -42,7 +41,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     doSetup(testInfo, createOffsetsTopic = false)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testClusterIdWithRequestVersion1(quorum: String): Unit = {
     val v1MetadataResponse = sendMetadataRequest(MetadataRequest.Builder.allTopics.build(1.toShort))
@@ -50,7 +49,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertNull(v1ClusterId, s"v1 clusterId should be null")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testClusterIdIsValid(quorum: String): Unit = {
     val metadataResponse = sendMetadataRequest(MetadataRequest.Builder.allTopics.build(2.toShort))
@@ -61,9 +60,9 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
    * This test only runs in ZK mode because in KRaft mode, the controller ID visible to
    * the client is randomized.
    */
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
-  def testControllerId(): Unit = {
+  def testControllerId(quorum: String): Unit = {
     val controllerServer = servers.find(_.kafkaController.isActive).get
     val controllerId = controllerServer.config.brokerId
     val metadataResponse = sendMetadataRequest(MetadataRequest.Builder.allTopics.build(1.toShort))
@@ -84,7 +83,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     }, "Controller id should match the active controller after failover", 5000)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testRack(quorum: String): Unit = {
     val metadataResponse = sendMetadataRequest(MetadataRequest.Builder.allTopics.build(1.toShort))
@@ -94,7 +93,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIsInternal(quorum: String): Unit = {
     val internalTopic = Topic.GROUP_METADATA_TOPIC_NAME
@@ -116,7 +115,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertEquals(Set(internalTopic).asJava, metadataResponse.buildCluster().internalTopics)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testNoTopicsRequest(quorum: String): Unit = {
     // create some topics
@@ -130,7 +129,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertTrue(metadataResponse.topicMetadata.isEmpty, "Response should have no topics")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAutoTopicCreation(quorum: String): Unit = {
     val topic1 = "t1"
@@ -161,7 +160,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAutoCreateTopicWithInvalidReplicationFactor(quorum: String): Unit = {
     // Shutdown all but one broker so that the number of brokers is less than the default replication factor
@@ -181,7 +180,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertEquals(0, topicMetadata.partitionMetadata.size)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testAutoCreateOfCollidingTopics(quorum: String): Unit = {
     val topic1 = "testAutoCreate.Topic"
@@ -212,7 +211,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertTrue(partitionMetadata.leaderId.get >= 0)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAllTopicsRequest(quorum: String): Unit = {
     // create some topics
@@ -230,7 +229,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertEquals(2, metadataResponseV1.topicMetadata.size(), "V1 Response should have 2 (all) topics")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testTopicIdsInResponse(quorum: String): Unit = {
     val replicaAssignment = Map(0 -> Seq(1, 2, 0), 1 -> Seq(2, 0, 1))
@@ -260,7 +259,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
   /**
     * Preferred replica should be the first item in the replicas list
     */
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testPreferredReplica(quorum: String): Unit = {
     val replicaAssignment = Map(0 -> Seq(1, 2, 0), 1 -> Seq(2, 0, 1))
@@ -284,7 +283,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testReplicaDownResponse(quorum: String): Unit = {
     val replicaDownTopic = "replicaDown"
@@ -330,7 +329,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     assertEquals(replicaCount, v1PartitionMetadata.replicaIds.size, s"Response should have $replicaCount replicas")
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testIsrAfterBrokerShutDownAndJoinsBack(quorum: String): Unit = {
     def checkIsr[B <: KafkaBroker](
@@ -368,7 +367,7 @@ class MetadataRequestTest extends AbstractMetadataRequestTest {
     checkIsr(brokers, topic)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testAliveBrokersWithNoTopics(quorum: String): Unit = {
     def checkMetadata[B <: KafkaBroker](
diff --git a/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala
index 7d3ded57c4890..97be68c681059 100644
--- a/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ProduceRequestTest.scala
@@ -22,13 +22,13 @@ import java.util.{Collections, Properties}
 
 import kafka.log.LogConfig
 import kafka.message.ZStdCompressionCodec
-import kafka.metrics.KafkaYammerMetrics
 import kafka.utils.TestUtils
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.message.ProduceRequestData
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.record._
 import org.apache.kafka.common.requests.{ProduceRequest, ProduceResponse}
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
 
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaAlterLogDirsThreadTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaAlterLogDirsThreadTest.scala
index bf2671a16e30e..09939f43fddad 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaAlterLogDirsThreadTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaAlterLogDirsThreadTest.scala
@@ -16,7 +16,6 @@
   */
 package kafka.server
 
-import java.util.{Collections, Optional}
 import kafka.api.Request
 import kafka.cluster.{BrokerEndPoint, Partition}
 import kafka.log.{LogManager, UnifiedLog}
@@ -31,13 +30,15 @@ import org.apache.kafka.common.message.UpdateMetadataRequestData
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record.MemoryRecords
 import org.apache.kafka.common.requests.{FetchRequest, UpdateMetadataRequest}
-import org.apache.kafka.common.{IsolationLevel, TopicIdPartition, TopicPartition, Uuid}
+import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
-import org.mockito.ArgumentMatchers.{any, anyBoolean, anyInt, anyLong}
+import org.mockito.ArgumentMatchers.{any, anyBoolean}
 import org.mockito.Mockito.{doNothing, mock, never, times, verify, when}
 import org.mockito.{ArgumentCaptor, ArgumentMatchers, Mockito}
 
+import java.util.{Collections, Optional}
 import scala.collection.{Map, Seq}
 import scala.jdk.CollectionConverters._
 
@@ -61,7 +62,7 @@ class ReplicaAlterLogDirsThreadTest {
   private val updateMetadataRequest = new UpdateMetadataRequest.Builder(ApiKeys.UPDATE_METADATA.latestVersion(),
     0, 0, 0, partitionStates, Collections.emptyList(), topicIds.asJava).build()
   // TODO: support raft code?
-  private val metadataCache = new ZkMetadataCache(0)
+  private val metadataCache = new ZkMetadataCache(0, MetadataVersion.latest(), BrokerFeatures.createEmpty())
   metadataCache.updateMetadata(0, updateMetadataRequest)
 
   private def initialFetchState(fetchOffset: Long, leaderEpoch: Int = 1): InitialFetchState = {
@@ -80,14 +81,15 @@ class ReplicaAlterLogDirsThreadTest {
     when(replicaManager.futureLogExists(t1p0)).thenReturn(false)
 
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = new BrokerTopicStats)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      new BrokerTopicStats,
+      config.replicaFetchBackoffMs)
 
     val addedPartitions = thread.addPartitions(Map(t1p0 -> initialFetchState(0L)))
     assertEquals(Set.empty, addedPartitions)
@@ -147,14 +149,15 @@ class ReplicaAlterLogDirsThreadTest {
     mockFetchFromCurrentLog(tid1p0, fencedRequestData, config, replicaManager, fencedResponseData)
 
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
-      "alter-logs-dirs-thread",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = new BrokerTopicStats)
+      "alter-log-dirs-thread",
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      new BrokerTopicStats,
+      config.replicaFetchBackoffMs)
 
     // Initially we add the partition with an older epoch which results in an error
     thread.addPartitions(Map(t1p0 -> initialFetchState(fetchOffset = 0L, leaderEpoch - 1)))
@@ -245,14 +248,15 @@ class ReplicaAlterLogDirsThreadTest {
     mockFetchFromCurrentLog(tid1p0, requestData, config, replicaManager, responseData)
 
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = new BrokerTopicStats)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      new BrokerTopicStats,
+      config.replicaFetchBackoffMs)
 
     thread.addPartitions(Map(t1p0 -> initialFetchState(fetchOffset = 0L, leaderEpoch)))
     assertTrue(thread.fetchState(t1p0).isDefined)
@@ -271,18 +275,26 @@ class ReplicaAlterLogDirsThreadTest {
                                       responseData: FetchPartitionData): Unit = {
     val callbackCaptor: ArgumentCaptor[Seq[(TopicIdPartition, FetchPartitionData)] => Unit] =
       ArgumentCaptor.forClass(classOf[Seq[(TopicIdPartition, FetchPartitionData)] => Unit])
+
+    val expectedFetchParams = FetchParams(
+      requestVersion = ApiKeys.FETCH.latestVersion,
+      replicaId = Request.FutureLocalReplicaId,
+      maxWaitMs = 0L,
+      minBytes = 0,
+      maxBytes = config.replicaFetchResponseMaxBytes,
+      isolation = FetchLogEnd,
+      clientMetadata = None
+    )
+
+    println(expectedFetchParams)
+
     when(replicaManager.fetchMessages(
-      timeout = ArgumentMatchers.eq(0L),
-      replicaId = ArgumentMatchers.eq(Request.FutureLocalReplicaId),
-      fetchMinBytes = ArgumentMatchers.eq(0),
-      fetchMaxBytes = ArgumentMatchers.eq(config.replicaFetchResponseMaxBytes),
-      hardMaxBytesLimit = ArgumentMatchers.eq(false),
+      params = ArgumentMatchers.eq(expectedFetchParams),
       fetchInfos = ArgumentMatchers.eq(Seq(topicIdPartition -> requestData)),
       quota = ArgumentMatchers.eq(UnboundedQuota),
       responseCallback = callbackCaptor.capture(),
-      isolationLevel = ArgumentMatchers.eq(IsolationLevel.READ_UNCOMMITTED),
-      clientMetadata = ArgumentMatchers.eq(None)
     )).thenAnswer(_ => {
+      println("Did we get the callback?")
       callbackCaptor.getValue.apply(Seq((topicIdPartition, responseData)))
     })
   }
@@ -327,16 +339,17 @@ class ReplicaAlterLogDirsThreadTest {
         .setEndOffset(leoT1p1))
 
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, null)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = null,
-      brokerTopicStats = null)
-
-    val result = thread.fetchEpochEndOffsets(Map(
+      leader,
+      failedPartitions,
+      replicaManager,
+      null,
+      null,
+      config.replicaFetchBackoffMs)
+
+    val result = thread.leader.fetchEpochEndOffsets(Map(
       t1p0 -> new OffsetForLeaderPartition()
         .setPartition(t1p0.partition)
         .setLeaderEpoch(leaderEpochT1p0),
@@ -388,16 +401,17 @@ class ReplicaAlterLogDirsThreadTest {
       .thenThrow(new KafkaStorageException)
 
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, null)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = null,
-      brokerTopicStats = null)
-
-    val result = thread.fetchEpochEndOffsets(Map(
+      leader,
+      failedPartitions,
+      replicaManager,
+      null,
+      null,
+      config.replicaFetchBackoffMs)
+
+    val result = thread.leader.fetchEpochEndOffsets(Map(
       t1p0 -> new OffsetForLeaderPartition()
         .setPartition(t1p0.partition)
         .setLeaderEpoch(leaderEpoch),
@@ -489,14 +503,15 @@ class ReplicaAlterLogDirsThreadTest {
 
     //Create the thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(t1p0 -> initialFetchState(0L), t1p1 -> initialFetchState(0L)))
 
     //Run it
@@ -572,14 +587,15 @@ class ReplicaAlterLogDirsThreadTest {
 
     //Create the thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(t1p0 -> initialFetchState(0L)))
 
     // First run will result in another offset for leader epoch request
@@ -627,14 +643,15 @@ class ReplicaAlterLogDirsThreadTest {
 
     //Create the thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(t1p0 -> initialFetchState(initialFetchOffset)))
 
     //Run it
@@ -701,28 +718,23 @@ class ReplicaAlterLogDirsThreadTest {
 
     when(replicaManager.logManager).thenReturn(logManager)
     when(replicaManager.fetchMessages(
-      anyLong(),
-      anyInt(),
-      anyInt(),
-      anyInt(),
-      any(),
-      any(),
-      any(),
+      any[FetchParams],
+      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]],
+      any[ReplicaQuota],
       responseCallback.capture(),
-      any(),
-      any(),
     )).thenAnswer(_ => responseCallback.getValue.apply(Seq.empty[(TopicIdPartition, FetchPartitionData)]))
 
     //Create the thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(t1p0 -> initialFetchState(0L)))
 
     // Run thread 3 times (exactly number of times we mock exception for getReplicaOrException)
@@ -783,14 +795,15 @@ class ReplicaAlterLogDirsThreadTest {
 
     //Create the fetcher thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(t1p0 -> initialFetchState(0L)))
 
     // loop few times
@@ -823,19 +836,20 @@ class ReplicaAlterLogDirsThreadTest {
     //Create the fetcher thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
     val leaderEpoch = 1
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(
       t1p0 -> initialFetchState(0L, leaderEpoch),
       t1p1 -> initialFetchState(0L, leaderEpoch)))
 
-    val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = thread.buildFetch(Map(
+    val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = thread.leader.buildFetch(Map(
       t1p0 -> PartitionFetchState(Some(topicId), 150, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None),
       t1p1 -> PartitionFetchState(Some(topicId), 160, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None)))
 
@@ -873,20 +887,21 @@ class ReplicaAlterLogDirsThreadTest {
     //Create the fetcher thread
     val endPoint = new BrokerEndPoint(0, "localhost", 1000)
     val leaderEpoch = 1
+    val leader = new LocalLeaderEndPoint(endPoint, config, replicaManager, quotaManager)
     val thread = new ReplicaAlterLogDirsThread(
       "alter-logs-dirs-thread-test1",
-      sourceBroker = endPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      quota = quotaManager,
-      brokerTopicStats = null)
+      leader,
+      failedPartitions,
+      replicaManager,
+      quotaManager,
+      null,
+      config.replicaFetchBackoffMs)
     thread.addPartitions(Map(
       t1p0 -> initialFetchState(0L, leaderEpoch),
       t1p1 -> initialFetchState(0L, leaderEpoch)))
 
     // one partition is ready and one is truncating
-    val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = thread.buildFetch(Map(
+    val ResultWithPartitions(fetchRequestOpt, partitionsWithError) = thread.leader.buildFetch(Map(
         t1p0 -> PartitionFetchState(Some(topicId), 150, None, leaderEpoch, state = Fetching, lastFetchedEpoch = None),
         t1p1 -> PartitionFetchState(Some(topicId), 160, None, leaderEpoch, state = Truncating, lastFetchedEpoch = None)))
 
@@ -900,7 +915,7 @@ class ReplicaAlterLogDirsThreadTest {
     assertEquals(150, fetchInfos.head._2.fetchOffset)
 
     // one partition is ready and one is delayed
-    val ResultWithPartitions(fetchRequest2Opt, partitionsWithError2) = thread.buildFetch(Map(
+    val ResultWithPartitions(fetchRequest2Opt, partitionsWithError2) = thread.leader.buildFetch(Map(
         t1p0 -> PartitionFetchState(Some(topicId), 140, None, leaderEpoch, state = Fetching, lastFetchedEpoch = None),
         t1p1 -> PartitionFetchState(Some(topicId), 160, None, leaderEpoch, delay = Some(new DelayedItem(5000)), state = Fetching, lastFetchedEpoch = None)))
 
@@ -914,7 +929,7 @@ class ReplicaAlterLogDirsThreadTest {
     assertEquals(140, fetchInfos2.head._2.fetchOffset)
 
     // both partitions are delayed
-    val ResultWithPartitions(fetchRequest3Opt, partitionsWithError3) = thread.buildFetch(Map(
+    val ResultWithPartitions(fetchRequest3Opt, partitionsWithError3) = thread.leader.buildFetch(Map(
         t1p0 -> PartitionFetchState(Some(topicId), 140, None, leaderEpoch, delay = Some(new DelayedItem(5000)), state = Fetching, lastFetchedEpoch = None),
         t1p1 -> PartitionFetchState(Some(topicId), 160, None, leaderEpoch, delay = Some(new DelayedItem(5000)), state = Fetching, lastFetchedEpoch = None)))
     assertTrue(fetchRequest3Opt.isEmpty, "Expected no fetch requests since all partitions are delayed")
@@ -939,16 +954,10 @@ class ReplicaAlterLogDirsThreadTest {
                             responseCallback: ArgumentCaptor[Seq[(TopicIdPartition, FetchPartitionData)] => Unit]): Unit = {
     stub(logT1p0, logT1p1, futureLog, partition, replicaManager)
     when(replicaManager.fetchMessages(
-      anyLong(),
-      anyInt(),
-      anyInt(),
-      anyInt(),
-      any(),
-      any(),
-      any(),
-      responseCallback.capture(),
-      any(),
-      any())
-    ).thenAnswer(_ => responseCallback.getValue.apply(Seq.empty[(TopicIdPartition, FetchPartitionData)]))
+      any[FetchParams],
+      any[Seq[(TopicIdPartition, FetchRequest.PartitionData)]],
+      any[ReplicaQuota],
+      responseCallback.capture()
+    )).thenAnswer(_ => responseCallback.getValue.apply(Seq.empty[(TopicIdPartition, FetchPartitionData)]))
   }
 }
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala
index e7bca593843b8..eaeb76f03eee2 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaFetchTest.scala
@@ -17,43 +17,38 @@
 
 package kafka.server
 
-import scala.collection.Seq
-
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
-import kafka.server.QuorumTestHarness
-import kafka.utils.TestUtils
+import org.junit.jupiter.api.AfterEach
+import kafka.utils.{TestInfoUtils, TestUtils}
 import TestUtils._
+import kafka.api.IntegrationTestHarness
 import org.apache.kafka.clients.producer.ProducerRecord
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.serialization.StringSerializer
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.ValueSource
 
-class ReplicaFetchTest extends QuorumTestHarness  {
-  var brokers: Seq[KafkaServer] = null
+class ReplicaFetchTest extends IntegrationTestHarness {
   val topic1 = "foo"
   val topic2 = "bar"
 
-  @BeforeEach
-  override def setUp(testInfo: TestInfo): Unit = {
-    super.setUp(testInfo)
-    val props = createBrokerConfigs(2, zkConnect)
-    brokers = props.map(KafkaConfig.fromProps).map(TestUtils.createServer(_))
-  }
-
   @AfterEach
   override def tearDown(): Unit = {
     TestUtils.shutdownServers(brokers)
     super.tearDown()
   }
 
-  @Test
-  def testReplicaFetcherThread(): Unit = {
+  override def brokerCount: Int = 2
+
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk", "kraft"))
+  def testReplicaFetcherThread(quorum: String): Unit = {
     val partition = 0
     val testMessageList1 = List("test1", "test2", "test3", "test4")
     val testMessageList2 = List("test5", "test6", "test7", "test8")
 
     // create a topic and partition and await leadership
     for (topic <- List(topic1,topic2)) {
-      createTopic(zkClient, topic, numPartitions = 1, replicationFactor = 2, servers = brokers)
+      createTopic(topic, replicationFactor = 2)
     }
 
     // send test messages to leader
@@ -69,9 +64,9 @@ class ReplicaFetchTest extends QuorumTestHarness  {
       var result = true
       for (topic <- List(topic1, topic2)) {
         val tp = new TopicPartition(topic, partition)
-        val expectedOffset = brokers.head.getLogManager.getLog(tp).get.logEndOffset
+        val expectedOffset = brokers.head.logManager.getLog(tp).get.logEndOffset
         result = result && expectedOffset > 0 && brokers.forall { item =>
-          expectedOffset == item.getLogManager.getLog(tp).get.logEndOffset
+          expectedOffset == item.logManager.getLog(tp).get.logEndOffset
         }
       }
       result
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala
index efbb0157c0c5a..c7a222c2d547d 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala
@@ -16,34 +16,36 @@
   */
 package kafka.server
 
-import kafka.api.{ApiVersion, KAFKA_2_6_IV0}
 import kafka.cluster.{BrokerEndPoint, Partition}
 import kafka.log.{LogAppendInfo, LogManager, UnifiedLog}
 import kafka.server.AbstractFetcherThread.ResultWithPartitions
 import kafka.server.QuotaFactory.UnboundedQuota
-import kafka.server.epoch.util.ReplicaFetcherMockBlockingSend
+import kafka.server.epoch.util.MockBlockingSender
 import kafka.server.metadata.ZkMetadataCache
 import kafka.utils.TestUtils
+import org.apache.kafka.clients.FetchSessionHandler
 import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
 import org.apache.kafka.common.message.{FetchResponseData, UpdateMetadataRequestData}
 import org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.OffsetForLeaderPartition
 import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset
-import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.protocol.Errors._
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record.{CompressionType, MemoryRecords, SimpleRecord}
 import org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.{UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET}
 import org.apache.kafka.common.requests.{FetchRequest, FetchResponse, UpdateMetadataRequest}
-import org.apache.kafka.common.utils.SystemTime
+import org.apache.kafka.common.utils.{LogContext, SystemTime}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}
 import org.mockito.ArgumentCaptor
 import org.mockito.ArgumentMatchers.{any, anyBoolean, anyLong}
 import org.mockito.Mockito.{mock, never, times, verify, when}
-
 import java.nio.charset.StandardCharsets
 import java.util
 import java.util.{Collections, Optional}
+
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_6_IV0
+
 import scala.collection.{Map, mutable}
 import scala.jdk.CollectionConverters._
 
@@ -79,7 +81,7 @@ class ReplicaFetcherThreadTest {
   private val updateMetadataRequest = new UpdateMetadataRequest.Builder(ApiKeys.UPDATE_METADATA.latestVersion(),
     0, 0, 0, partitionStates, Collections.emptyList(), topicIds.asJava).build()
   // TODO: support raft code?
-  private val metadataCache = new ZkMetadataCache(0)
+  private var metadataCache = new ZkMetadataCache(0, MetadataVersion.latest(), BrokerFeatures.createEmpty())
   metadataCache.updateMetadata(0, updateMetadataRequest)
 
   private def initialFetchState(topicId: Option[Uuid], fetchOffset: Long, leaderEpoch: Int = 1): InitialFetchState = {
@@ -92,26 +94,38 @@ class ReplicaFetcherThreadTest {
     TestUtils.clearYammerMetrics()
   }
 
+  private def createReplicaFetcherThread(name: String,
+                                         fetcherId: Int,
+                                         brokerConfig: KafkaConfig,
+                                         failedPartitions: FailedPartitions,
+                                         replicaMgr: ReplicaManager,
+                                         quota: ReplicaQuota,
+                                         leaderEndpointBlockingSend: BlockingSend): ReplicaFetcherThread = {
+    val logContext = new LogContext(s"[ReplicaFetcher replicaId=${brokerConfig.brokerId}, leaderId=${leaderEndpointBlockingSend.brokerEndPoint().id}, fetcherId=$fetcherId] ")
+    val fetchSessionHandler = new FetchSessionHandler(logContext, leaderEndpointBlockingSend.brokerEndPoint().id)
+    val leader = new RemoteLeaderEndPoint(logContext.logPrefix, leaderEndpointBlockingSend, fetchSessionHandler,
+      brokerConfig, replicaMgr, quota, () => brokerConfig.interBrokerProtocolVersion)
+    new ReplicaFetcherThread(name,
+      leader,
+      brokerConfig,
+      failedPartitions,
+      replicaMgr,
+      quota,
+      logContext.logPrefix,
+      () => brokerConfig.interBrokerProtocolVersion)
+  }
+
   @Test
   def shouldSendLatestRequestVersionsByDefault(): Unit = {
     val props = TestUtils.createBrokerConfig(1, "localhost:1234")
     val config = KafkaConfig.fromProps(props)
+
     val replicaManager: ReplicaManager = mock(classOf[ReplicaManager])
     when(replicaManager.brokerTopicStats).thenReturn(mock(classOf[BrokerTopicStats]))
-    val thread = new ReplicaFetcherThread(
-      name = "bob",
-      fetcherId = 0,
-      sourceBroker = brokerEndPoint,
-      brokerConfig = config,
-      failedPartitions: FailedPartitions,
-      replicaMgr = replicaManager,
-      metrics =  new Metrics(),
-      time = new SystemTime(),
-      quota = UnboundedQuota,
-      leaderEndpointBlockingSend = None)
-    assertEquals(ApiKeys.FETCH.latestVersion, thread.fetchRequestVersion)
-    assertEquals(ApiKeys.OFFSET_FOR_LEADER_EPOCH.latestVersion, thread.offsetForLeaderEpochRequestVersion)
-    assertEquals(ApiKeys.LIST_OFFSETS.latestVersion, thread.listOffsetRequestVersion)
+
+    assertEquals(ApiKeys.FETCH.latestVersion, config.interBrokerProtocolVersion.fetchRequestVersion())
+    assertEquals(ApiKeys.OFFSET_FOR_LEADER_EPOCH.latestVersion, config.interBrokerProtocolVersion.offsetForLeaderEpochRequestVersion)
+    assertEquals(ApiKeys.LIST_OFFSETS.latestVersion, config.interBrokerProtocolVersion.listOffsetRequestVersion)
   }
 
   @Test
@@ -150,9 +164,16 @@ class ReplicaFetcherThreadTest {
       t1p1 -> newOffsetForLeaderPartitionResult(t1p1, leaderEpoch, 1)).asJava
 
     //Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsets, brokerEndPoint, new SystemTime())
+    val mockNetwork = new MockBlockingSender(offsets, brokerEndPoint, new SystemTime())
 
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork)
 
     // topic 1 supports epoch, t2 doesn't.
     thread.addPartitions(Map(
@@ -214,24 +235,23 @@ class ReplicaFetcherThreadTest {
     val props = TestUtils.createBrokerConfig(1, "localhost:1234")
     val config = KafkaConfig.fromProps(props)
     val mockBlockingSend: BlockingSend = mock(classOf[BlockingSend])
-
+    when(mockBlockingSend.brokerEndPoint()).thenReturn(brokerEndPoint)
     when(mockBlockingSend.sendRequest(any())).thenThrow(new NullPointerException)
+
     val replicaManager: ReplicaManager = mock(classOf[ReplicaManager])
     when(replicaManager.brokerTopicStats).thenReturn(mock(classOf[BrokerTopicStats]))
 
-    val thread = new ReplicaFetcherThread(
-      name = "bob",
-      fetcherId = 0,
-      sourceBroker = brokerEndPoint,
-      brokerConfig = config,
-      failedPartitions: FailedPartitions,
-      replicaMgr = replicaManager,
-      metrics =  new Metrics(),
-      time = new SystemTime(),
-      quota = null,
-      leaderEndpointBlockingSend = Some(mockBlockingSend))
-
-    val result = thread.fetchEpochEndOffsets(Map(
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      null,
+      mockBlockingSend
+    )
+
+    val result = thread.leader.fetchEpochEndOffsets(Map(
       t1p0 -> new OffsetForLeaderPartition()
         .setPartition(t1p0.partition)
         .setLeaderEpoch(0),
@@ -250,19 +270,22 @@ class ReplicaFetcherThreadTest {
 
   @Test
   def shouldFetchLeaderEpochOnFirstFetchOnlyIfLeaderEpochKnownToBothIbp26(): Unit = {
-    verifyFetchLeaderEpochOnFirstFetch(KAFKA_2_6_IV0)
+    verifyFetchLeaderEpochOnFirstFetch(IBP_2_6_IV0)
   }
 
   @Test
   def shouldNotFetchLeaderEpochOnFirstFetchWithTruncateOnFetch(): Unit = {
-    verifyFetchLeaderEpochOnFirstFetch(ApiVersion.latestVersion, epochFetchCount = 0)
+    verifyFetchLeaderEpochOnFirstFetch(MetadataVersion.latest, epochFetchCount = 0)
   }
 
-  private def verifyFetchLeaderEpochOnFirstFetch(ibp: ApiVersion, epochFetchCount: Int = 1): Unit = {
+  private def verifyFetchLeaderEpochOnFirstFetch(ibp: MetadataVersion, epochFetchCount: Int = 1): Unit = {
     val props = TestUtils.createBrokerConfig(1, "localhost:1234")
     props.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, ibp.version)
     val config = KafkaConfig.fromProps(props)
 
+    metadataCache = new ZkMetadataCache(0, ibp, BrokerFeatures.createEmpty())
+    metadataCache.updateMetadata(0, updateMetadataRequest)
+
     //Setup all dependencies
     val logManager: LogManager = mock(classOf[LogManager])
     val replicaAlterLogDirsManager: ReplicaAlterLogDirsManager = mock(classOf[ReplicaAlterLogDirsManager])
@@ -290,9 +313,16 @@ class ReplicaFetcherThreadTest {
       t1p1 -> newOffsetForLeaderPartitionResult(t1p1, leaderEpoch, 1)).asJava
 
     //Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsets, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager,
-      new Metrics, new SystemTime, UnboundedQuota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsets, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      UnboundedQuota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
 
     //Loop 1
@@ -349,9 +379,16 @@ class ReplicaFetcherThreadTest {
       t2p1 -> newOffsetForLeaderPartitionResult(t2p1, leaderEpoch, 172)).asJava
 
     //Create the thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager,
-      new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t2p1 -> initialFetchState(Some(topicId2), 0L)))
 
     //Run it
@@ -402,9 +439,16 @@ class ReplicaFetcherThreadTest {
       t2p1 -> newOffsetForLeaderPartitionResult(t2p1, leaderEpochAtLeader, 202)).asJava
 
     //Create the thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions,
-      replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t2p1 -> initialFetchState(Some(topicId2), 0L)))
 
     //Run it
@@ -458,8 +502,16 @@ class ReplicaFetcherThreadTest {
       t1p1 -> newOffsetForLeaderPartitionResult(t1p1, 4, 143)).asJava
 
     // Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsets, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsets, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
 
     // Loop 1 -- both topic partitions will need to fetch another leader epoch
@@ -528,8 +580,13 @@ class ReplicaFetcherThreadTest {
     stub(partition, replicaManager, log)
 
     // Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(Collections.emptyMap(), brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork)) {
+    val mockNetwork = new MockBlockingSender(Collections.emptyMap(), brokerEndPoint, new SystemTime())
+    val logContext = new LogContext(s"[ReplicaFetcher replicaId=${config.brokerId}, leaderId=${brokerEndPoint.id}, fetcherId=0] ")
+    val fetchSessionHandler = new FetchSessionHandler(logContext, brokerEndPoint.id)
+    val leader = new RemoteLeaderEndPoint(logContext.logPrefix, mockNetwork, fetchSessionHandler, config,
+      replicaManager, quota, () => config.interBrokerProtocolVersion)
+    val thread = new ReplicaFetcherThread("bob", leader, config, failedPartitions,
+      replicaManager, quota, logContext.logPrefix, () => config.interBrokerProtocolVersion) {
       override def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = None
     }
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), initialLEO), t1p1 -> initialFetchState(Some(topicId1), initialLEO)))
@@ -643,8 +700,16 @@ class ReplicaFetcherThreadTest {
       t1p1 -> newOffsetForLeaderPartitionResult(t1p1, UNDEFINED_EPOCH, 143)).asJava
 
     // Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsets, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsets, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
 
     // Loop 1 -- both topic partitions will truncate to leader offset even though they don't know
@@ -699,8 +764,16 @@ class ReplicaFetcherThreadTest {
       t1p0 -> newOffsetForLeaderPartitionResult(t1p0, UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET)).asJava
 
     //Create the thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), initialFetchOffset)))
 
     //Run it
@@ -752,8 +825,16 @@ class ReplicaFetcherThreadTest {
     ).asJava
 
     //Create the thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
 
     //Run thread 3 times
@@ -806,8 +887,16 @@ class ReplicaFetcherThreadTest {
     ).asJava
 
     //Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(), new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
 
     //When
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
@@ -858,9 +947,16 @@ class ReplicaFetcherThreadTest {
     ).asJava
 
     //Create the fetcher thread
-    val mockNetwork = new ReplicaFetcherMockBlockingSend(offsetsReply, brokerEndPoint, new SystemTime())
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions, replicaManager, new Metrics(),
-      new SystemTime(), quota, Some(mockNetwork))
+    val mockNetwork = new MockBlockingSender(offsetsReply, brokerEndPoint, new SystemTime())
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      quota,
+      mockNetwork
+    )
 
     //When
     thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), 0L), t1p1 -> initialFetchState(Some(topicId1), 0L)))
@@ -883,24 +979,24 @@ class ReplicaFetcherThreadTest {
   def shouldCatchExceptionFromBlockingSendWhenShuttingDownReplicaFetcherThread(): Unit = {
     val props = TestUtils.createBrokerConfig(1, "localhost:1234")
     val config = KafkaConfig.fromProps(props)
-    val mockBlockingSend: BlockingSend = mock(classOf[BlockingSend])
 
+    val mockBlockingSend: BlockingSend = mock(classOf[BlockingSend])
+    when(mockBlockingSend.brokerEndPoint()).thenReturn(brokerEndPoint)
     when(mockBlockingSend.initiateClose()).thenThrow(new IllegalArgumentException())
     when(mockBlockingSend.close()).thenThrow(new IllegalStateException())
+
     val replicaManager: ReplicaManager = mock(classOf[ReplicaManager])
     when(replicaManager.brokerTopicStats).thenReturn(mock(classOf[BrokerTopicStats]))
 
-    val thread = new ReplicaFetcherThread(
-      name = "bob",
-      fetcherId = 0,
-      sourceBroker = brokerEndPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      metrics =  new Metrics(),
-      time = new SystemTime(),
-      quota = null,
-      leaderEndpointBlockingSend = Some(mockBlockingSend))
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      null,
+      mockBlockingSend
+    )
     thread.start()
 
     // Verify that:
@@ -936,13 +1032,24 @@ class ReplicaFetcherThreadTest {
     val replicaQuota: ReplicaQuota = mock(classOf[ReplicaQuota])
     val log: UnifiedLog = mock(classOf[UnifiedLog])
 
+    when(mockBlockingSend.brokerEndPoint()).thenReturn(brokerEndPoint)
     when(replicaManager.brokerTopicStats).thenReturn(mock(classOf[BrokerTopicStats]))
     when(replicaManager.localLogOrException(any[TopicPartition])).thenReturn(log)
     when(replicaQuota.isThrottled(any[TopicPartition])).thenReturn(false)
     when(log.logStartOffset).thenReturn(0)
 
-    val thread = new ReplicaFetcherThread("bob", 0, brokerEndPoint, config, failedPartitions,
-      replicaManager, new Metrics(), new SystemTime(), replicaQuota, Some(mockBlockingSend))
+    val logContext = new LogContext(s"[ReplicaFetcher replicaId=${config.brokerId}, leaderId=${brokerEndPoint.id}, fetcherId=0] ")
+    val fetchSessionHandler = new FetchSessionHandler(logContext, brokerEndPoint.id)
+    val leader = new RemoteLeaderEndPoint(logContext.logPrefix, mockBlockingSend, fetchSessionHandler, config,
+      replicaManager, replicaQuota, () => config.interBrokerProtocolVersion)
+    val thread = new ReplicaFetcherThread("bob",
+      leader,
+      config,
+      failedPartitions,
+      replicaManager,
+      replicaQuota,
+      logContext.logPrefix,
+      () => config.interBrokerProtocolVersion)
 
     val leaderEpoch = 1
 
@@ -951,7 +1058,7 @@ class ReplicaFetcherThreadTest {
         t1p1 -> PartitionFetchState(Some(topicId1), 155, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None),
         t2p1 -> PartitionFetchState(Some(topicId2), 160, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None))
 
-    val ResultWithPartitions(fetchRequestOpt, _) = thread.buildFetch(partitionMap)
+    val ResultWithPartitions(fetchRequestOpt, _) = thread.leader.buildFetch(partitionMap)
 
     assertTrue(fetchRequestOpt.isDefined)
     val fetchRequestBuilder = fetchRequestOpt.get.fetchRequest
@@ -971,14 +1078,14 @@ class ReplicaFetcherThreadTest {
     responseData.put(tid2p1, new FetchResponseData.PartitionData())
     val fetchResponse = FetchResponse.of(Errors.NONE, 0, 123, responseData)
 
-    thread.fetchSessionHandler.handleResponse(fetchResponse, ApiKeys.FETCH.latestVersion())
+    leader.fetchSessionHandler.handleResponse(fetchResponse, ApiKeys.FETCH.latestVersion())
 
     // Remove t1p0, change the ID for t2p1, and keep t1p1 the same
     val newTopicId = Uuid.randomUuid()
     val partitionMap2 = Map(
       t1p1 -> PartitionFetchState(Some(topicId1), 155, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None),
       t2p1 -> PartitionFetchState(Some(newTopicId), 160, None, leaderEpoch, None, state = Fetching, lastFetchedEpoch = None))
-    val ResultWithPartitions(fetchRequestOpt2, _) = thread.buildFetch(partitionMap2)
+    val ResultWithPartitions(fetchRequestOpt2, _) = thread.leader.buildFetch(partitionMap2)
 
     // Since t1p1 didn't change, we drop that one
     val partitionDataMap2 = partitionMap2.drop(1).map { case (tp, state) =>
@@ -1019,6 +1126,7 @@ class ReplicaFetcherThreadTest {
     val config = KafkaConfig.fromProps(props)
 
     val mockBlockingSend: BlockingSend = mock(classOf[BlockingSend])
+    when(mockBlockingSend.brokerEndPoint()).thenReturn(brokerEndPoint)
 
     val log: UnifiedLog = mock(classOf[UnifiedLog])
 
@@ -1034,17 +1142,15 @@ class ReplicaFetcherThreadTest {
 
     val replicaQuota: ReplicaQuota = mock(classOf[ReplicaQuota])
 
-    val thread = new ReplicaFetcherThread(
-      name = "bob",
-      fetcherId = 0,
-      sourceBroker = brokerEndPoint,
-      brokerConfig = config,
-      failedPartitions = failedPartitions,
-      replicaMgr = replicaManager,
-      metrics =  new Metrics(),
-      time = new SystemTime(),
-      quota = replicaQuota,
-      leaderEndpointBlockingSend = Some(mockBlockingSend))
+    val thread = createReplicaFetcherThread(
+      "bob",
+      0,
+      config,
+      failedPartitions,
+      replicaManager,
+      replicaQuota,
+      mockBlockingSend
+    )
 
     val records = MemoryRecords.withRecords(CompressionType.NONE,
       new SimpleRecord(1000, "foo".getBytes(StandardCharsets.UTF_8)))
@@ -1074,7 +1180,7 @@ class ReplicaFetcherThreadTest {
 
   private def kafkaConfigNoTruncateOnFetch: KafkaConfig = {
     val props = TestUtils.createBrokerConfig(1, "localhost:1234")
-    props.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, KAFKA_2_6_IV0.version)
+    props.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, IBP_2_6_IV0.version)
     KafkaConfig.fromProps(props)
   }
 }
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerConcurrencyTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerConcurrencyTest.scala
index f0003f42a2ea4..651451afad430 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaManagerConcurrencyTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerConcurrencyTest.scala
@@ -20,15 +20,16 @@ import java.net.InetAddress
 import java.util
 import java.util.concurrent.{CompletableFuture, Executors, LinkedBlockingQueue, TimeUnit}
 import java.util.{Optional, Properties}
-
 import kafka.api.LeaderAndIsr
 import kafka.log.{AppendOrigin, LogConfig}
+import kafka.server.metadata.KRaftMetadataCache
 import kafka.server.metadata.MockConfigRepository
 import kafka.utils.TestUtils.waitUntilTrue
 import kafka.utils.{MockTime, ShutdownableThread, TestUtils}
+import org.apache.kafka.common.metadata.RegisterBrokerRecord
 import org.apache.kafka.common.metadata.{PartitionChangeRecord, PartitionRecord, TopicRecord}
 import org.apache.kafka.common.metrics.Metrics
-import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record.SimpleRecord
 import org.apache.kafka.common.replica.ClientMetadata.DefaultClientMetadata
 import org.apache.kafka.common.requests.{FetchRequest, ProduceResponse}
@@ -36,6 +37,7 @@ import org.apache.kafka.common.security.auth.KafkaPrincipal
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{IsolationLevel, TopicIdPartition, TopicPartition, Uuid}
 import org.apache.kafka.image.{MetadataDelta, MetadataImage}
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.apache.kafka.metadata.PartitionRegistration
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}
@@ -69,20 +71,22 @@ class ReplicaManagerConcurrencyTest {
   def testIsrExpandAndShrinkWithConcurrentProduce(): Unit = {
     val localId = 0
     val remoteId = 1
+    val metadataCache = MetadataCache.kRaftMetadataCache(localId)
     val channel = new ControllerChannel
-    val replicaManager = buildReplicaManager(localId, channel)
+    val replicaManager = buildReplicaManager(localId, channel, metadataCache)
 
     // Start with the remote replica out of the ISR
     val initialPartitionRegistration = registration(
       replicaIds = Seq(localId, remoteId),
       isr = Seq(localId),
-      leader = localId
+      leader = localId,
+      LeaderRecoveryState.RECOVERED
     )
 
     val topicModel = new TopicModel(Uuid.randomUuid(), "foo", Map(0 -> initialPartitionRegistration))
     val topicPartition = new TopicPartition(topicModel.name, 0)
     val topicIdPartition = new TopicIdPartition(topicModel.topicId, topicPartition)
-    val controller = new ControllerModel(topicModel, channel, replicaManager)
+    val controller = new ControllerModel(Seq(localId, remoteId), topicModel, channel, replicaManager, metadataCache)
 
     submit(new Clock(time))
     replicaManager.startup()
@@ -138,7 +142,8 @@ class ReplicaManagerConcurrencyTest {
 
   private def buildReplicaManager(
     localId: Int,
-    channel: ControllerChannel
+    channel: ControllerChannel,
+    metadataCache: MetadataCache,
   ): ReplicaManager = {
     val logDir = TestUtils.tempDir()
 
@@ -166,9 +171,9 @@ class ReplicaManagerConcurrencyTest {
       scheduler = time.scheduler,
       logManager = logManager,
       quotaManagers = QuotaFactory.instantiate(config, metrics, time, ""),
-      metadataCache = MetadataCache.kRaftMetadataCache(config.brokerId),
+      metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = new MockAlterIsrManager(channel)
+      alterPartitionManager = new MockAlterPartitionManager(channel)
     ) {
       override def createReplicaFetcherManager(
         metrics: Metrics,
@@ -222,17 +227,21 @@ class ReplicaManagerConcurrencyTest {
         }
       }
 
-      replicaManager.fetchMessages(
-        timeout = random.nextInt(100),
+      val fetchParams = FetchParams(
+        requestVersion = ApiKeys.FETCH.latestVersion,
         replicaId = replicaId,
-        fetchMinBytes = 1,
-        fetchMaxBytes = 1024 * 1024,
-        hardMaxBytesLimit = false,
+        maxWaitMs = random.nextInt(100),
+        minBytes = 1,
+        maxBytes = 1024 * 1024,
+        isolation = FetchIsolation(replicaId, IsolationLevel.READ_UNCOMMITTED),
+        clientMetadata = Some(clientMetadata)
+      )
+
+      replicaManager.fetchMessages(
+        params = fetchParams,
         fetchInfos = Seq(topicIdPartition -> partitionData),
         quota = QuotaFactory.UnboundedQuota,
         responseCallback = fetchCallback,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        clientMetadata = Some(clientMetadata)
       )
 
       val fetchResult = future.get()
@@ -289,7 +298,7 @@ class ReplicaManagerConcurrencyTest {
   case object ShutdownEvent extends ControllerEvent
   case class AlterIsrEvent(
     future: CompletableFuture[LeaderAndIsr],
-    topicPartition: TopicPartition,
+    topicPartition: TopicIdPartition,
     leaderAndIsr: LeaderAndIsr
   ) extends ControllerEvent
 
@@ -301,7 +310,7 @@ class ReplicaManagerConcurrencyTest {
     }
 
     def alterIsr(
-      topicPartition: TopicPartition,
+      topicPartition: TopicIdPartition,
       leaderAndIsr: LeaderAndIsr
     ): CompletableFuture[LeaderAndIsr] = {
       val future = new CompletableFuture[LeaderAndIsr]()
@@ -319,9 +328,11 @@ class ReplicaManagerConcurrencyTest {
   }
 
   private class ControllerModel(
+    brokerIds: Seq[Int],
     topic: TopicModel,
     channel: ControllerChannel,
-    replicaManager: ReplicaManager
+    replicaManager: ReplicaManager,
+    metadataCache: KRaftMetadataCache
   ) extends ShutdownableThread(name = "controller", isInterruptible = false) {
     private var latestImage = MetadataImage.EMPTY
 
@@ -339,8 +350,15 @@ class ReplicaManagerConcurrencyTest {
       channel.poll() match {
         case InitializeEvent =>
           val delta = new MetadataDelta(latestImage)
+          brokerIds.foreach { brokerId =>
+            delta.replay(new RegisterBrokerRecord()
+              .setBrokerId(brokerId)
+              .setFenced(false)
+            )
+          }
           topic.initialize(delta)
           latestImage = delta.apply()
+          metadataCache.setImage(latestImage)
           replicaManager.applyDelta(delta.topicsDelta, latestImage)
 
         case AlterIsrEvent(future, topicPartition, leaderAndIsr) =>
@@ -374,7 +392,7 @@ class ReplicaManagerConcurrencyTest {
     }
 
     def alterIsr(
-      topicPartition: TopicPartition,
+      topicPartition: TopicIdPartition,
       leaderAndIsr: LeaderAndIsr,
       delta: MetadataDelta
     ): LeaderAndIsr = {
@@ -405,7 +423,7 @@ class ReplicaManagerConcurrencyTest {
         .partitionChanges
         .get(partitionId)
 
-      leaderAndIsr.withZkVersion(registration.partitionEpoch)
+      leaderAndIsr.withPartitionEpoch(registration.partitionEpoch)
     }
 
     private def toList(ints: Array[Int]): util.List[Integer] = {
@@ -425,9 +443,9 @@ class ReplicaManagerConcurrencyTest {
     }
   }
 
-  private class MockAlterIsrManager(channel: ControllerChannel) extends AlterIsrManager {
+  private class MockAlterPartitionManager(channel: ControllerChannel) extends AlterPartitionManager {
     override def submit(
-      topicPartition: TopicPartition,
+      topicPartition: TopicIdPartition,
       leaderAndIsr: LeaderAndIsr,
       controllerEpoch: Int
     ): CompletableFuture[LeaderAndIsr] = {
@@ -439,8 +457,9 @@ class ReplicaManagerConcurrencyTest {
     replicaIds: Seq[Int],
     isr: Seq[Int],
     leader: Int,
+    leaderRecoveryState: LeaderRecoveryState,
     leaderEpoch: Int = 0,
-    version: Int = 0
+    partitionEpoch: Int = 0
   ): PartitionRegistration = {
     new PartitionRegistration(
       replicaIds.toArray,
@@ -448,8 +467,9 @@ class ReplicaManagerConcurrencyTest {
       Array.empty[Int],
       Array.empty[Int],
       leader,
+      leaderRecoveryState,
       leaderEpoch,
-      version
+      partitionEpoch
     )
   }
 
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerQuotasTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerQuotasTest.scala
index 5a2b4d08dca6e..d0826f54c5d3a 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaManagerQuotasTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerQuotasTest.scala
@@ -18,20 +18,22 @@ package kafka.server
 
 import java.io.File
 import java.util.{Collections, Optional, Properties}
-import kafka.cluster.Partition
+import kafka.cluster.{Partition, PartitionTest}
 import kafka.log.{LogManager, LogOffsetSnapshot, UnifiedLog}
 import kafka.server.QuotaFactory.QuotaManagers
 import kafka.utils._
-import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
 import org.apache.kafka.common.metrics.Metrics
+import org.apache.kafka.common.protocol.ApiKeys
 import org.apache.kafka.common.record.{CompressionType, MemoryRecords, SimpleRecord}
-import org.apache.kafka.common.requests.FetchRequest.PartitionData
 import org.apache.kafka.common.requests.FetchRequest
+import org.apache.kafka.common.requests.FetchRequest.PartitionData
+import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, Test}
-import org.mockito.{AdditionalMatchers, ArgumentMatchers}
 import org.mockito.ArgumentMatchers.{any, anyBoolean, anyInt, anyLong}
 import org.mockito.Mockito.{mock, when}
+import org.mockito.{AdditionalMatchers, ArgumentMatchers}
 
 import scala.jdk.CollectionConverters._
 
@@ -62,18 +64,10 @@ class ReplicaManagerQuotasTest {
       .thenReturn(false)
       .thenReturn(true)
 
-    val fetch = replicaManager.readFromLocalLog(
-      replicaId = followerReplicaId,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchHighWatermark,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
-      readPartitionInfo = fetchInfo,
-      quota = quota,
-      clientMetadata = None)
+    val fetchParams = PartitionTest.followerFetchParams(followerReplicaId)
+    val fetch = replicaManager.readFromLocalLog(fetchParams, fetchInfo, quota, readFromPurgatory = false)
     assertEquals(1, fetch.find(_._1 == topicIdPartition1).get._2.info.records.batches.asScala.size,
       "Given two partitions, with only one throttled, we should get the first")
-
     assertEquals(0, fetch.find(_._1 == topicIdPartition2).get._2.info.records.batches.asScala.size,
       "But we shouldn't get the second")
   }
@@ -88,15 +82,8 @@ class ReplicaManagerQuotasTest {
       .thenReturn(true)
       .thenReturn(true)
 
-    val fetch = replicaManager.readFromLocalLog(
-      replicaId = followerReplicaId,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchHighWatermark,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
-      readPartitionInfo = fetchInfo,
-      quota = quota,
-      clientMetadata = None)
+    val fetchParams = PartitionTest.followerFetchParams(followerReplicaId)
+    val fetch = replicaManager.readFromLocalLog(fetchParams, fetchInfo, quota, readFromPurgatory = false)
     assertEquals(0, fetch.find(_._1 == topicIdPartition1).get._2.info.records.batches.asScala.size,
       "Given two partitions, with both throttled, we should get no messages")
     assertEquals(0, fetch.find(_._1 == topicIdPartition2).get._2.info.records.batches.asScala.size,
@@ -113,15 +100,8 @@ class ReplicaManagerQuotasTest {
       .thenReturn(false)
       .thenReturn(false)
 
-    val fetch = replicaManager.readFromLocalLog(
-      replicaId = followerReplicaId,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchHighWatermark,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
-      readPartitionInfo = fetchInfo,
-      quota = quota,
-      clientMetadata = None)
+    val fetchParams = PartitionTest.followerFetchParams(followerReplicaId)
+    val fetch = replicaManager.readFromLocalLog(fetchParams, fetchInfo, quota, readFromPurgatory = false)
     assertEquals(1, fetch.find(_._1 == topicIdPartition1).get._2.info.records.batches.asScala.size,
       "Given two partitions, with both non-throttled, we should get both messages")
     assertEquals(1, fetch.find(_._1 == topicIdPartition2).get._2.info.records.batches.asScala.size,
@@ -138,15 +118,8 @@ class ReplicaManagerQuotasTest {
       .thenReturn(false)
       .thenReturn(true)
 
-    val fetch = replicaManager.readFromLocalLog(
-      replicaId = followerReplicaId,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchHighWatermark,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
-      readPartitionInfo = fetchInfo,
-      quota = quota,
-      clientMetadata = None)
+    val fetchParams = PartitionTest.followerFetchParams(followerReplicaId)
+    val fetch = replicaManager.readFromLocalLog(fetchParams, fetchInfo, quota, readFromPurgatory = false)
     assertEquals(1, fetch.find(_._1 == topicIdPartition1).get._2.info.records.batches.asScala.size,
       "Given two partitions, with only one throttled, we should get the first")
 
@@ -161,19 +134,10 @@ class ReplicaManagerQuotasTest {
     val quota = mockQuota()
     when(quota.isQuotaExceeded).thenReturn(true)
 
-    val fetch = replicaManager.readFromLocalLog(
-      replicaId = FetchRequest.CONSUMER_REPLICA_ID,
-      fetchOnlyFromLeader = true,
-      fetchIsolation = FetchHighWatermark,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
-      readPartitionInfo = fetchInfo,
-      quota = quota,
-      clientMetadata = None).toMap
-
+    val fetchParams = PartitionTest.consumerFetchParams()
+    val fetch = replicaManager.readFromLocalLog(fetchParams, fetchInfo, quota, readFromPurgatory = false).toMap
     assertEquals(1, fetch(topicIdPartition1).info.records.batches.asScala.size,
       "Replication throttled partitions should return data for consumer fetch")
-
     assertEquals(1, fetch(topicIdPartition2).info.records.batches.asScala.size,
       "Replication throttled partitions should return data for consumer fetch")
   }
@@ -204,17 +168,23 @@ class ReplicaManagerQuotasTest {
       val tp = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("t1", 0))
       val fetchPartitionStatus = FetchPartitionStatus(LogOffsetMetadata(messageOffset = 50L, segmentBaseOffset = 0L,
          relativePositionInSegment = 250), new PartitionData(Uuid.ZERO_UUID, 50, 0, 1, Optional.empty()))
-      val fetchMetadata = FetchMetadata(fetchMinBytes = 1,
-        fetchMaxBytes = 1000,
-        hardMaxBytesLimit = true,
-        fetchOnlyLeader = true,
-        fetchIsolation = FetchLogEnd,
-        isFromFollower = true,
+      val fetchParams = FetchParams(
+        requestVersion = ApiKeys.FETCH.latestVersion,
         replicaId = 1,
-        fetchPartitionStatus = List((tp, fetchPartitionStatus))
+        maxWaitMs = 600,
+        minBytes = 1,
+        maxBytes = 1000,
+        isolation = FetchLogEnd,
+        clientMetadata = None
       )
-      new DelayedFetch(delayMs = 600, fetchMetadata = fetchMetadata, replicaManager = replicaManager,
-        quota = null, clientMetadata = None, responseCallback = null) {
+
+      new DelayedFetch(
+        params = fetchParams,
+        fetchPartitionStatus = Seq(tp -> fetchPartitionStatus),
+        replicaManager = replicaManager,
+        quota = null,
+        responseCallback = null
+      ) {
         override def forceComplete(): Boolean = true
       }
     }
@@ -248,17 +218,23 @@ class ReplicaManagerQuotasTest {
       val tidp = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("t1", 0))
       val fetchPartitionStatus = FetchPartitionStatus(LogOffsetMetadata(messageOffset = 50L, segmentBaseOffset = 0L,
         relativePositionInSegment = 250), new PartitionData(Uuid.ZERO_UUID, 50, 0, 1, Optional.empty()))
-      val fetchMetadata = FetchMetadata(fetchMinBytes = 1,
-        fetchMaxBytes = 1000,
-        hardMaxBytesLimit = true,
-        fetchOnlyLeader = true,
-        fetchIsolation = FetchLogEnd,
-        isFromFollower = false,
+      val fetchParams = FetchParams(
+        requestVersion = ApiKeys.FETCH.latestVersion,
         replicaId = FetchRequest.CONSUMER_REPLICA_ID,
-        fetchPartitionStatus = List((tidp, fetchPartitionStatus))
+        maxWaitMs = 600,
+        minBytes = 1,
+        maxBytes = 1000,
+        isolation = FetchHighWatermark,
+        clientMetadata = None
       )
-      new DelayedFetch(delayMs = 600, fetchMetadata = fetchMetadata, replicaManager = replicaManager,
-        quota = null, clientMetadata = None, responseCallback = null) {
+
+      new DelayedFetch(
+        params = fetchParams,
+        fetchPartitionStatus = Seq(tidp -> fetchPartitionStatus),
+        replicaManager = replicaManager,
+        quota = null,
+        responseCallback = null
+      ) {
         override def forceComplete(): Boolean = true
       }
     }
@@ -300,6 +276,10 @@ class ReplicaManagerQuotasTest {
         MemoryRecords.EMPTY
       ))
 
+    when(log.maybeIncrementHighWatermark(
+      any[LogOffsetMetadata]
+    )).thenReturn(None)
+
     //Create log manager
     val logManager: LogManager = mock(classOf[LogManager])
 
@@ -307,7 +287,7 @@ class ReplicaManagerQuotasTest {
     when(logManager.getLog(any[TopicPartition], anyBoolean)).thenReturn(Some(log))
     when(logManager.liveLogDirs).thenReturn(Array.empty[File])
 
-    val alterIsrManager: AlterIsrManager = mock(classOf[AlterIsrManager])
+    val alterIsrManager: AlterPartitionManager = mock(classOf[AlterPartitionManager])
 
     val leaderBrokerId = configs.head.brokerId
     quotaManager = QuotaFactory.instantiate(configs.head, metrics, time, "")
@@ -318,9 +298,9 @@ class ReplicaManagerQuotasTest {
       scheduler = scheduler,
       logManager = logManager,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(leaderBrokerId),
+      metadataCache = MetadataCache.zkMetadataCache(leaderBrokerId, configs.head.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(configs.head.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
 
     //create the two replicas
     for ((p, _) <- fetchInfo) {
@@ -330,10 +310,12 @@ class ReplicaManagerQuotasTest {
       partition.setLog(log, isFutureLog = false)
 
       partition.updateAssignmentAndIsr(
-        assignment = Seq(leaderBrokerId, configs.last.brokerId),
+        replicas = Seq(leaderBrokerId, configs.last.brokerId),
+        isLeader = true,
         isr = if (bothReplicasInSync) Set(leaderBrokerId, configs.last.brokerId) else Set(leaderBrokerId),
         addingReplicas = Seq.empty,
-        removingReplicas = Seq.empty
+        removingReplicas = Seq.empty,
+        leaderRecoveryState = LeaderRecoveryState.RECOVERED
       )
     }
   }
@@ -350,4 +332,5 @@ class ReplicaManagerQuotasTest {
     when(quota.isThrottled(any[TopicPartition])).thenReturn(true)
     quota
   }
+
 }
diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
index b606f5bcbba41..8050a36d1d942 100644
--- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala
@@ -21,19 +21,20 @@ import java.io.File
 import java.net.InetAddress
 import java.nio.file.Files
 import java.util
-import java.util.concurrent.atomic.AtomicReference
+import java.util.concurrent.atomic.{AtomicLong, AtomicReference}
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.stream.IntStream
 import java.util.{Collections, Optional, Properties}
-
 import kafka.api._
 import kafka.cluster.{BrokerEndPoint, Partition}
 import kafka.log._
 import kafka.server.QuotaFactory.{QuotaManagers, UnboundedQuota}
 import kafka.server.checkpoints.{LazyOffsetCheckpoints, OffsetCheckpointFile}
-import kafka.server.epoch.util.ReplicaFetcherMockBlockingSend
+import kafka.server.epoch.util.MockBlockingSender
 import kafka.utils.timer.MockTimer
-import kafka.utils.{MockScheduler, MockTime, TestUtils}
+import kafka.utils.{MockScheduler, MockTime, Pool, TestUtils}
+import org.apache.kafka.clients.FetchSessionHandler
+import org.apache.kafka.common.errors.KafkaStorageException
 import org.apache.kafka.common.message.FetchResponseData
 import org.apache.kafka.common.message.LeaderAndIsrRequestData
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState
@@ -44,16 +45,19 @@ import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.record._
-import org.apache.kafka.common.replica.ClientMetadata
+import org.apache.kafka.common.replica.{ClientMetadata, PartitionView, ReplicaSelector, ReplicaView}
 import org.apache.kafka.common.replica.ClientMetadata.DefaultClientMetadata
 import org.apache.kafka.common.requests.FetchRequest.PartitionData
 import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse
 import org.apache.kafka.common.requests._
 import org.apache.kafka.common.security.auth.KafkaPrincipal
-import org.apache.kafka.common.utils.{Time, Utils}
+import org.apache.kafka.common.utils.{LogContext, Time, Utils}
 import org.apache.kafka.common.{IsolationLevel, Node, TopicIdPartition, TopicPartition, Uuid}
 import org.apache.kafka.image.{AclsImage, ClientQuotasImage, ClusterImageTest, ConfigurationsImage, FeaturesImage, MetadataImage, ProducerIdsImage, TopicsDelta, TopicsImage}
+import org.apache.kafka.metadata.LeaderConstants.NO_LEADER
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.apache.kafka.raft.{OffsetAndEpoch => RaftOffsetAndEpoch}
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_6_IV0
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.junit.jupiter.params.ParameterizedTest
@@ -61,8 +65,8 @@ import org.junit.jupiter.params.provider.ValueSource
 import org.mockito.invocation.InvocationOnMock
 import org.mockito.stubbing.Answer
 import org.mockito.ArgumentMatchers
-import org.mockito.ArgumentMatchers.{any, anyInt}
-import org.mockito.Mockito.{mock, times, verify, when}
+import org.mockito.ArgumentMatchers.{any, anyInt, anyString}
+import org.mockito.Mockito.{mock, never, reset, times, verify, when}
 
 import scala.collection.{Map, Seq, mutable}
 import scala.jdk.CollectionConverters._
@@ -76,7 +80,7 @@ class ReplicaManagerTest {
   val time = new MockTime
   val scheduler = new MockScheduler(time)
   val metrics = new Metrics
-  var alterIsrManager: AlterIsrManager = _
+  var alterPartitionManager: AlterPartitionManager = _
   var config: KafkaConfig = _
   var quotaManager: QuotaManagers = _
 
@@ -90,7 +94,7 @@ class ReplicaManagerTest {
   def setUp(): Unit = {
     val props = TestUtils.createBrokerConfig(1, TestUtils.MockZkConnect)
     config = KafkaConfig.fromProps(props)
-    alterIsrManager = mock(classOf[AlterIsrManager])
+    alterPartitionManager = mock(classOf[AlterPartitionManager])
     quotaManager = QuotaFactory.instantiate(config, metrics, time, "")
   }
 
@@ -111,9 +115,9 @@ class ReplicaManagerTest {
       scheduler = new MockScheduler(time),
       logManager = mockLogMgr,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterPartitionManager)
     try {
       val partition = rm.createPartition(new TopicPartition(topic, 1))
       partition.createLogIfNotExists(isNew = false, isFutureReplica = false,
@@ -138,9 +142,9 @@ class ReplicaManagerTest {
       scheduler = new MockScheduler(time),
       logManager = mockLogMgr,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterPartitionManager)
     try {
       val partition = rm.createPartition(new TopicPartition(topic, 1))
       partition.createLogIfNotExists(isNew = false, isFutureReplica = false,
@@ -162,9 +166,9 @@ class ReplicaManagerTest {
       scheduler = new MockScheduler(time),
       logManager = mockLogMgr,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager,
+      alterPartitionManager = alterPartitionManager,
       threadNamePrefix = Option(this.getClass.getName))
     try {
       def callback(responseStatus: Map[TopicPartition, PartitionResponse]): Unit = {
@@ -210,6 +214,7 @@ class ReplicaManagerTest {
     val aliveBrokers = Seq(new Node(0, "host0", 0), new Node(1, "host1", 1))
     val metadataCache: MetadataCache = mock(classOf[MetadataCache])
     mockGetAliveBrokerFunctions(metadataCache, aliveBrokers)
+    when(metadataCache.metadataVersion()).thenReturn(config.interBrokerProtocolVersion)
     val rm = new ReplicaManager(
       metrics = metrics,
       config = config,
@@ -219,7 +224,7 @@ class ReplicaManagerTest {
       quotaManagers = quotaManager,
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterPartitionManager)
 
     try {
       val brokerList = Seq[Integer](0, 1).asJava
@@ -237,7 +242,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(false)).asJava,
         topicIds,
@@ -260,14 +265,14 @@ class ReplicaManagerTest {
           .setLeader(1)
           .setLeaderEpoch(1)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(false)).asJava,
         topicIds,
         Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
       rm.becomeLeaderOrFollower(1, leaderAndIsrRequest2, (_, _) => ())
 
-      assertTrue(appendResult.isFired)
+      assertTrue(appendResult.hasFired)
     } finally {
       rm.shutdown(checkpointHW = false)
     }
@@ -297,7 +302,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(epoch)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -357,7 +362,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         Collections.singletonMap(topic, Uuid.randomUuid()),
@@ -423,7 +428,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -483,7 +488,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -506,12 +511,15 @@ class ReplicaManagerTest {
       }
 
       // fetch as follower to advance the high watermark
-      fetchAsFollower(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      fetchPartitionAsFollower(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, numRecords, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED)
+        replicaId = 1
+      )
 
       // fetch should return empty since LSO should be stuck at 0
-      var consumerFetchResult = fetchAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      var consumerFetchResult = fetchPartitionAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
         isolationLevel = IsolationLevel.READ_COMMITTED)
       var fetchData = consumerFetchResult.assertFired
@@ -521,10 +529,15 @@ class ReplicaManagerTest {
       assertEquals(Some(List.empty[FetchResponseData.AbortedTransaction]), fetchData.abortedTransactions)
 
       // delayed fetch should timeout and return nothing
-      consumerFetchResult = fetchAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      consumerFetchResult = fetchPartitionAsConsumer(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_COMMITTED, minBytes = 1000)
-      assertFalse(consumerFetchResult.isFired)
+        isolationLevel = IsolationLevel.READ_COMMITTED,
+        minBytes = 1000,
+        maxWaitMs = 1000
+      )
+      assertFalse(consumerFetchResult.hasFired)
       timer.advanceClock(1001)
 
       fetchData = consumerFetchResult.assertFired
@@ -542,21 +555,27 @@ class ReplicaManagerTest {
 
       // the LSO has advanced, but the appended commit marker has not been replicated, so
       // none of the data from the transaction should be visible yet
-      consumerFetchResult = fetchAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      consumerFetchResult = fetchPartitionAsConsumer(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_COMMITTED)
+        isolationLevel = IsolationLevel.READ_COMMITTED
+      )
 
       fetchData = consumerFetchResult.assertFired
       assertEquals(Errors.NONE, fetchData.error)
       assertTrue(fetchData.records.batches.asScala.isEmpty)
 
       // fetch as follower to advance the high watermark
-      fetchAsFollower(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      fetchPartitionAsFollower(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, numRecords + 1, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED)
+        replicaId = 1
+      )
 
       // now all of the records should be fetchable
-      consumerFetchResult = fetchAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      consumerFetchResult = fetchPartitionAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
         isolationLevel = IsolationLevel.READ_COMMITTED)
 
@@ -590,7 +609,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -620,16 +639,24 @@ class ReplicaManagerTest {
         .onFire { response => assertEquals(Errors.NONE, response.error) }
 
       // fetch as follower to advance the high watermark
-      fetchAsFollower(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      fetchPartitionAsFollower(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, numRecords + 1, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED)
+        replicaId = 1
+      )
 
       // Set the minBytes in order force this request to enter purgatory. When it returns, we should still
       // see the newly aborted transaction.
-      val fetchResult = fetchAsConsumer(replicaManager, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      val fetchResult = fetchPartitionAsConsumer(
+        replicaManager,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
-        isolationLevel = IsolationLevel.READ_COMMITTED, minBytes = 10000)
-      assertFalse(fetchResult.isFired)
+        isolationLevel = IsolationLevel.READ_COMMITTED,
+        minBytes = 10000,
+        maxWaitMs = 1000
+      )
+      assertFalse(fetchResult.hasFired)
 
       timer.advanceClock(1001)
       val fetchData = fetchResult.assertFired
@@ -667,7 +694,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(0)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(false)).asJava,
         topicIds.asJava,
@@ -685,8 +712,12 @@ class ReplicaManagerTest {
       }
 
       // Followers are always allowed to fetch above the high watermark
-      val followerFetchResult = fetchAsFollower(rm, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
-        new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.empty()))
+      val followerFetchResult = fetchPartitionAsFollower(
+        rm,
+        new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+        new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.empty()),
+        replicaId = 1
+      )
       val followerFetchData = followerFetchResult.assertFired
       assertEquals(Errors.NONE, followerFetchData.error, "Should not give an exception")
       assertTrue(followerFetchData.records.batches.iterator.hasNext, "Should return some data")
@@ -694,7 +725,7 @@ class ReplicaManagerTest {
       // Consumers are not allowed to consume above the high watermark. However, since the
       // high watermark could be stale at the time of the request, we do not return an out of
       // range error and instead return an empty record set.
-      val consumerFetchResult = fetchAsConsumer(rm, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
+      val consumerFetchResult = fetchPartitionAsConsumer(rm, new TopicIdPartition(topicId, new TopicPartition(topic, 0)),
         new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.empty()))
       val consumerFetchData = consumerFetchResult.assertFired
       assertEquals(Errors.NONE, consumerFetchData.error, "Should not give an exception")
@@ -724,7 +755,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(replicas)
         .setIsNew(true)
       val leaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
@@ -740,8 +771,8 @@ class ReplicaManagerTest {
 
       assertTrue(partition.getReplica(1).isDefined)
       val followerReplica = partition.getReplica(1).get
-      assertEquals(-1L, followerReplica.logStartOffset)
-      assertEquals(-1L, followerReplica.logEndOffset)
+      assertEquals(-1L, followerReplica.stateSnapshot.logStartOffset)
+      assertEquals(-1L, followerReplica.stateSnapshot.logEndOffset)
 
       // Leader appends some data
       for (i <- 1 to 5) {
@@ -751,76 +782,53 @@ class ReplicaManagerTest {
       }
 
       // We receive one valid request from the follower and replica state is updated
-      var successfulFetch: Option[FetchPartitionData] = None
-      def callback(response: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
-        successfulFetch = response.headOption.filter(_._1 == tidp).map(_._2)
-      }
-
       val validFetchPartitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, maxFetchBytes,
         Optional.of(leaderEpoch))
 
-      replicaManager.fetchMessages(
-        timeout = 0L,
-        replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
-        fetchInfos = Seq(tidp -> validFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+      val validFetchResult = fetchPartitionAsFollower(
+        replicaManager,
+        tidp,
+        validFetchPartitionData,
+        replicaId = 1
       )
 
-      assertTrue(successfulFetch.isDefined)
-      assertEquals(0L, followerReplica.logStartOffset)
-      assertEquals(0L, followerReplica.logEndOffset)
-
+      assertEquals(Errors.NONE, validFetchResult.assertFired.error)
+      assertEquals(0L, followerReplica.stateSnapshot.logStartOffset)
+      assertEquals(0L, followerReplica.stateSnapshot.logEndOffset)
 
       // Next we receive an invalid request with a higher fetch offset, but an old epoch.
       // We expect that the replica state does not get updated.
       val invalidFetchPartitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 3L, 0L, maxFetchBytes,
         Optional.of(leaderEpoch - 1))
 
-      replicaManager.fetchMessages(
-        timeout = 0L,
-        replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
-        fetchInfos = Seq(tidp -> invalidFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+
+      val invalidFetchResult = fetchPartitionAsFollower(
+        replicaManager,
+        tidp,
+        invalidFetchPartitionData,
+        replicaId = 1
       )
 
-      assertTrue(successfulFetch.isDefined)
-      assertEquals(0L, followerReplica.logStartOffset)
-      assertEquals(0L, followerReplica.logEndOffset)
+      assertEquals(Errors.FENCED_LEADER_EPOCH, invalidFetchResult.assertFired.error)
+      assertEquals(0L, followerReplica.stateSnapshot.logStartOffset)
+      assertEquals(0L, followerReplica.stateSnapshot.logEndOffset)
 
       // Next we receive an invalid request with a higher fetch offset, but a diverging epoch.
       // We expect that the replica state does not get updated.
       val divergingFetchPartitionData = new FetchRequest.PartitionData(tidp.topicId, 3L, 0L, maxFetchBytes,
         Optional.of(leaderEpoch), Optional.of(leaderEpoch - 1))
 
-      replicaManager.fetchMessages(
-        timeout = 0L,
-        replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
-        fetchInfos = Seq(tidp -> divergingFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+      val divergingEpochResult = fetchPartitionAsFollower(
+        replicaManager,
+        tidp,
+        divergingFetchPartitionData,
+        replicaId = 1
       )
 
-      assertTrue(successfulFetch.isDefined)
-      assertEquals(0L, followerReplica.logStartOffset)
-      assertEquals(0L, followerReplica.logEndOffset)
-
+      assertEquals(Errors.NONE, divergingEpochResult.assertFired.error)
+      assertTrue(divergingEpochResult.assertFired.divergingEpoch.isDefined)
+      assertEquals(0L, followerReplica.stateSnapshot.logStartOffset)
+      assertEquals(0L, followerReplica.stateSnapshot.logEndOffset)
     } finally {
       replicaManager.shutdown(checkpointHW = false)
     }
@@ -846,7 +854,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(replicas)
         .setIsNew(true)
       val leaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
@@ -869,18 +877,14 @@ class ReplicaManagerTest {
       def callback(response: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
         successfulFetch = response
       }
-      replicaManager.fetchMessages(
-        timeout = 0L,
+
+      fetchPartitions(
+        replicaManager,
         replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
         fetchInfos = Seq(inconsistentTidp -> validFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+        responseCallback = callback
       )
+
       val fetch1 = successfulFetch.headOption.filter(_._1 == inconsistentTidp).map(_._2)
       assertTrue(fetch1.isDefined)
       assertEquals(Errors.INCONSISTENT_TOPIC_ID, fetch1.get.error)
@@ -889,17 +893,11 @@ class ReplicaManagerTest {
       // Fetch messages simulating an ID in the log.
       // We should not see topic ID errors.
       val zeroTidp = new TopicIdPartition(Uuid.ZERO_UUID, tidp.topicPartition)
-      replicaManager.fetchMessages(
-        timeout = 0L,
+      fetchPartitions(
+        replicaManager,
         replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
         fetchInfos = Seq(zeroTidp -> validFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+        responseCallback = callback
       )
       val fetch2 = successfulFetch.headOption.filter(_._1 == zeroTidp).map(_._2)
       assertTrue(fetch2.isDefined)
@@ -917,7 +915,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(leaderEpoch)
         .setIsr(replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(replicas)
         .setIsNew(true)
       val leaderAndIsrRequest2 = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
@@ -930,17 +928,11 @@ class ReplicaManagerTest {
       assertEquals(None, replicaManager.getPartitionOrException(tp2).topicId)
 
       // Fetch messages simulating the request containing a topic ID. We should not have an error.
-      replicaManager.fetchMessages(
-        timeout = 0L,
+      fetchPartitions(
+        replicaManager,
         replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
         fetchInfos = Seq(tidp2 -> validFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+        responseCallback = callback
       )
       val fetch3 = successfulFetch.headOption.filter(_._1 == tidp2).map(_._2)
       assertTrue(fetch3.isDefined)
@@ -948,17 +940,11 @@ class ReplicaManagerTest {
 
       // Fetch messages simulating the request not containing a topic ID. We should not have an error.
       val zeroTidp2 = new TopicIdPartition(Uuid.ZERO_UUID, tidp2.topicPartition)
-      replicaManager.fetchMessages(
-        timeout = 0L,
+      fetchPartitions(
+        replicaManager,
         replicaId = 1,
-        fetchMinBytes = 1,
-        fetchMaxBytes = maxFetchBytes,
-        hardMaxBytesLimit = false,
         fetchInfos = Seq(zeroTidp2 -> validFetchPartitionData),
-        quota = UnboundedQuota,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        responseCallback = callback,
-        clientMetadata = None
+        responseCallback = callback
       )
       val fetch4 = successfulFetch.headOption.filter(_._1 == zeroTidp2).map(_._2)
       assertTrue(fetch4.isDefined)
@@ -990,16 +976,17 @@ class ReplicaManagerTest {
       val partition0Replicas = Seq[Integer](0, 1).asJava
       val partition1Replicas = Seq[Integer](0, 2).asJava
       val topicIds = Map(tp0.topic -> topicId, tp1.topic -> topicId).asJava
+      val leaderEpoch = 0
       val leaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
         Seq(
           new LeaderAndIsrPartitionState()
             .setTopicName(tp0.topic)
             .setPartitionIndex(tp0.partition)
             .setControllerEpoch(0)
-            .setLeader(0)
+            .setLeader(leaderEpoch)
             .setLeaderEpoch(0)
             .setIsr(partition0Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition0Replicas)
             .setIsNew(true),
           new LeaderAndIsrPartitionState()
@@ -1007,9 +994,9 @@ class ReplicaManagerTest {
             .setPartitionIndex(tp1.partition)
             .setControllerEpoch(0)
             .setLeader(0)
-            .setLeaderEpoch(0)
+            .setLeaderEpoch(leaderEpoch)
             .setIsr(partition1Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition1Replicas)
             .setIsNew(true)
         ).asJava,
@@ -1041,28 +1028,24 @@ class ReplicaManagerTest {
         assertEquals(Errors.NONE, tp0Status.get.error)
         assertTrue(tp0Status.get.records.batches.iterator.hasNext)
 
+        // Replica 1 is not a valid replica for partition 1
         val tp1Status = responseStatusMap.get(tidp1)
-        assertTrue(tp1Status.isDefined)
-        assertEquals(0, tp1Status.get.highWatermark)
-        assertEquals(Some(0), tp0Status.get.lastStableOffset)
-        assertEquals(Errors.NONE, tp1Status.get.error)
-        assertFalse(tp1Status.get.records.batches.iterator.hasNext)
+        assertEquals(Errors.UNKNOWN_LEADER_EPOCH, tp1Status.get.error)
       }
 
-      replicaManager.fetchMessages(
-        timeout = 1000,
+      fetchPartitions(
+        replicaManager,
         replicaId = 1,
-        fetchMinBytes = 0,
-        fetchMaxBytes = Int.MaxValue,
-        hardMaxBytesLimit = false,
         fetchInfos = Seq(
-          tidp0 -> new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.empty()),
-          tidp1 -> new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.empty())),
-        quota = UnboundedQuota,
+          tidp0 -> new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.of[Integer](leaderEpoch)),
+          tidp1 -> new PartitionData(Uuid.ZERO_UUID, 1, 0, 100000, Optional.of[Integer](leaderEpoch))
+        ),
         responseCallback = fetchCallback,
-        isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-        clientMetadata = None
+        maxWaitMs = 1000,
+        minBytes = 0,
+        maxBytes = Int.MaxValue
       )
+
       val tp0Log = replicaManager.localLog(tp0)
       assertTrue(tp0Log.isDefined)
       assertEquals(1, tp0Log.get.highWatermark, "hw should be incremented")
@@ -1084,7 +1067,7 @@ class ReplicaManagerTest {
   @Test
   def testBecomeFollowerWhenLeaderIsUnchangedButMissedLeaderUpdateIbp26(): Unit = {
     val extraProps = new Properties
-    extraProps.put(KafkaConfig.InterBrokerProtocolVersionProp, KAFKA_2_6_IV0.version)
+    extraProps.put(KafkaConfig.InterBrokerProtocolVersionProp, IBP_2_6_IV0.version)
     verifyBecomeFollowerWhenLeaderIsUnchangedButMissedLeaderUpdate(extraProps, expectTruncation = true)
   }
 
@@ -1205,8 +1188,6 @@ class ReplicaManagerTest {
       val tp0 = new TopicPartition(topic, 0)
       val tidp0 = new TopicIdPartition(topicId, tp0)
 
-      initializeLogAndTopicId(replicaManager, tp0, topicId)
-
       // Make this replica the follower
       val leaderAndIsrRequest2 = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
         Seq(new LeaderAndIsrPartitionState()
@@ -1216,7 +1197,7 @@ class ReplicaManagerTest {
           .setLeader(1)
           .setLeaderEpoch(1)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(false)).asJava,
         Collections.singletonMap(topic, topicId),
@@ -1226,12 +1207,12 @@ class ReplicaManagerTest {
       val metadata: ClientMetadata = new DefaultClientMetadata("rack-a", "client-id",
         InetAddress.getByName("localhost"), KafkaPrincipal.ANONYMOUS, "default")
 
-      val consumerResult = fetchAsConsumer(replicaManager, tidp0,
+      val consumerResult = fetchPartitionAsConsumer(replicaManager, tidp0,
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
         clientMetadata = Some(metadata))
 
       // Fetch from follower succeeds
-      assertTrue(consumerResult.isFired)
+      assertTrue(consumerResult.hasFired)
 
       // But only leader will compute preferred replica
       assertTrue(consumerResult.assertFired.preferredReadReplica.isEmpty)
@@ -1263,9 +1244,7 @@ class ReplicaManagerTest {
       val tp0 = new TopicPartition(topic, 0)
       val tidp0 = new TopicIdPartition(topicId, tp0)
 
-      initializeLogAndTopicId(replicaManager, tp0, topicId)
-
-      // Make this replica the follower
+      // Make this replica the leader
       val leaderAndIsrRequest2 = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
         Seq(new LeaderAndIsrPartitionState()
           .setTopicName(topic)
@@ -1274,22 +1253,22 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(1)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(false)).asJava,
         Collections.singletonMap(topic, topicId),
         Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
       replicaManager.becomeLeaderOrFollower(1, leaderAndIsrRequest2, (_, _) => ())
 
-      val metadata: ClientMetadata = new DefaultClientMetadata("rack-a", "client-id",
+      val metadata = new DefaultClientMetadata("rack-a", "client-id",
         InetAddress.getByName("localhost"), KafkaPrincipal.ANONYMOUS, "default")
 
-      val consumerResult = fetchAsConsumer(replicaManager, tidp0,
+      val consumerResult = fetchPartitionAsConsumer(replicaManager, tidp0,
         new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000, Optional.empty()),
         clientMetadata = Some(metadata))
 
-      // Fetch from follower succeeds
-      assertTrue(consumerResult.isFired)
+      // Fetch from leader succeeds
+      assertTrue(consumerResult.hasFired)
 
       // Returns a preferred replica (should just be the leader, which is None)
       assertFalse(consumerResult.assertFired.preferredReadReplica.isDefined)
@@ -1300,6 +1279,119 @@ class ReplicaManagerTest {
     TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
   }
 
+  @Test
+  def testFetchFromFollowerShouldNotRunPreferLeaderSelect(): Unit = {
+    val replicaManager = setupReplicaManagerWithMockedPurgatories(new MockTimer(time),
+      propsModifier = props => props.put(KafkaConfig.ReplicaSelectorClassProp, classOf[MockReplicaSelector].getName))
+    try {
+      val leaderBrokerId = 0
+      val followerBrokerId = 1
+      val brokerList = Seq[Integer](leaderBrokerId, followerBrokerId).asJava
+      val topicId = Uuid.randomUuid()
+      val tp0 = new TopicPartition(topic, 0)
+      val tidp0 = new TopicIdPartition(topicId, tp0)
+
+      // Make this replica the follower
+      val leaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
+        Seq(new LeaderAndIsrPartitionState()
+          .setTopicName(topic)
+          .setPartitionIndex(0)
+          .setControllerEpoch(0)
+          .setLeader(1)
+          .setLeaderEpoch(1)
+          .setIsr(brokerList)
+          .setPartitionEpoch(0)
+          .setReplicas(brokerList)
+          .setIsNew(false)).asJava,
+        Collections.singletonMap(topic, topicId),
+        Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
+      replicaManager.becomeLeaderOrFollower(1, leaderAndIsrRequest, (_, _) => ())
+
+      val metadata = new DefaultClientMetadata("rack-a", "client-id",
+        InetAddress.getLocalHost, KafkaPrincipal.ANONYMOUS, "default")
+
+      val consumerResult = fetchPartitionAsConsumer(replicaManager, tidp0,
+        new PartitionData(Uuid.ZERO_UUID, 0, 0, 100000,
+          Optional.empty()), clientMetadata = Some(metadata))
+
+      // Fetch from follower succeeds
+      assertTrue(consumerResult.hasFired)
+
+      // Expect not run the preferred read replica selection
+      assertEquals(0, replicaManager.replicaSelectorOpt.get.asInstanceOf[MockReplicaSelector].getSelectionCount)
+
+      // Only leader will compute preferred replica
+      assertTrue(consumerResult.assertFired.preferredReadReplica.isEmpty)
+
+    } finally replicaManager.shutdown(checkpointHW = false)
+  }
+
+  @Test
+  def testFetchShouldReturnImmediatelyWhenPreferredReadReplicaIsDefined(): Unit = {
+    val replicaManager = setupReplicaManagerWithMockedPurgatories(new MockTimer(time),
+      propsModifier = props => props.put(KafkaConfig.ReplicaSelectorClassProp, "org.apache.kafka.common.replica.RackAwareReplicaSelector"))
+
+    try {
+      val leaderBrokerId = 0
+      val followerBrokerId = 1
+      val brokerList = Seq[Integer](leaderBrokerId, followerBrokerId).asJava
+      val topicId = Uuid.randomUuid()
+      val tp0 = new TopicPartition(topic, 0)
+      val tidp0 = new TopicIdPartition(topicId, tp0)
+
+      when(replicaManager.metadataCache.getPartitionReplicaEndpoints(
+        tp0,
+        new ListenerName("default")
+      )).thenReturn(Map(
+        leaderBrokerId -> new Node(leaderBrokerId, "host1", 9092, "rack-a"),
+        followerBrokerId -> new Node(followerBrokerId, "host2", 9092, "rack-b")
+      ).toMap)
+
+      // Make this replica the leader
+      val leaderEpoch = 1
+      val leaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
+        Seq(new LeaderAndIsrPartitionState()
+          .setTopicName(topic)
+          .setPartitionIndex(0)
+          .setControllerEpoch(0)
+          .setLeader(0)
+          .setLeaderEpoch(leaderEpoch)
+          .setIsr(brokerList)
+          .setPartitionEpoch(0)
+          .setReplicas(brokerList)
+          .setIsNew(false)).asJava,
+        Collections.singletonMap(topic, topicId),
+        Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
+      replicaManager.becomeLeaderOrFollower(1, leaderAndIsrRequest, (_, _) => ())
+
+      // The leader must record the follower's fetch offset to make it eligible for follower fetch selection
+      val followerFetchData = new PartitionData(topicId, 0L, 0L, Int.MaxValue, Optional.of(Int.box(leaderEpoch)), Optional.empty[Integer])
+      fetchPartitionAsFollower(
+        replicaManager,
+        tidp0,
+        followerFetchData,
+        replicaId = followerBrokerId
+      )
+
+      val metadata = new DefaultClientMetadata("rack-b", "client-id",
+        InetAddress.getLocalHost, KafkaPrincipal.ANONYMOUS, "default")
+
+      // If a preferred read replica is selected, the fetch response returns immediately, even if min bytes and timeout conditions are not met.
+      val consumerResult = fetchPartitionAsConsumer(replicaManager, tidp0,
+        new PartitionData(topicId, 0, 0, 100000, Optional.empty()),
+        minBytes = 1, clientMetadata = Some(metadata), maxWaitMs = 5000)
+
+      // Fetch from leader succeeds
+      assertTrue(consumerResult.hasFired)
+
+      // No delayed fetch was inserted
+      assertEquals(0, replicaManager.delayedFetchPurgatory.watched)
+
+      // Returns a preferred replica
+      assertTrue(consumerResult.assertFired.preferredReadReplica.isDefined)
+    } finally replicaManager.shutdown(checkpointHW = false)
+  }
+
   @Test
   def testFollowerFetchWithDefaultSelectorNoForcedHwPropagation(): Unit = {
     val topicPartition = 0
@@ -1320,8 +1412,6 @@ class ReplicaManagerTest {
     val tp0 = new TopicPartition(topic, 0)
     val tidp0 = new TopicIdPartition(topicId, tp0)
 
-    initializeLogAndTopicId(replicaManager, tp0, topicId)
-
     // Make this replica the follower
     val leaderAndIsrRequest2 = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
       Seq(new LeaderAndIsrPartitionState()
@@ -1331,7 +1421,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(brokerList)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(brokerList)
         .setIsNew(false)).asJava,
       Collections.singletonMap(topic, topicId),
@@ -1344,24 +1434,33 @@ class ReplicaManagerTest {
 
     // Increment the hw in the leader by fetching from the last offset
     val fetchOffset = simpleRecords.size
-    var followerResult = fetchAsFollower(replicaManager, tidp0,
+    var followerResult = fetchPartitionAsFollower(
+      replicaManager,
+      tidp0,
       new PartitionData(Uuid.ZERO_UUID, fetchOffset, 0, 100000, Optional.empty()),
-      clientMetadata = None)
-    assertTrue(followerResult.isFired)
+      replicaId = 1,
+      minBytes = 0
+    )
+    assertTrue(followerResult.hasFired)
     assertEquals(0, followerResult.assertFired.highWatermark)
 
-    assertTrue(appendResult.isFired, "Expected producer request to be acked")
+    assertTrue(appendResult.hasFired, "Expected producer request to be acked")
 
     // Fetch from the same offset, no new data is expected and hence the fetch request should
     // go to the purgatory
-    followerResult = fetchAsFollower(replicaManager, tidp0,
+    followerResult = fetchPartitionAsFollower(
+      replicaManager,
+      tidp0,
       new PartitionData(Uuid.ZERO_UUID, fetchOffset, 0, 100000, Optional.empty()),
-      clientMetadata = None, minBytes = 1000)
-    assertFalse(followerResult.isFired, "Request completed immediately unexpectedly")
+      replicaId = 1,
+      minBytes = 1000,
+      maxWaitMs = 1000
+    )
+    assertFalse(followerResult.hasFired, "Request completed immediately unexpectedly")
 
     // Complete the request in the purgatory by advancing the clock
     timer.advanceClock(1001)
-    assertTrue(followerResult.isFired)
+    assertTrue(followerResult.hasFired)
 
     assertEquals(fetchOffset, followerResult.assertFired.highWatermark)
   }
@@ -1382,15 +1481,6 @@ class ReplicaManagerTest {
       leaderBrokerId, countDownLatch, expectTruncation = true, extraProps = props))
   }
 
-  // Due to some limitations to EasyMock, we need to create the log so that the Partition.topicId does not call
-  // LogManager.getLog with a default argument
-  // TODO: convert tests to using Mockito to avoid this issue.
-  private def initializeLogAndTopicId(replicaManager: ReplicaManager, topicPartition: TopicPartition, topicId: Uuid): Unit = {
-    val partition = replicaManager.createPartition(new TopicPartition(topic, 0))
-    val log = replicaManager.logManager.getOrCreateLog(topicPartition, false, false, Some(topicId))
-    partition.log = Some(log)
-  }
-
   @Test
   def testDefaultReplicaSelector(): Unit = {
     val topicPartition = 0
@@ -1424,7 +1514,7 @@ class ReplicaManagerTest {
           .setLeader(1)
           .setLeaderEpoch(0)
           .setIsr(partition0Replicas)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(partition0Replicas)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -1435,16 +1525,15 @@ class ReplicaManagerTest {
       val clientMetadata = new DefaultClientMetadata("", "", null, KafkaPrincipal.ANONYMOUS, "")
       var partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
         Optional.of(0))
-      var fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, Some(clientMetadata))
-      assertNotNull(fetchResult.get)
-      assertEquals(Errors.NONE, fetchResult.get.error)
+      var fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData,
+        clientMetadata = Some(clientMetadata))
+      assertEquals(Errors.NONE, fetchResult.assertFired.error)
 
       // Fetch from follower, with empty ClientMetadata (which implies an older version)
       partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
         Optional.of(0))
-      fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, None)
-      assertNotNull(fetchResult.get)
-      assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.get.error)
+      fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData)
+      assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.assertFired.error)
     } finally {
       replicaManager.shutdown()
     }
@@ -1471,7 +1560,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(partition0Replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(partition0Replicas)
         .setIsNew(true)).asJava,
       topicIds.asJava,
@@ -1486,16 +1575,14 @@ class ReplicaManagerTest {
     val partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
       Optional.empty())
 
-    val nonPurgatoryFetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, None)
-    assertNotNull(nonPurgatoryFetchResult.get)
-    assertEquals(Errors.NONE, nonPurgatoryFetchResult.get.error)
+    val nonPurgatoryFetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData)
+    assertEquals(Errors.NONE, nonPurgatoryFetchResult.assertFired.error)
     assertMetricCount(1)
 
-    val purgatoryFetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, None, timeout = 10)
-    assertNull(purgatoryFetchResult.get)
+    val purgatoryFetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData, maxWaitMs = 10)
+    assertFalse(purgatoryFetchResult.hasFired)
     mockTimer.advanceClock(11)
-    assertNotNull(purgatoryFetchResult.get)
-    assertEquals(Errors.NONE, purgatoryFetchResult.get.error)
+    assertEquals(Errors.NONE, purgatoryFetchResult.assertFired.error)
     assertMetricCount(2)
   }
 
@@ -1519,7 +1606,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(1)
           .setIsr(partition0Replicas)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(partition0Replicas)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -1528,8 +1615,8 @@ class ReplicaManagerTest {
 
       val partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
         Optional.empty())
-      val fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, None, timeout = 10)
-      assertNull(fetchResult.get)
+      val fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData, maxWaitMs = 10)
+      assertFalse(fetchResult.hasFired)
 
       // Become a follower and ensure that the delayed fetch returns immediately
       val becomeFollowerRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
@@ -1540,15 +1627,13 @@ class ReplicaManagerTest {
           .setLeader(1)
           .setLeaderEpoch(2)
           .setIsr(partition0Replicas)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(partition0Replicas)
           .setIsNew(true)).asJava,
         topicIds.asJava,
         Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
       replicaManager.becomeLeaderOrFollower(0, becomeFollowerRequest, (_, _) => ())
-
-      assertNotNull(fetchResult.get)
-      assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.get.error)
+      assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.assertFired.error)
     } finally {
       replicaManager.shutdown()
     }
@@ -1576,7 +1661,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(1)
           .setIsr(partition0Replicas)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(partition0Replicas)
           .setIsNew(true)).asJava,
         topicIds.asJava,
@@ -1586,8 +1671,14 @@ class ReplicaManagerTest {
       val clientMetadata = new DefaultClientMetadata("", "", null, KafkaPrincipal.ANONYMOUS, "")
       val partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
         Optional.of(1))
-      val fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, Some(clientMetadata), timeout = 10)
-      assertNull(fetchResult.get)
+      val fetchResult = fetchPartitionAsConsumer(
+        replicaManager,
+        tidp0,
+        partitionData,
+        clientMetadata = Some(clientMetadata),
+        maxWaitMs = 10
+      )
+      assertFalse(fetchResult.hasFired)
 
       // Become a follower and ensure that the delayed fetch returns immediately
       val becomeFollowerRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch,
@@ -1598,15 +1689,13 @@ class ReplicaManagerTest {
           .setLeader(1)
           .setLeaderEpoch(2)
           .setIsr(partition0Replicas)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(partition0Replicas)
           .setIsNew(true)).asJava,
         topicIds.asJava,
         Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build()
       replicaManager.becomeLeaderOrFollower(0, becomeFollowerRequest, (_, _) => ())
-
-      assertNotNull(fetchResult.get)
-      assertEquals(Errors.FENCED_LEADER_EPOCH, fetchResult.get.error)
+      assertEquals(Errors.FENCED_LEADER_EPOCH, fetchResult.assertFired.error)
     } finally {
       replicaManager.shutdown()
     }
@@ -1632,7 +1721,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(partition0Replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(partition0Replicas)
         .setIsNew(true)).asJava,
       topicIds.asJava,
@@ -1642,15 +1731,13 @@ class ReplicaManagerTest {
     val clientMetadata = new DefaultClientMetadata("", "", null, KafkaPrincipal.ANONYMOUS, "")
     var partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
       Optional.of(1))
-    var fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, Some(clientMetadata))
-    assertNotNull(fetchResult.get)
-    assertEquals(Errors.NONE, fetchResult.get.error)
+    var fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData, clientMetadata = Some(clientMetadata))
+    assertEquals(Errors.NONE, fetchResult.assertFired.error)
 
     partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
       Optional.empty())
-    fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, Some(clientMetadata))
-    assertNotNull(fetchResult.get)
-    assertEquals(Errors.NONE, fetchResult.get.error)
+    fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData, clientMetadata = Some(clientMetadata))
+    assertEquals(Errors.NONE, fetchResult.assertFired.error)
   }
 
   @Test
@@ -1676,7 +1763,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(partition0Replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(partition0Replicas)
         .setIsNew(true)).asJava,
       topicIds.asJava,
@@ -1685,8 +1772,8 @@ class ReplicaManagerTest {
 
     val partitionData = new FetchRequest.PartitionData(Uuid.ZERO_UUID, 0L, 0L, 100,
       Optional.of(1))
-    val fetchResult = sendConsumerFetch(replicaManager, tidp0, partitionData, None, timeout = 10)
-    assertNull(fetchResult.get)
+    val fetchResult = fetchPartitionAsConsumer(replicaManager, tidp0, partitionData, maxWaitMs = 10)
+    assertFalse(fetchResult.hasFired)
     when(replicaManager.metadataCache.contains(tp0)).thenReturn(true)
 
     // We have a fetch in purgatory, now receive a stop replica request and
@@ -1697,8 +1784,7 @@ class ReplicaManagerTest {
         .setDeletePartition(true)
         .setLeaderEpoch(LeaderAndIsr.EpochDuringDelete)))
 
-    assertNotNull(fetchResult.get)
-    assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.get.error)
+    assertEquals(Errors.NOT_LEADER_OR_FOLLOWER, fetchResult.assertFired.error)
   }
 
   @Test
@@ -1719,7 +1805,7 @@ class ReplicaManagerTest {
         .setLeader(0)
         .setLeaderEpoch(1)
         .setIsr(partition0Replicas)
-        .setZkVersion(0)
+        .setPartitionEpoch(0)
         .setReplicas(partition0Replicas)
         .setIsNew(true)).asJava,
       topicIds.asJava,
@@ -1770,30 +1856,6 @@ class ReplicaManagerTest {
     produceResult
   }
 
-  private def sendConsumerFetch(replicaManager: ReplicaManager,
-                                topicIdPartition: TopicIdPartition,
-                                partitionData: FetchRequest.PartitionData,
-                                clientMetadataOpt: Option[ClientMetadata],
-                                timeout: Long = 0L): AtomicReference[FetchPartitionData] = {
-    val fetchResult = new AtomicReference[FetchPartitionData]()
-    def callback(response: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
-      fetchResult.set(response.toMap.apply(topicIdPartition))
-    }
-    replicaManager.fetchMessages(
-      timeout = timeout,
-      replicaId = Request.OrdinaryConsumerId,
-      fetchMinBytes = 1,
-      fetchMaxBytes = 100,
-      hardMaxBytesLimit = false,
-      fetchInfos = Seq(topicIdPartition -> partitionData),
-      quota = UnboundedQuota,
-      isolationLevel = IsolationLevel.READ_UNCOMMITTED,
-      responseCallback = callback,
-      clientMetadata = clientMetadataOpt
-    )
-    fetchResult
-  }
-
   /**
    * This method assumes that the test using created ReplicaManager calls
    * ReplicaManager.becomeLeaderOrFollower() once with LeaderAndIsrRequest containing
@@ -1874,7 +1936,12 @@ class ReplicaManagerTest {
     val mockLogMgr: LogManager = mock(classOf[LogManager])
     when(mockLogMgr.liveLogDirs).thenReturn(config.logDirs.map(new File(_).getAbsoluteFile))
     when(mockLogMgr.getOrCreateLog(ArgumentMatchers.eq(topicPartitionObj), ArgumentMatchers.eq(false), ArgumentMatchers.eq(false), any())).thenReturn(mockLog)
+    when(mockLogMgr.getLog(topicPartitionObj, isFuture = false)).thenReturn(Some(mockLog))
     when(mockLogMgr.getLog(topicPartitionObj, isFuture = true)).thenReturn(None)
+    val allLogs = new Pool[TopicPartition, UnifiedLog]()
+    allLogs.put(topicPartitionObj, mockLog)
+    when(mockLogMgr.allLogs).thenReturn(allLogs.values)
+    when(mockLogMgr.isLogDirOnline(anyString)).thenReturn(true)
 
     val aliveBrokerIds = Seq[Integer](followerBrokerId, leaderBrokerId)
     val aliveBrokers = aliveBrokerIds.map(brokerId => new Node(brokerId, s"host$brokerId", brokerId))
@@ -1885,7 +1952,7 @@ class ReplicaManagerTest {
       any[TopicPartition], any[ListenerName])).
         thenReturn(Map(leaderBrokerId -> new Node(leaderBrokerId, "host1", 9092, "rack-a"),
           followerBrokerId -> new Node(followerBrokerId, "host2", 9092, "rack-b")).toMap)
-
+    when(metadataCache.metadataVersion()).thenReturn(config.interBrokerProtocolVersion)
     val mockProducePurgatory = new DelayedOperationPurgatory[DelayedProduce](
       purgatoryName = "Produce", timer, reaperEnabled = false)
     val mockFetchPurgatory = new DelayedOperationPurgatory[DelayedFetch](
@@ -1896,7 +1963,7 @@ class ReplicaManagerTest {
       purgatoryName = "ElectLeader", timer, reaperEnabled = false)
 
     // Mock network client to show leader offset of 5
-    val blockingSend = new ReplicaFetcherMockBlockingSend(
+    val blockingSend = new MockBlockingSender(
       Map(topicPartitionObj -> new EpochEndOffset()
         .setPartition(topicPartitionObj.partition)
         .setErrorCode(Errors.NONE.code)
@@ -1913,7 +1980,7 @@ class ReplicaManagerTest {
       brokerTopicStats = mockBrokerTopicStats,
       metadataCache = metadataCache,
       logDirFailureChannel = mockLogDirFailureChannel,
-      alterIsrManager = alterIsrManager,
+      alterPartitionManager = alterPartitionManager,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
@@ -1924,12 +1991,16 @@ class ReplicaManagerTest {
                                                          time: Time,
                                                          threadNamePrefix: Option[String],
                                                          replicationQuotaManager: ReplicationQuotaManager): ReplicaFetcherManager = {
-        new ReplicaFetcherManager(config, this, metrics, time, threadNamePrefix, replicationQuotaManager) {
+        new ReplicaFetcherManager(config, this, metrics, time, threadNamePrefix, replicationQuotaManager, () => metadataCache.metadataVersion()) {
 
           override def createFetcherThread(fetcherId: Int, sourceBroker: BrokerEndPoint): ReplicaFetcherThread = {
-            new ReplicaFetcherThread(s"ReplicaFetcherThread-$fetcherId", fetcherId,
-              sourceBroker, config, failedPartitions, replicaManager, metrics, time, quotaManager.follower, Some(blockingSend)) {
-
+            val logContext = new LogContext(s"[ReplicaFetcher replicaId=${config.brokerId}, leaderId=${sourceBroker.id}, " +
+              s"fetcherId=$fetcherId] ")
+            val fetchSessionHandler = new FetchSessionHandler(logContext, sourceBroker.id)
+            val leader = new RemoteLeaderEndPoint(logContext.logPrefix, blockingSend, fetchSessionHandler, config,
+              replicaManager, quotaManager.follower, () => config.interBrokerProtocolVersion)
+            new ReplicaFetcherThread(s"ReplicaFetcherThread-$fetcherId", leader, config, failedPartitions, replicaManager,
+              quotaManager.follower, logContext.logPrefix, () => config.interBrokerProtocolVersion) {
               override def doWork(): Unit = {
                 // In case the thread starts before the partition is added by AbstractFetcherManager,
                 // add it here (it's a no-op if already added)
@@ -1965,7 +2036,7 @@ class ReplicaManagerTest {
       .setLeader(leaderBrokerId)
       .setLeaderEpoch(leaderEpoch)
       .setIsr(aliveBrokerIds.asJava)
-      .setZkVersion(zkVersion)
+      .setPartitionEpoch(zkVersion)
       .setReplicas(aliveBrokerIds.asJava)
       .setIsNew(isNew)
   }
@@ -1975,11 +2046,11 @@ class ReplicaManagerTest {
     private var fun: Option[T => Unit] = None
 
     def assertFired: T = {
-      assertTrue(isFired, "Callback has not been fired")
+      assertTrue(hasFired, "Callback has not been fired")
       value.get
     }
 
-    def isFired: Boolean = {
+    def hasFired: Boolean = {
       value.isDefined
     }
 
@@ -1990,7 +2061,7 @@ class ReplicaManagerTest {
 
     def onFire(fun: T => Unit): CallbackResult[T] = {
       this.fun = Some(fun)
-      if (this.isFired) fire(value.get)
+      if (this.hasFired) fire(value.get)
       this
     }
   }
@@ -2018,31 +2089,67 @@ class ReplicaManagerTest {
     result
   }
 
-  private def fetchAsConsumer(replicaManager: ReplicaManager,
-                              partition: TopicIdPartition,
-                              partitionData: PartitionData,
-                              minBytes: Int = 0,
-                              isolationLevel: IsolationLevel = IsolationLevel.READ_UNCOMMITTED,
-                              clientMetadata: Option[ClientMetadata] = None): CallbackResult[FetchPartitionData] = {
-    fetchMessages(replicaManager, replicaId = -1, partition, partitionData, minBytes, isolationLevel, clientMetadata)
+  private def fetchPartitionAsConsumer(
+    replicaManager: ReplicaManager,
+    partition: TopicIdPartition,
+    partitionData: PartitionData,
+    maxWaitMs: Long = 0,
+    minBytes: Int = 1,
+    maxBytes: Int = 1024 * 1024,
+    isolationLevel: IsolationLevel = IsolationLevel.READ_UNCOMMITTED,
+    clientMetadata: Option[ClientMetadata] = None,
+  ): CallbackResult[FetchPartitionData] = {
+    val isolation = isolationLevel match {
+      case IsolationLevel.READ_COMMITTED => FetchTxnCommitted
+      case IsolationLevel.READ_UNCOMMITTED => FetchHighWatermark
+    }
+
+    fetchPartition(
+      replicaManager,
+      replicaId = Request.OrdinaryConsumerId,
+      partition,
+      partitionData,
+      minBytes,
+      maxBytes,
+      isolation,
+      clientMetadata,
+      maxWaitMs
+    )
   }
 
-  private def fetchAsFollower(replicaManager: ReplicaManager,
-                              partition: TopicIdPartition,
-                              partitionData: PartitionData,
-                              minBytes: Int = 0,
-                              isolationLevel: IsolationLevel = IsolationLevel.READ_UNCOMMITTED,
-                              clientMetadata: Option[ClientMetadata] = None): CallbackResult[FetchPartitionData] = {
-    fetchMessages(replicaManager, replicaId = 1, partition, partitionData, minBytes, isolationLevel, clientMetadata)
+  private def fetchPartitionAsFollower(
+    replicaManager: ReplicaManager,
+    partition: TopicIdPartition,
+    partitionData: PartitionData,
+    replicaId: Int,
+    maxWaitMs: Long = 0,
+    minBytes: Int = 1,
+    maxBytes: Int = 1024 * 1024,
+  ): CallbackResult[FetchPartitionData] = {
+    fetchPartition(
+      replicaManager,
+      replicaId = replicaId,
+      partition,
+      partitionData,
+      minBytes = minBytes,
+      maxBytes = maxBytes,
+      isolation = FetchLogEnd,
+      clientMetadata = None,
+      maxWaitMs = maxWaitMs
+    )
   }
 
-  private def fetchMessages(replicaManager: ReplicaManager,
-                            replicaId: Int,
-                            partition: TopicIdPartition,
-                            partitionData: PartitionData,
-                            minBytes: Int,
-                            isolationLevel: IsolationLevel,
-                            clientMetadata: Option[ClientMetadata]): CallbackResult[FetchPartitionData] = {
+  private def fetchPartition(
+    replicaManager: ReplicaManager,
+    replicaId: Int,
+    partition: TopicIdPartition,
+    partitionData: PartitionData,
+    minBytes: Int,
+    maxBytes: Int,
+    isolation: FetchIsolation,
+    clientMetadata: Option[ClientMetadata],
+    maxWaitMs: Long
+  ): CallbackResult[FetchPartitionData] = {
     val result = new CallbackResult[FetchPartitionData]()
     def fetchCallback(responseStatus: Seq[(TopicIdPartition, FetchPartitionData)]): Unit = {
       assertEquals(1, responseStatus.size)
@@ -2051,22 +2158,52 @@ class ReplicaManagerTest {
       result.fire(fetchData)
     }
 
-    replicaManager.fetchMessages(
-      timeout = 1000,
+    fetchPartitions(
+      replicaManager,
       replicaId = replicaId,
-      fetchMinBytes = minBytes,
-      fetchMaxBytes = Int.MaxValue,
-      hardMaxBytesLimit = false,
       fetchInfos = Seq(partition -> partitionData),
-      quota = UnboundedQuota,
       responseCallback = fetchCallback,
-      isolationLevel = isolationLevel,
+      maxWaitMs = maxWaitMs,
+      minBytes = minBytes,
+      maxBytes = maxBytes,
+      isolation = isolation,
       clientMetadata = clientMetadata
     )
 
     result
   }
 
+  private def fetchPartitions(
+    replicaManager: ReplicaManager,
+    replicaId: Int,
+    fetchInfos: Seq[(TopicIdPartition, PartitionData)],
+    responseCallback: Seq[(TopicIdPartition, FetchPartitionData)] => Unit,
+    requestVersion: Short = ApiKeys.FETCH.latestVersion,
+    maxWaitMs: Long = 0,
+    minBytes: Int = 1,
+    maxBytes: Int = 1024 * 1024,
+    quota: ReplicaQuota = UnboundedQuota,
+    isolation: FetchIsolation = FetchLogEnd,
+    clientMetadata: Option[ClientMetadata] = None
+  ): Unit = {
+    val params = FetchParams(
+      requestVersion = requestVersion,
+      replicaId = replicaId,
+      maxWaitMs = maxWaitMs,
+      minBytes = minBytes,
+      maxBytes = maxBytes,
+      isolation = isolation,
+      clientMetadata = clientMetadata
+    )
+
+    replicaManager.fetchMessages(
+      params,
+      fetchInfos,
+      quota,
+      responseCallback
+    )
+  }
+
   private def setupReplicaManagerWithMockedPurgatories(
     timer: MockTimer,
     brokerId: Int = 0,
@@ -2087,6 +2224,7 @@ class ReplicaManagerTest {
     when(metadataCache.topicIdInfo()).thenReturn((topicIds.asJava, topicNames.asJava))
     when(metadataCache.topicNamesToIds()).thenReturn(topicIds.asJava)
     when(metadataCache.topicIdsToNames()).thenReturn(topicNames.asJava)
+    when(metadataCache.metadataVersion()).thenReturn(config.interBrokerProtocolVersion)
     mockGetAliveBrokerFunctions(metadataCache, aliveBrokers)
     val mockProducePurgatory = new DelayedOperationPurgatory[DelayedProduce](
       purgatoryName = "Produce", timer, reaperEnabled = false)
@@ -2106,7 +2244,7 @@ class ReplicaManagerTest {
       quotaManagers = quotaManager,
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager,
+      alterPartitionManager = alterPartitionManager,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
@@ -2172,7 +2310,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(leaderEpoch)
             .setIsr(partition0Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition0Replicas)
             .setIsNew(true),
           new LeaderAndIsrPartitionState()
@@ -2182,7 +2320,7 @@ class ReplicaManagerTest {
             .setLeader(1)
             .setLeaderEpoch(leaderEpoch)
             .setIsr(partition1Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition1Replicas)
             .setIsNew(true)
         ).asJava,
@@ -2203,7 +2341,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(leaderEpoch + leaderEpochIncrement)
             .setIsr(partition0Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition0Replicas)
             .setIsNew(true),
           new LeaderAndIsrPartitionState()
@@ -2213,7 +2351,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(leaderEpoch + leaderEpochIncrement)
             .setIsr(partition1Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition1Replicas)
             .setIsNew(true)
         ).asJava,
@@ -2260,7 +2398,7 @@ class ReplicaManagerTest {
             .setLeader(1)
             .setLeaderEpoch(leaderEpoch)
             .setIsr(partition0Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition0Replicas)
             .setIsNew(true),
           new LeaderAndIsrPartitionState()
@@ -2270,7 +2408,7 @@ class ReplicaManagerTest {
             .setLeader(1)
             .setLeaderEpoch(leaderEpoch)
             .setIsr(partition1Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition1Replicas)
             .setIsNew(true)
         ).asJava,
@@ -2291,7 +2429,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(leaderEpoch + leaderEpochIncrement)
             .setIsr(partition0Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition0Replicas)
             .setIsNew(true),
           new LeaderAndIsrPartitionState()
@@ -2301,7 +2439,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(leaderEpoch + leaderEpochIncrement)
             .setIsr(partition1Replicas)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(partition1Replicas)
             .setIsNew(true)
         ).asJava,
@@ -2339,6 +2477,8 @@ class ReplicaManagerTest {
     val aliveBrokers = Seq(new Node(0, "host0", 0), new Node(1, "host1", 1))
     mockGetAliveBrokerFunctions(metadataCache0, aliveBrokers)
     mockGetAliveBrokerFunctions(metadataCache1, aliveBrokers)
+    when(metadataCache0.metadataVersion()).thenReturn(config0.interBrokerProtocolVersion)
+    when(metadataCache1.metadataVersion()).thenReturn(config1.interBrokerProtocolVersion)
 
     // each replica manager is for a broker
     val rm0 = new ReplicaManager(
@@ -2351,7 +2491,7 @@ class ReplicaManagerTest {
       brokerTopicStats = brokerTopicStats1,
       metadataCache = metadataCache0,
       logDirFailureChannel = new LogDirFailureChannel(config0.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterPartitionManager)
     val rm1 = new ReplicaManager(
       metrics = metrics,
       config = config1,
@@ -2362,7 +2502,7 @@ class ReplicaManagerTest {
       brokerTopicStats = brokerTopicStats2,
       metadataCache = metadataCache1,
       logDirFailureChannel = new LogDirFailureChannel(config1.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterPartitionManager)
 
     (rm0, rm1)
   }
@@ -2449,7 +2589,7 @@ class ReplicaManagerTest {
       // Delete the underlying directory to trigger an KafkaStorageException
       val dir = log.dir.getParentFile
       Utils.delete(dir)
-      dir.createNewFile()
+      Files.createFile(dir.toPath)
     }
 
     val partitionStates = Map(tp0 -> new StopReplicaPartitionState()
@@ -2573,10 +2713,11 @@ class ReplicaManagerTest {
     assertEquals(Some(1L), readLogStartOffsetCheckpoint().get(tp0))
 
     if (throwIOException) {
-      // Delete the underlying directory to trigger an KafkaStorageException
-      val dir = partition.log.get.dir
-      Utils.delete(dir)
-      dir.createNewFile()
+      // Replace underlying PartitionMetadataFile with a mock which throws
+      // a KafkaStorageException when maybeFlush is called.
+      val mockPartitionMetadataFile = mock(classOf[PartitionMetadataFile])
+      when(mockPartitionMetadataFile.maybeFlush()).thenThrow(new KafkaStorageException())
+      partition.log.get.partitionMetadataFile = Some(mockPartitionMetadataFile)
     }
 
     val partitionStates = Map(tp0 -> new StopReplicaPartitionState()
@@ -2610,9 +2751,9 @@ class ReplicaManagerTest {
         scheduler = new MockScheduler(time),
         logManager = mockLogMgr,
         quotaManagers = quotaManager,
-        metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+        metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
         logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-        alterIsrManager = alterIsrManager) {
+        alterPartitionManager = alterPartitionManager) {
         override def getPartitionOrException(topicPartition: TopicPartition): Partition = {
           throw Errors.NOT_LEADER_OR_FOLLOWER.exception()
         }
@@ -2648,7 +2789,7 @@ class ReplicaManagerTest {
             .setLeader(0)
             .setLeaderEpoch(epoch)
             .setIsr(brokerList)
-            .setZkVersion(0)
+            .setPartitionEpoch(0)
             .setReplicas(brokerList)
             .setIsNew(true)).asJava,
           topicIds,
@@ -2659,8 +2800,8 @@ class ReplicaManagerTest {
       assertFalse(replicaManager.localLog(topicPartition).isEmpty)
       val id = topicIds.get(topicPartition.topic())
       val log = replicaManager.localLog(topicPartition).get
-      assertTrue(log.partitionMetadataFile.exists())
-      val partitionMetadata = log.partitionMetadataFile.read()
+      assertTrue(log.partitionMetadataFile.get.exists())
+      val partitionMetadata = log.partitionMetadataFile.get.read()
 
       // Current version of PartitionMetadataFile is 0.
       assertEquals(0, partitionMetadata.version)
@@ -2680,7 +2821,7 @@ class ReplicaManagerTest {
       assertTrue(replicaManager.getLog(topicPartition).isDefined)
       var log = replicaManager.getLog(topicPartition).get
       assertEquals(None, log.topicId)
-      assertFalse(log.partitionMetadataFile.exists())
+      assertFalse(log.partitionMetadataFile.get.exists())
 
       val topicIds = Collections.singletonMap(topic, Uuid.randomUuid())
       val topicNames = topicIds.asScala.map(_.swap).asJava
@@ -2693,7 +2834,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(epoch)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds,
@@ -2704,8 +2845,8 @@ class ReplicaManagerTest {
       assertFalse(replicaManager.localLog(topicPartition).isEmpty)
       val id = topicIds.get(topicPartition.topic())
       log = replicaManager.localLog(topicPartition).get
-      assertTrue(log.partitionMetadataFile.exists())
-      val partitionMetadata = log.partitionMetadataFile.read()
+      assertTrue(log.partitionMetadataFile.get.exists())
+      val partitionMetadata = log.partitionMetadataFile.get.read()
 
       // Current version of PartitionMetadataFile is 0.
       assertEquals(0, partitionMetadata.version)
@@ -2730,7 +2871,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(leaderEpoch)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds,
@@ -2741,13 +2882,13 @@ class ReplicaManagerTest {
       assertEquals(Errors.NONE, response.partitionErrors(Collections.emptyMap()).get(topicPartition))
       assertTrue(replicaManager.localLog(topicPartition).isDefined)
       val log = replicaManager.localLog(topicPartition).get
-      assertFalse(log.partitionMetadataFile.exists())
+      assertFalse(log.partitionMetadataFile.get.exists())
       assertTrue(log.topicId.isEmpty)
 
       val response2 = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(topicIds.asJava, ApiKeys.LEADER_AND_ISR.latestVersion), (_, _) => ())
       assertEquals(Errors.NONE, response2.partitionErrors(topicNames.asJava).get(topicPartition))
       assertTrue(replicaManager.localLog(topicPartition).isDefined)
-      assertTrue(log.partitionMetadataFile.exists())
+      assertTrue(log.partitionMetadataFile.get.exists())
       assertTrue(log.topicId.isDefined)
       assertEquals(topicId, log.topicId.get)
 
@@ -2757,18 +2898,18 @@ class ReplicaManagerTest {
       assertEquals(Errors.NONE, response3.partitionErrors(Collections.emptyMap()).get(topicPartition2))
       assertTrue(replicaManager.localLog(topicPartition2).isDefined)
       val log2 = replicaManager.localLog(topicPartition2).get
-      assertFalse(log2.partitionMetadataFile.exists())
+      assertFalse(log2.partitionMetadataFile.get.exists())
       assertTrue(log2.topicId.isEmpty)
 
       val response4 = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(topicIds.asJava, ApiKeys.LEADER_AND_ISR.latestVersion, 1, 1), (_, _) => ())
       assertEquals(Errors.NONE, response4.partitionErrors(topicNames.asJava).get(topicPartition2))
       assertTrue(replicaManager.localLog(topicPartition2).isDefined)
-      assertTrue(log2.partitionMetadataFile.exists())
+      assertTrue(log2.partitionMetadataFile.get.exists())
       assertTrue(log2.topicId.isDefined)
       assertEquals(topicId, log2.topicId.get)
 
-      assertEquals(topicId, log.partitionMetadataFile.read().topicId)
-      assertEquals(topicId, log2.partitionMetadataFile.read().topicId)
+      assertEquals(topicId, log.partitionMetadataFile.get.read().topicId)
+      assertEquals(topicId, log2.partitionMetadataFile.get.read().topicId)
     } finally replicaManager.shutdown(checkpointHW = false)
   }
 
@@ -2793,7 +2934,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(epoch)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds,
@@ -2834,7 +2975,7 @@ class ReplicaManagerTest {
           .setLeader(0)
           .setLeaderEpoch(epoch)
           .setIsr(brokerList)
-          .setZkVersion(0)
+          .setPartitionEpoch(0)
           .setReplicas(brokerList)
           .setIsNew(true)).asJava,
         topicIds,
@@ -2844,28 +2985,28 @@ class ReplicaManagerTest {
       val response = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(0, "fakeTopic", ApiKeys.LEADER_AND_ISR.latestVersion), (_, _) => ())
       assertTrue(replicaManager.localLog(topicPartitionFake).isDefined)
       val log = replicaManager.localLog(topicPartitionFake).get
-      assertFalse(log.partitionMetadataFile.exists())
+      assertFalse(log.partitionMetadataFile.get.exists())
       assertEquals(Errors.NONE, response.partitionErrors(topicNames).get(topicPartition))
 
       // There is no file if the topic has the default UUID.
       val response2 = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(0, topic, ApiKeys.LEADER_AND_ISR.latestVersion), (_, _) => ())
       assertTrue(replicaManager.localLog(topicPartition).isDefined)
       val log2 = replicaManager.localLog(topicPartition).get
-      assertFalse(log2.partitionMetadataFile.exists())
+      assertFalse(log2.partitionMetadataFile.get.exists())
       assertEquals(Errors.NONE, response2.partitionErrors(topicNames).get(topicPartition))
 
       // There is no file if the request an older version
       val response3 = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(0, "foo", 0), (_, _) => ())
       assertTrue(replicaManager.localLog(topicPartitionFoo).isDefined)
       val log3 = replicaManager.localLog(topicPartitionFoo).get
-      assertFalse(log3.partitionMetadataFile.exists())
+      assertFalse(log3.partitionMetadataFile.get.exists())
       assertEquals(Errors.NONE, response3.partitionErrors(topicNames).get(topicPartitionFoo))
 
       // There is no file if the request is an older version
       val response4 = replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(1, "foo", 4), (_, _) => ())
       assertTrue(replicaManager.localLog(topicPartitionFoo).isDefined)
       val log4 = replicaManager.localLog(topicPartitionFoo).get
-      assertFalse(log4.partitionMetadataFile.exists())
+      assertFalse(log4.partitionMetadataFile.get.exists())
       assertEquals(Errors.NONE, response4.partitionErrors(topicNames).get(topicPartitionFoo))
     } finally replicaManager.shutdown(checkpointHW = false)
   }
@@ -2917,7 +3058,7 @@ class ReplicaManagerTest {
       .setLeader(leaderAndIsr.leader)
       .setLeaderEpoch(leaderAndIsr.leaderEpoch)
       .setIsr(leaderAndIsr.isr.map(Int.box).asJava)
-      .setZkVersion(leaderAndIsr.zkVersion)
+      .setPartitionEpoch(leaderAndIsr.partitionEpoch)
       .setReplicas(replicas.map(Int.box).asJava)
       .setIsNew(isNew)
 
@@ -3029,14 +3170,11 @@ class ReplicaManagerTest {
 
       // Send a produce request and advance the highwatermark
       val leaderResponse = sendProducerAppend(replicaManager, topicPartition, numOfRecords)
-      fetchMessages(
+      fetchPartitionAsFollower(
         replicaManager,
-        otherId,
         topicIdPartition,
         new PartitionData(Uuid.ZERO_UUID, numOfRecords, 0, Int.MaxValue, Optional.empty()),
-        Int.MaxValue,
-        IsolationLevel.READ_UNCOMMITTED,
-        None
+        replicaId = otherId
       )
       assertEquals(Errors.NONE, leaderResponse.get.error)
 
@@ -3055,7 +3193,7 @@ class ReplicaManagerTest {
       assertEquals(1, followerPartition.getLeaderEpoch)
 
       val fetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.leader.brokerEndPoint()))
     } finally {
       replicaManager.shutdown()
     }
@@ -3083,7 +3221,7 @@ class ReplicaManagerTest {
       assertEquals(0, followerPartition.getLeaderEpoch)
 
       val fetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.leader.brokerEndPoint()))
 
       // Append on a follower should fail
       val followerResponse = sendProducerAppend(replicaManager, topicPartition, numOfRecords)
@@ -3098,14 +3236,11 @@ class ReplicaManagerTest {
 
       // Send a produce request and advance the highwatermark
       val leaderResponse = sendProducerAppend(replicaManager, topicPartition, numOfRecords)
-      fetchMessages(
+      fetchPartitionAsFollower(
         replicaManager,
-        otherId,
         topicIdPartition,
         new PartitionData(Uuid.ZERO_UUID, numOfRecords, 0, Int.MaxValue, Optional.empty()),
-        Int.MaxValue,
-        IsolationLevel.READ_UNCOMMITTED,
-        None
+        replicaId = otherId
       )
       assertEquals(Errors.NONE, leaderResponse.get.error)
 
@@ -3141,7 +3276,7 @@ class ReplicaManagerTest {
       assertEquals(0, followerPartition.getLeaderEpoch)
 
       val fetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.leader.brokerEndPoint()))
 
       // Apply the same delta again
       replicaManager.applyDelta(followerTopicsDelta, followerMetadataImage)
@@ -3152,7 +3287,7 @@ class ReplicaManagerTest {
       assertEquals(0, noChangePartition.getLeaderEpoch)
 
       val noChangeFetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), noChangeFetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), noChangeFetcher.map(_.leader.brokerEndPoint()))
     } finally {
       replicaManager.shutdown()
     }
@@ -3179,7 +3314,7 @@ class ReplicaManagerTest {
       assertEquals(0, followerPartition.getLeaderEpoch)
 
       val fetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.leader.brokerEndPoint()))
 
       // Apply changes that remove replica
       val notReplicaTopicsDelta = topicsChangeDelta(followerMetadataImage.topics(), otherId, true)
@@ -3216,7 +3351,7 @@ class ReplicaManagerTest {
       assertEquals(0, followerPartition.getLeaderEpoch)
 
       val fetcher = replicaManager.replicaFetcherManager.getFetcher(topicPartition)
-      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.sourceBroker))
+      assertEquals(Some(BrokerEndPoint(otherId, "localhost", 9093)), fetcher.map(_.leader.brokerEndPoint()))
 
       // Apply changes that remove topic and replica
       val removeTopicsDelta = topicsDeleteDelta(followerMetadataImage.topics())
@@ -3242,7 +3377,7 @@ class ReplicaManagerTest {
     val replicaManager = setupReplicaManagerWithMockedPurgatories(new MockTimer(time), localId)
 
     try {
-      // Make the local replica the follower
+      // Make the local replica the leader
       val leaderTopicsDelta = topicsCreateDelta(localId, true)
       val leaderMetadataImage = imageFromTopics(leaderTopicsDelta.apply())
       replicaManager.applyDelta(leaderTopicsDelta, leaderMetadataImage)
@@ -3279,7 +3414,7 @@ class ReplicaManagerTest {
     val replicaManager = setupReplicaManagerWithMockedPurgatories(new MockTimer(time), localId)
 
     try {
-      // Make the local replica the follower
+      // Make the local replica the leader
       val leaderTopicsDelta = topicsCreateDelta(localId, true)
       val leaderMetadataImage = imageFromTopics(leaderTopicsDelta.apply())
       replicaManager.applyDelta(leaderTopicsDelta, leaderMetadataImage)
@@ -3371,15 +3506,15 @@ class ReplicaManagerTest {
       assertEquals(None, replicaManager.replicaFetcherManager.getFetcher(topicPartition))
 
       // Send a fetch request
-      val fetchCallback = fetchMessages(
+      val fetchCallback = fetchPartitionAsFollower(
         replicaManager,
-        otherId,
         topicIdPartition,
         new PartitionData(Uuid.ZERO_UUID, 0, 0, Int.MaxValue, Optional.empty()),
-        Int.MaxValue,
-        IsolationLevel.READ_UNCOMMITTED,
-        None
+        replicaId = otherId,
+        minBytes = Int.MaxValue,
+        maxWaitMs = 1000
       )
+      assertFalse(fetchCallback.hasFired)
 
       // Change the local replica to follower
       val followerTopicsDelta = topicsChangeDelta(leaderMetadataImage.topics(), localId, false)
@@ -3510,6 +3645,351 @@ class ReplicaManagerTest {
     TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
   }
 
+  @Test
+  def testFetcherAreNotRestartedIfLeaderEpochIsNotBumpedWithZkPath(): Unit = {
+    val localId = 0
+    val topicPartition = new TopicPartition("foo", 0)
+
+    val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val replicaManager = setupReplicaManagerWithMockedPurgatories(
+      timer = new MockTimer(time),
+      brokerId = localId,
+      aliveBrokerIds = Seq(localId, localId + 1, localId + 2),
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+    )
+
+    try {
+      when(mockReplicaFetcherManager.removeFetcherForPartitions(
+        Set(topicPartition))
+      ).thenReturn(Map.empty[TopicPartition, PartitionFetchState])
+
+      // Make the local replica the follower.
+      var request = makeLeaderAndIsrRequest(
+        topicId = FOO_UUID,
+        topicPartition = topicPartition,
+        replicas = Seq(localId, localId + 1),
+        leaderAndIsr = LeaderAndIsr(
+          leader = localId + 1,
+          leaderEpoch = 0,
+          isr = List(localId, localId + 1),
+          leaderRecoveryState = LeaderRecoveryState.RECOVERED,
+          partitionEpoch = 0
+        )
+      )
+
+      replicaManager.becomeLeaderOrFollower(0, request, (_, _) => ())
+
+      // Check the state of that partition.
+      val HostedPartition.Online(followerPartition) = replicaManager.getPartition(topicPartition)
+      assertFalse(followerPartition.isLeader)
+      assertEquals(0, followerPartition.getLeaderEpoch)
+      assertEquals(0, followerPartition.getPartitionEpoch)
+
+      // Verify that the partition was removed and added back.
+      verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(topicPartition))
+      verify(mockReplicaFetcherManager).addFetcherForPartitions(Map(topicPartition -> InitialFetchState(
+        topicId = Some(FOO_UUID),
+        leader = BrokerEndPoint(localId + 1, s"host${localId + 1}", localId + 1),
+        currentLeaderEpoch = 0,
+        initOffset = 0
+      )))
+
+      reset(mockReplicaFetcherManager)
+
+      // Apply changes that bumps the partition epoch.
+      request = makeLeaderAndIsrRequest(
+        topicId = FOO_UUID,
+        topicPartition = topicPartition,
+        replicas = Seq(localId, localId + 1, localId + 2),
+        leaderAndIsr = LeaderAndIsr(
+          leader = localId + 1,
+          leaderEpoch = 0,
+          isr = List(localId, localId + 1),
+          leaderRecoveryState = LeaderRecoveryState.RECOVERED,
+          partitionEpoch = 1
+        )
+      )
+
+      replicaManager.becomeLeaderOrFollower(0, request, (_, _) => ())
+
+      assertFalse(followerPartition.isLeader)
+      assertEquals(0, followerPartition.getLeaderEpoch)
+      // Partition updates is fenced based on the leader epoch on the ZK path.
+      assertEquals(0, followerPartition.getPartitionEpoch)
+
+      // As the update is fenced based on the leader epoch, removeFetcherForPartitions and
+      // addFetcherForPartitions are not called at all.
+      reset(mockReplicaFetcherManager)
+
+      // Apply changes that bumps the leader epoch.
+      request = makeLeaderAndIsrRequest(
+        topicId = FOO_UUID,
+        topicPartition = topicPartition,
+        replicas = Seq(localId, localId + 1, localId + 2),
+        leaderAndIsr = LeaderAndIsr(
+          leader = localId + 2,
+          leaderEpoch = 1,
+          isr = List(localId, localId + 1, localId + 2),
+          leaderRecoveryState = LeaderRecoveryState.RECOVERED,
+          partitionEpoch = 2
+        )
+      )
+
+      replicaManager.becomeLeaderOrFollower(0, request, (_, _) => ())
+
+      assertFalse(followerPartition.isLeader)
+      assertEquals(1, followerPartition.getLeaderEpoch)
+      assertEquals(2, followerPartition.getPartitionEpoch)
+
+      // Verify that the partition was removed and added back.
+      verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(topicPartition))
+      verify(mockReplicaFetcherManager).addFetcherForPartitions(Map(topicPartition -> InitialFetchState(
+        topicId = Some(FOO_UUID),
+        leader = BrokerEndPoint(localId + 2, s"host${localId + 2}", localId + 2),
+        currentLeaderEpoch = 1,
+        initOffset = 0
+      )))
+    } finally {
+      replicaManager.shutdown()
+    }
+
+    TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
+  }
+
+  @Test
+  def testFetcherAreNotRestartedIfLeaderEpochIsNotBumpedWithKRaftPath(): Unit = {
+    val localId = 0
+    val topicPartition = new TopicPartition("foo", 0)
+
+    val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val replicaManager = setupReplicaManagerWithMockedPurgatories(
+      timer = new MockTimer(time),
+      brokerId = localId,
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+    )
+
+    try {
+      when(mockReplicaFetcherManager.removeFetcherForPartitions(
+        Set(topicPartition))
+      ).thenReturn(Map.empty[TopicPartition, PartitionFetchState])
+
+      // Make the local replica the follower.
+      var followerTopicsDelta = new TopicsDelta(TopicsImage.EMPTY)
+      followerTopicsDelta.replay(new TopicRecord().setName("foo").setTopicId(FOO_UUID))
+      followerTopicsDelta.replay(new PartitionRecord()
+        .setPartitionId(0)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId, localId + 1))
+        .setRemovingReplicas(Collections.emptyList())
+        .setAddingReplicas(Collections.emptyList())
+        .setLeader(localId + 1)
+        .setLeaderEpoch(0)
+        .setPartitionEpoch(0)
+      )
+      var followerMetadataImage = imageFromTopics(followerTopicsDelta.apply())
+      replicaManager.applyDelta(followerTopicsDelta, followerMetadataImage)
+
+      // Check the state of that partition.
+      val HostedPartition.Online(followerPartition) = replicaManager.getPartition(topicPartition)
+      assertFalse(followerPartition.isLeader)
+      assertEquals(0, followerPartition.getLeaderEpoch)
+      assertEquals(0, followerPartition.getPartitionEpoch)
+
+      // Verify that the partition was removed and added back.
+      verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(topicPartition))
+      verify(mockReplicaFetcherManager).addFetcherForPartitions(Map(topicPartition -> InitialFetchState(
+        topicId = Some(FOO_UUID),
+        leader = BrokerEndPoint(localId + 1, "localhost", 9093),
+        currentLeaderEpoch = 0,
+        initOffset = 0
+      )))
+
+      reset(mockReplicaFetcherManager)
+
+      // Apply changes that bumps the partition epoch.
+      followerTopicsDelta = new TopicsDelta(followerMetadataImage.topics())
+      followerTopicsDelta.replay(new PartitionChangeRecord()
+        .setPartitionId(0)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1, localId + 2))
+        .setIsr(util.Arrays.asList(localId, localId + 1))
+      )
+      followerMetadataImage = imageFromTopics(followerTopicsDelta.apply())
+      replicaManager.applyDelta(followerTopicsDelta, followerMetadataImage)
+
+      assertFalse(followerPartition.isLeader)
+      assertEquals(0, followerPartition.getLeaderEpoch)
+      assertEquals(1, followerPartition.getPartitionEpoch)
+
+      // Verify that partition's fetcher was not impacted.
+      verify(mockReplicaFetcherManager, never()).removeFetcherForPartitions(any())
+      verify(mockReplicaFetcherManager, never()).addFetcherForPartitions(any())
+
+      reset(mockReplicaFetcherManager)
+
+      // Apply changes that bumps the leader epoch.
+      followerTopicsDelta = new TopicsDelta(followerMetadataImage.topics())
+      followerTopicsDelta.replay(new PartitionChangeRecord()
+        .setPartitionId(0)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1, localId + 2))
+        .setIsr(util.Arrays.asList(localId, localId + 1, localId + 2))
+        .setLeader(localId + 2)
+      )
+
+      followerMetadataImage = imageFromTopics(followerTopicsDelta.apply())
+      replicaManager.applyDelta(followerTopicsDelta, followerMetadataImage)
+
+      assertFalse(followerPartition.isLeader)
+      assertEquals(1, followerPartition.getLeaderEpoch)
+      assertEquals(2, followerPartition.getPartitionEpoch)
+
+      // Verify that the partition was removed and added back.
+      verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(topicPartition))
+      verify(mockReplicaFetcherManager).addFetcherForPartitions(Map(topicPartition -> InitialFetchState(
+        topicId = Some(FOO_UUID),
+        leader = BrokerEndPoint(localId + 2, "localhost", 9093),
+        currentLeaderEpoch = 1,
+        initOffset = 0
+      )))
+    } finally {
+      replicaManager.shutdown()
+    }
+
+    TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
+  }
+
+  @Test
+  def testReplicasAreStoppedWhileInControlledShutdownWithKRaft(): Unit = {
+    val localId = 0
+    val foo0 = new TopicPartition("foo", 0)
+    val foo1 = new TopicPartition("foo", 1)
+    val foo2 = new TopicPartition("foo", 2)
+
+    val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val replicaManager = setupReplicaManagerWithMockedPurgatories(
+      timer = new MockTimer(time),
+      brokerId = localId,
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+    )
+
+    try {
+      when(mockReplicaFetcherManager.removeFetcherForPartitions(
+        Set(foo0, foo1))
+      ).thenReturn(Map.empty[TopicPartition, PartitionFetchState])
+
+      var topicsDelta = new TopicsDelta(TopicsImage.EMPTY)
+      topicsDelta.replay(new TopicRecord()
+        .setName("foo")
+        .setTopicId(FOO_UUID)
+      )
+
+      // foo0 is a follower in the ISR.
+      topicsDelta.replay(new PartitionRecord()
+        .setPartitionId(0)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId, localId + 1))
+        .setLeader(localId + 1)
+        .setLeaderEpoch(0)
+        .setPartitionEpoch(0)
+      )
+
+      // foo1 is a leader with only himself in the ISR.
+      topicsDelta.replay(new PartitionRecord()
+        .setPartitionId(1)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId))
+        .setLeader(localId)
+        .setLeaderEpoch(0)
+        .setPartitionEpoch(0)
+      )
+
+      // foo2 is a follower NOT in the ISR.
+      topicsDelta.replay(new PartitionRecord()
+        .setPartitionId(2)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId + 1))
+        .setLeader(localId + 1)
+        .setLeaderEpoch(0)
+        .setPartitionEpoch(0)
+      )
+
+      // Apply the delta.
+      var metadataImage = imageFromTopics(topicsDelta.apply())
+      replicaManager.applyDelta(topicsDelta, metadataImage)
+
+      // Check the state of the partitions.
+      val HostedPartition.Online(fooPartition0) = replicaManager.getPartition(foo0)
+      assertFalse(fooPartition0.isLeader)
+      assertEquals(0, fooPartition0.getLeaderEpoch)
+      assertEquals(0, fooPartition0.getPartitionEpoch)
+
+      val HostedPartition.Online(fooPartition1) = replicaManager.getPartition(foo1)
+      assertTrue(fooPartition1.isLeader)
+      assertEquals(0, fooPartition1.getLeaderEpoch)
+      assertEquals(0, fooPartition1.getPartitionEpoch)
+
+      val HostedPartition.Online(fooPartition2) = replicaManager.getPartition(foo2)
+      assertFalse(fooPartition2.isLeader)
+      assertEquals(0, fooPartition2.getLeaderEpoch)
+      assertEquals(0, fooPartition2.getPartitionEpoch)
+
+      reset(mockReplicaFetcherManager)
+
+      // The replica begins the controlled shutdown.
+      replicaManager.beginControlledShutdown()
+
+      // When the controller receives the controlled shutdown
+      // request, it does the following:
+      // - Shrinks the ISR of foo0 to remove this replica.
+      // - Sets the leader of foo1 to NO_LEADER because it cannot elect another leader.
+      // - Does nothing for foo2 because this replica is not in the ISR.
+      topicsDelta = new TopicsDelta(metadataImage.topics())
+      topicsDelta.replay(new PartitionChangeRecord()
+        .setPartitionId(0)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId + 1))
+        .setLeader(localId + 1)
+      )
+      topicsDelta.replay(new PartitionChangeRecord()
+        .setPartitionId(1)
+        .setTopicId(FOO_UUID)
+        .setReplicas(util.Arrays.asList(localId, localId + 1))
+        .setIsr(util.Arrays.asList(localId))
+        .setLeader(NO_LEADER)
+      )
+      metadataImage = imageFromTopics(topicsDelta.apply())
+      replicaManager.applyDelta(topicsDelta, metadataImage)
+
+      // Partition foo0 and foo1 are updated.
+      assertFalse(fooPartition0.isLeader)
+      assertEquals(1, fooPartition0.getLeaderEpoch)
+      assertEquals(1, fooPartition0.getPartitionEpoch)
+      assertFalse(fooPartition1.isLeader)
+      assertEquals(1, fooPartition1.getLeaderEpoch)
+      assertEquals(1, fooPartition1.getPartitionEpoch)
+
+      // Partition foo2 is not.
+      assertFalse(fooPartition2.isLeader)
+      assertEquals(0, fooPartition2.getLeaderEpoch)
+      assertEquals(0, fooPartition2.getPartitionEpoch)
+
+      // Fetcher for foo0 and foo1 are stopped.
+      verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(foo0, foo1))
+    } finally {
+      // Fetcher for foo2 is stopped when the replica manager shuts down
+      // because this replica was not in the ISR.
+      replicaManager.shutdown()
+    }
+
+    TestUtils.assertNoNonDaemonThreads(this.getClass.getName)
+  }
+
   private def topicsCreateDelta(startId: Int, isStartIdLeader: Boolean): TopicsDelta = {
     val leader = if (isStartIdLeader) startId else startId + 1
     val delta = new TopicsDelta(TopicsImage.EMPTY)
@@ -3580,7 +4060,7 @@ class ReplicaManagerTest {
       brokerId = 0, aliveBrokersIds)
     try {
       val tp = new TopicPartition(topic, 0)
-      val leaderAndIsr = new LeaderAndIsr(1, 0, aliveBrokersIds.toList, 0)
+      val leaderAndIsr = LeaderAndIsr(1, aliveBrokersIds.toList)
 
       // This test either starts with a topic ID in the PartitionFetchState and removes it on the next request (startsWithTopicId)
       // or does not start with a topic ID in the PartitionFetchState and adds one on the next request (!startsWithTopicId)
@@ -3656,4 +4136,41 @@ class ReplicaManagerTest {
       replicaManager.shutdown(checkpointHW = false)
     }
   }
+
+  @Test
+  def testDescribeLogDirs(): Unit = {
+    val topicPartition = 0
+    val topicId = Uuid.randomUuid()
+    val followerBrokerId = 0
+    val leaderBrokerId = 1
+    val leaderEpoch = 1
+    val leaderEpochIncrement = 2
+    val countDownLatch = new CountDownLatch(1)
+    val offsetFromLeader = 5
+
+    // Prepare the mocked components for the test
+    val (replicaManager, mockLogMgr) = prepareReplicaManagerAndLogManager(new MockTimer(time),
+      topicPartition, leaderEpoch + leaderEpochIncrement, followerBrokerId, leaderBrokerId, countDownLatch,
+      expectTruncation = false, localLogOffset = Some(10), offsetFromLeader = offsetFromLeader, topicId = Some(topicId))
+
+    val responses = replicaManager.describeLogDirs(Set(new TopicPartition(topic, topicPartition)))
+    assertEquals(mockLogMgr.liveLogDirs.size, responses.size)
+    responses.foreach { response =>
+      assertEquals(Errors.NONE.code, response.errorCode)
+      assertTrue(response.totalBytes > 0)
+      assertTrue(response.usableBytes >= 0)
+    }
+  }
+}
+
+class MockReplicaSelector extends ReplicaSelector {
+
+  private val selectionCount = new AtomicLong()
+
+  def getSelectionCount: Long = selectionCount.get
+
+  override def select(topicPartition: TopicPartition, clientMetadata: ClientMetadata, partitionView: PartitionView): Optional[ReplicaView] = {
+    selectionCount.incrementAndGet()
+    Optional.of(partitionView.leader)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/RequestQuotaTest.scala b/core/src/test/scala/unit/kafka/server/RequestQuotaTest.scala
index ddbc98770f009..82c19949e3bcf 100644
--- a/core/src/test/scala/unit/kafka/server/RequestQuotaTest.scala
+++ b/core/src/test/scala/unit/kafka/server/RequestQuotaTest.scala
@@ -251,7 +251,7 @@ class RequestQuotaTest extends BaseRequestTest {
               .setLeader(brokerId)
               .setLeaderEpoch(Int.MaxValue)
               .setIsr(List(brokerId).asJava)
-              .setZkVersion(2)
+              .setPartitionEpoch(2)
               .setReplicas(Seq(brokerId).asJava)
               .setIsNew(true)).asJava,
             getTopicIds().asJava,
@@ -263,7 +263,7 @@ class RequestQuotaTest extends BaseRequestTest {
               .setTopicName(tp.topic())
               .setPartitionStates(Seq(new StopReplicaPartitionState()
                 .setPartitionIndex(tp.partition())
-                .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 2)
+                .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 2)
                 .setDeletePartition(true)).asJava)
           ).asJava
           new StopReplicaRequest.Builder(ApiKeys.STOP_REPLICA.latestVersion, brokerId,
@@ -320,7 +320,7 @@ class RequestQuotaTest extends BaseRequestTest {
               )
           )
         case ApiKeys.OFFSET_FETCH =>
-          new OffsetFetchRequest.Builder("test-group", false, List(tp).asJava, false)
+          new OffsetFetchRequest.Builder(Map("test-group"-> List(tp).asJava).asJava, false, false)
 
         case ApiKeys.FIND_COORDINATOR =>
           new FindCoordinatorRequest.Builder(
@@ -598,8 +598,8 @@ class RequestQuotaTest extends BaseRequestTest {
           new EndQuorumEpochRequest.Builder(EndQuorumEpochRequest.singletonRequest(
             tp, 10, 5, Collections.singletonList(3)))
 
-        case ApiKeys.ALTER_ISR =>
-          new AlterIsrRequest.Builder(new AlterIsrRequestData())
+        case ApiKeys.ALTER_PARTITION =>
+          new AlterPartitionRequest.Builder(new AlterPartitionRequestData(), true)
 
         case ApiKeys.UPDATE_FEATURES =>
           new UpdateFeaturesRequest.Builder(new UpdateFeaturesRequestData())
@@ -763,7 +763,7 @@ class RequestQuotaTest extends BaseRequestTest {
 
 object RequestQuotaTest {
   val ClusterActions = ApiKeys.zkBrokerApis.asScala.filter(_.clusterAction).toSet
-  val ClusterActionsWithThrottle = Set(ApiKeys.ALLOCATE_PRODUCER_IDS)
+  val ClusterActionsWithThrottle = Set(ApiKeys.ALLOCATE_PRODUCER_IDS, ApiKeys.UPDATE_FEATURES)
   val SaslActions = Set(ApiKeys.SASL_HANDSHAKE, ApiKeys.SASL_AUTHENTICATE)
   val ClientActions = ApiKeys.zkBrokerApis.asScala.toSet -- ClusterActions -- SaslActions
 
diff --git a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala
index 16d17d2fd219b..70554d9427c2f 100644
--- a/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala
+++ b/core/src/test/scala/unit/kafka/server/ServerShutdownTest.scala
@@ -16,7 +16,7 @@
  */
 package kafka.server
 
-import kafka.utils.{CoreUtils, TestUtils}
+import kafka.utils.{CoreUtils, Exit, TestInfoUtils, TestUtils}
 
 import java.io.{DataInputStream, File}
 import java.net.ServerSocket
@@ -30,7 +30,6 @@ import kafka.zookeeper.ZooKeeperClientTimeoutException
 import org.apache.kafka.clients.consumer.KafkaConsumer
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
 import org.apache.kafka.common.Uuid
-import org.apache.kafka.common.errors.KafkaStorageException
 import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.protocol.ApiKeys
@@ -39,8 +38,9 @@ import org.apache.kafka.common.security.auth.SecurityProtocol
 import org.apache.kafka.common.serialization.{IntegerDeserializer, IntegerSerializer, StringDeserializer, StringSerializer}
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.metadata.BrokerState
-import org.junit.jupiter.api.{BeforeEach, Test, TestInfo, Timeout}
+import org.junit.jupiter.api.{BeforeEach, Disabled, TestInfo, Timeout}
 import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.api.function.Executable
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.ValueSource
 
@@ -82,7 +82,7 @@ class ServerShutdownTest extends KafkaServerTestHarness {
     super.setUp(testInfo)
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCleanShutdown(quorum: String): Unit = {
 
@@ -141,34 +141,53 @@ class ServerShutdownTest extends KafkaServerTestHarness {
     producer.close()
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testCleanShutdownAfterFailedStartup(quorum: String): Unit = {
-    if (quorum == "zk") {
-      propsToChangeUponRestart.setProperty(KafkaConfig.ZkConnectionTimeoutMsProp, "50")
-      propsToChangeUponRestart.setProperty(KafkaConfig.ZkConnectProp, "some.invalid.hostname.foo.bar.local:65535")
-      verifyCleanShutdownAfterFailedStartup[ZooKeeperClientTimeoutException](quorum)
-    } else {
+    if (isKRaftTest()) {
       propsToChangeUponRestart.setProperty(KafkaConfig.InitialBrokerRegistrationTimeoutMsProp, "1000")
       shutdownBroker()
       shutdownKRaftController()
-      verifyCleanShutdownAfterFailedStartup[CancellationException](quorum)
+      verifyCleanShutdownAfterFailedStartup[CancellationException]
+    } else {
+      propsToChangeUponRestart.setProperty(KafkaConfig.ZkConnectionTimeoutMsProp, "50")
+      propsToChangeUponRestart.setProperty(KafkaConfig.ZkConnectProp, "some.invalid.hostname.foo.bar.local:65535")
+      verifyCleanShutdownAfterFailedStartup[ZooKeeperClientTimeoutException]
     }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
-  def testCleanShutdownAfterFailedStartupDueToCorruptLogs(quorum: String): Unit = {
+  def testNoCleanShutdownAfterFailedStartupDueToCorruptLogs(quorum: String): Unit = {
     createTopic(topic)
     shutdownBroker()
     config.logDirs.foreach { dirName =>
       val partitionDir = new File(dirName, s"$topic-0")
       partitionDir.listFiles.foreach(f => TestUtils.appendNonsenseToFile(f, TestUtils.random.nextInt(1024) + 1))
     }
-    verifyCleanShutdownAfterFailedStartup[KafkaStorageException](quorum)
+
+    val expectedStatusCode = Some(1)
+    @volatile var receivedStatusCode = Option.empty[Int]
+    @volatile var hasHaltProcedureCalled = false
+    Exit.setHaltProcedure((statusCode, _) => {
+      hasHaltProcedureCalled = true
+      receivedStatusCode = Some(statusCode)
+    }.asInstanceOf[Nothing])
+
+    try {
+      val recreateBrokerExec: Executable = () => recreateBroker(true)
+      // this startup should fail with no online log dir (due to corrupted log), and exit directly without throwing exception
+      assertDoesNotThrow(recreateBrokerExec)
+      // JVM should exit with status code 1
+      TestUtils.waitUntilTrue(() => hasHaltProcedureCalled == true && expectedStatusCode == receivedStatusCode,
+        s"Expected to halt directly with the expected status code:${expectedStatusCode.get}, " +
+          s"but got hasHaltProcedureCalled: $hasHaltProcedureCalled and received status code: ${receivedStatusCode.orNull}")
+    } finally {
+      Exit.resetHaltProcedure()
+    }
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk"))
   def testCleanShutdownWithZkUnavailable(quorum: String): Unit = {
     shutdownZooKeeper()
@@ -177,7 +196,8 @@ class ServerShutdownTest extends KafkaServerTestHarness {
     verifyNonDaemonThreadsStatus()
   }
 
-  @ParameterizedTest
+  @Disabled
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("kraft"))
   def testCleanShutdownWithKRaftControllerUnavailable(quorum: String): Unit = {
     shutdownKRaftController()
@@ -186,7 +206,7 @@ class ServerShutdownTest extends KafkaServerTestHarness {
     verifyNonDaemonThreadsStatus()
   }
 
-  private def verifyCleanShutdownAfterFailedStartup[E <: Exception](quorum: String)(implicit exceptionClassTag: ClassTag[E]): Unit = {
+  private def verifyCleanShutdownAfterFailedStartup[E <: Exception](implicit exceptionClassTag: ClassTag[E]): Unit = {
     try {
       recreateBroker(startup = true)
       fail("Expected KafkaServer setup to fail and throw exception")
@@ -195,13 +215,24 @@ class ServerShutdownTest extends KafkaServerTestHarness {
       // identify the correct exception, making sure the server was shutdown, and cleaning up if anything
       // goes wrong so that awaitShutdown doesn't hang
       case e: Exception =>
-        assertTrue(exceptionClassTag.runtimeClass.isInstance(e), s"Unexpected exception $e")
-        assertEquals(if (quorum == "zk") BrokerState.NOT_RUNNING else BrokerState.SHUTTING_DOWN, brokers.head.brokerState)
+        assertCause(exceptionClassTag.runtimeClass, e)
+        assertEquals(if (isKRaftTest()) BrokerState.SHUTTING_DOWN else BrokerState.NOT_RUNNING, brokers.head.brokerState)
     } finally {
       shutdownBroker()
     }
   }
 
+  private def assertCause(expectedClass: Class[_], e: Throwable): Unit = {
+    var cause = e
+    while (cause != null) {
+      if (expectedClass.isInstance(cause)) {
+        return
+      }
+      cause = cause.getCause
+    }
+    fail(s"Failed to assert cause of $e, expected cause $expectedClass")
+  }
+
   private[this] def isNonDaemonKafkaThread(t: Thread): Boolean = {
     !t.isDaemon && t.isAlive && t.getName.startsWith(this.getClass.getName)
   }
@@ -212,7 +243,7 @@ class ServerShutdownTest extends KafkaServerTestHarness {
       .count(isNonDaemonKafkaThread))
   }
 
-  @ParameterizedTest
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
   @ValueSource(strings = Array("zk", "kraft"))
   def testConsecutiveShutdown(quorum: String): Unit = {
     shutdownBroker()
@@ -220,9 +251,11 @@ class ServerShutdownTest extends KafkaServerTestHarness {
   }
 
   // Verify that if controller is in the midst of processing a request, shutdown completes
-  // without waiting for request timeout.
-  @Test
-  def testControllerShutdownDuringSend(): Unit = {
+  // without waiting for request timeout. Since this involves LeaderAndIsr request, it is
+  // ZK-only for now.
+  @ParameterizedTest(name = TestInfoUtils.TestWithParameterizedQuorumName)
+  @ValueSource(strings = Array("zk"))
+  def testControllerShutdownDuringSend(quorum: String): Unit = {
     val securityProtocol = SecurityProtocol.PLAINTEXT
     val listenerName = ListenerName.forSecurityProtocol(securityProtocol)
 
diff --git a/core/src/test/scala/unit/kafka/server/StopReplicaRequestTest.scala b/core/src/test/scala/unit/kafka/server/StopReplicaRequestTest.scala
index ff246aa4cc5bd..d509a51145662 100644
--- a/core/src/test/scala/unit/kafka/server/StopReplicaRequestTest.scala
+++ b/core/src/test/scala/unit/kafka/server/StopReplicaRequestTest.scala
@@ -53,13 +53,13 @@ class StopReplicaRequestTest extends BaseRequestTest {
         .setTopicName(tp0.topic())
         .setPartitionStates(Seq(new StopReplicaPartitionState()
           .setPartitionIndex(tp0.partition())
-          .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 2)
+          .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 2)
           .setDeletePartition(true)).asJava),
       new StopReplicaTopicState()
         .setTopicName(tp1.topic())
         .setPartitionStates(Seq(new StopReplicaPartitionState()
           .setPartitionIndex(tp1.partition())
-          .setLeaderEpoch(LeaderAndIsr.initialLeaderEpoch + 2)
+          .setLeaderEpoch(LeaderAndIsr.InitialLeaderEpoch + 2)
           .setDeletePartition(true)).asJava)
     ).asJava
 
diff --git a/core/src/test/scala/unit/kafka/server/TopicIdWithOldInterBrokerProtocolTest.scala b/core/src/test/scala/unit/kafka/server/TopicIdWithOldInterBrokerProtocolTest.scala
index 7a844fa273ee9..25c7c7bd04c4a 100644
--- a/core/src/test/scala/unit/kafka/server/TopicIdWithOldInterBrokerProtocolTest.scala
+++ b/core/src/test/scala/unit/kafka/server/TopicIdWithOldInterBrokerProtocolTest.scala
@@ -19,7 +19,6 @@ package kafka.server
 
 import java.util.{Arrays, LinkedHashMap, Optional, Properties}
 
-import kafka.api.KAFKA_2_7_IV0
 import kafka.network.SocketServer
 import kafka.utils.TestUtils
 import org.apache.kafka.common.{TopicIdPartition, TopicPartition, Uuid}
@@ -27,6 +26,7 @@ import org.apache.kafka.common.message.DeleteTopicsRequestData
 import org.apache.kafka.common.message.DeleteTopicsRequestData.DeleteTopicState
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.{DeleteTopicsRequest, DeleteTopicsResponse, FetchRequest, FetchResponse, MetadataRequest, MetadataResponse}
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_7_IV0}
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
 
@@ -36,7 +36,7 @@ import scala.jdk.CollectionConverters._
 class TopicIdWithOldInterBrokerProtocolTest extends BaseRequestTest {
 
   override def brokerPropertyOverrides(properties: Properties): Unit = {
-    properties.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, KAFKA_2_7_IV0.toString)
+    properties.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, IBP_2_7_IV0.toString)
     properties.setProperty(KafkaConfig.OffsetsTopicPartitionsProp, "1")
     properties.setProperty(KafkaConfig.DefaultReplicationFactorProp, "2")
     properties.setProperty(KafkaConfig.RackProp, s"rack/${properties.getProperty(KafkaConfig.BrokerIdProp)}")
diff --git a/core/src/test/scala/unit/kafka/server/UpdateFeaturesTest.scala b/core/src/test/scala/unit/kafka/server/UpdateFeaturesTest.scala
index 92ba0425dcb27..f81432bd56b31 100644
--- a/core/src/test/scala/unit/kafka/server/UpdateFeaturesTest.scala
+++ b/core/src/test/scala/unit/kafka/server/UpdateFeaturesTest.scala
@@ -19,22 +19,20 @@ package kafka.server
 
 import java.util.{Optional, Properties}
 import java.util.concurrent.ExecutionException
-
-import kafka.api.KAFKA_2_7_IV0
 import kafka.utils.TestUtils
 import kafka.zk.{FeatureZNode, FeatureZNodeStatus, ZkVersion}
 import kafka.utils.TestUtils.waitUntilTrue
 import org.apache.kafka.clients.admin.{Admin, FeatureUpdate, UpdateFeaturesOptions, UpdateFeaturesResult}
 import org.apache.kafka.common.errors.InvalidRequestException
-import org.apache.kafka.common.feature.FinalizedVersionRange
 import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
 import org.apache.kafka.common.message.UpdateFeaturesRequestData
 import org.apache.kafka.common.message.UpdateFeaturesRequestData.FeatureUpdateKeyCollection
 import org.apache.kafka.common.protocol.Errors
 import org.apache.kafka.common.requests.{UpdateFeaturesRequest, UpdateFeaturesResponse}
 import org.apache.kafka.common.utils.Utils
+import org.apache.kafka.server.common.MetadataVersion.{IBP_2_7_IV0, IBP_3_2_IV0}
 import org.junit.jupiter.api.Test
-import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotEquals, assertNotNull, assertTrue, assertThrows}
+import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotEquals, assertNotNull, assertThrows, assertTrue}
 
 import scala.jdk.CollectionConverters._
 import scala.reflect.ClassTag
@@ -45,15 +43,15 @@ class UpdateFeaturesTest extends BaseRequestTest {
   override def brokerCount = 3
 
   override def brokerPropertyOverrides(props: Properties): Unit = {
-    props.put(KafkaConfig.InterBrokerProtocolVersionProp, KAFKA_2_7_IV0.toString)
+    props.put(KafkaConfig.InterBrokerProtocolVersionProp, IBP_2_7_IV0.toString)
   }
 
   private def defaultSupportedFeatures(): Features[SupportedVersionRange] = {
     Features.supportedFeatures(Utils.mkMap(Utils.mkEntry("feature_1", new SupportedVersionRange(1, 3))))
   }
 
-  private def defaultFinalizedFeatures(): Features[FinalizedVersionRange] = {
-    Features.finalizedFeatures(Utils.mkMap(Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 2))))
+  private def defaultFinalizedFeatures(): Map[String, Short] = {
+    Utils.mkMap(Utils.mkEntry("feature_1", 2.toShort)).asScala.toMap
   }
 
   private def updateSupportedFeatures(
@@ -84,12 +82,12 @@ class UpdateFeaturesTest extends BaseRequestTest {
     updateSupportedFeatures(features, Set[KafkaServer]() ++ servers)
   }
 
-  private def updateFeatureZNode(features: Features[FinalizedVersionRange]): Int = {
+  private def updateFeatureZNode(features: Map[String, Short]): Int = {
     val server = serverForId(0).get
-    val newNode = new FeatureZNode(FeatureZNodeStatus.Enabled, features)
+    val newNode = FeatureZNode(IBP_2_7_IV0, FeatureZNodeStatus.Enabled, features)
     val newVersion = server.zkClient.updateFeatureZNode(newNode)
     servers.foreach(s => {
-      s.featureCache.waitUntilEpochOrThrow(newVersion, s.config.zkConnectionTimeoutMs)
+      s.metadataCache.waitUntilFeatureEpochOrThrow(newVersion, s.config.zkConnectionTimeoutMs)
     })
     newVersion
   }
@@ -100,11 +98,11 @@ class UpdateFeaturesTest extends BaseRequestTest {
     FeatureZNode.decode(mayBeFeatureZNodeBytes.get)
   }
 
-  private def finalizedFeatures(features: java.util.Map[String, org.apache.kafka.clients.admin.FinalizedVersionRange]): Features[FinalizedVersionRange] = {
-    Features.finalizedFeatures(features.asScala.map {
+  private def finalizedFeatures(features: java.util.Map[String, org.apache.kafka.clients.admin.FinalizedVersionRange]): Map[String, Short] = {
+    features.asScala.map {
       case(name, versionRange) =>
-        (name, new FinalizedVersionRange(versionRange.minVersionLevel(), versionRange.maxVersionLevel()))
-    }.asJava)
+        (name, versionRange.maxVersionLevel())
+    }.toMap
   }
 
   private def supportedFeatures(features: java.util.Map[String, org.apache.kafka.clients.admin.SupportedVersionRange]): Features[SupportedVersionRange] = {
@@ -116,7 +114,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
 
   private def checkFeatures(client: Admin,
                             expectedNode: FeatureZNode,
-                            expectedFinalizedFeatures: Features[FinalizedVersionRange],
+                            expectedFinalizedFeatures: Map[String, Short],
                             expectedFinalizedFeaturesEpoch: Long,
                             expectedSupportedFeatures: Features[SupportedVersionRange]): Unit = {
     assertEquals(expectedNode, getFeatureZNode())
@@ -183,8 +181,8 @@ class UpdateFeaturesTest extends BaseRequestTest {
 
     val nodeBefore = getFeatureZNode()
     val validUpdates = new FeatureUpdateKeyCollection()
-    val validUpdate = new UpdateFeaturesRequestData.FeatureUpdateKey();
-    validUpdate.setFeature("feature_1");
+    val validUpdate = new UpdateFeaturesRequestData.FeatureUpdateKey()
+    validUpdate.setFeature("feature_1")
     validUpdate.setMaxVersionLevel(defaultSupportedFeatures().get("feature_1").max())
     validUpdate.setAllowDowngrade(false)
     validUpdates.add(validUpdate)
@@ -210,11 +208,11 @@ class UpdateFeaturesTest extends BaseRequestTest {
    */
   @Test
   def testShouldFailRequestWhenDowngradeFlagIsNotSetDuringDowngrade(): Unit = {
-    val targetMaxVersionLevel = (defaultFinalizedFeatures().get("feature_1").max() - 1).asInstanceOf[Short]
+    val targetMaxVersionLevel = (defaultFinalizedFeatures()("feature_1") - 1).asInstanceOf[Short]
     testWithInvalidFeatureUpdate[InvalidRequestException](
       "feature_1",
-      new FeatureUpdate(targetMaxVersionLevel,false),
-      ".*Can not downgrade finalized feature.*allowDowngrade.*".r)
+      new FeatureUpdate(targetMaxVersionLevel, FeatureUpdate.UpgradeType.UPGRADE),
+      ".*Can not downgrade finalized feature.*".r)
   }
 
   /**
@@ -223,11 +221,11 @@ class UpdateFeaturesTest extends BaseRequestTest {
    */
   @Test
   def testShouldFailRequestWhenDowngradeToHigherVersionLevelIsAttempted(): Unit = {
-    val targetMaxVersionLevel = (defaultFinalizedFeatures().get("feature_1").max() + 1).asInstanceOf[Short]
+    val targetMaxVersionLevel = (defaultFinalizedFeatures()("feature_1") + 1).asInstanceOf[Short]
     testWithInvalidFeatureUpdate[InvalidRequestException](
       "feature_1",
-      new FeatureUpdate(targetMaxVersionLevel, true),
-      ".*When the allowDowngrade flag set in the request, the provided maxVersionLevel:3.*existing maxVersionLevel:2.*".r)
+      new FeatureUpdate(targetMaxVersionLevel,  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+      ".*When the downgradeType is set to SAFE in the request, the provided versionLevel:3.*existing versionLevel:2.*".r)
   }
 
   /**
@@ -245,14 +243,14 @@ class UpdateFeaturesTest extends BaseRequestTest {
     val nodeBefore = getFeatureZNode()
 
     val invalidUpdates
-      = new UpdateFeaturesRequestData.FeatureUpdateKeyCollection();
-    val invalidUpdate = new UpdateFeaturesRequestData.FeatureUpdateKey();
+      = new UpdateFeaturesRequestData.FeatureUpdateKeyCollection()
+    val invalidUpdate = new UpdateFeaturesRequestData.FeatureUpdateKey()
     invalidUpdate.setFeature("feature_1")
     invalidUpdate.setMaxVersionLevel(0)
     invalidUpdate.setAllowDowngrade(false)
-    invalidUpdates.add(invalidUpdate);
+    invalidUpdates.add(invalidUpdate)
     val requestData = new UpdateFeaturesRequestData()
-    requestData.setFeatureUpdates(invalidUpdates);
+    requestData.setFeatureUpdates(invalidUpdates)
 
     val response = connectAndReceive[UpdateFeaturesResponse](
       new UpdateFeaturesRequest.Builder(new UpdateFeaturesRequestData().setFeatureUpdates(invalidUpdates)).build(),
@@ -264,7 +262,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
     assertEquals(Errors.INVALID_REQUEST, Errors.forCode(result.errorCode))
     assertNotNull(result.errorMessage)
     assertFalse(result.errorMessage.isEmpty)
-    val exceptionMsgPattern = ".*Can not provide maxVersionLevel: 0 less than 1.*allowDowngrade.*".r
+    val exceptionMsgPattern = ".*Can not provide versionLevel: 0 less than 1.*".r
     assertTrue(exceptionMsgPattern.findFirstIn(result.errorMessage).isDefined, result.errorMessage)
     checkFeatures(
       adminClient,
@@ -282,7 +280,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
   def testShouldFailRequestDuringDeletionOfNonExistingFeature(): Unit = {
     testWithInvalidFeatureUpdate[InvalidRequestException](
       "feature_non_existing",
-      new FeatureUpdate(3, true),
+      new FeatureUpdate(3.toShort,  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
       ".*Could not apply finalized feature update because the provided feature is not supported.*".r)
   }
 
@@ -292,17 +290,17 @@ class UpdateFeaturesTest extends BaseRequestTest {
    */
   @Test
   def testShouldFailRequestWhenUpgradingToSameVersionLevel(): Unit = {
-    val targetMaxVersionLevel = defaultFinalizedFeatures().get("feature_1").max()
+    val targetMaxVersionLevel = defaultFinalizedFeatures()("feature_1")
     testWithInvalidFeatureUpdate[InvalidRequestException](
       "feature_1",
-      new FeatureUpdate(targetMaxVersionLevel, false),
+      new FeatureUpdate(targetMaxVersionLevel,  FeatureUpdate.UpgradeType.UPGRADE),
       ".*Can not upgrade a finalized feature.*to the same value.*".r)
   }
 
   private def testShouldFailRequestDuringBrokerMaxVersionLevelIncompatibility(
     featureName: String,
     supportedVersionRange: SupportedVersionRange,
-    initialFinalizedVersionRange: Option[FinalizedVersionRange]
+    initialFinalizedVersionRange: Option[Short]
   ): Unit = {
     TestUtils.waitUntilControllerElected(zkClient)
 
@@ -327,11 +325,11 @@ class UpdateFeaturesTest extends BaseRequestTest {
     updateSupportedFeatures(supportedFeaturesWithVersionIncompatibility, brokersWithVersionIncompatibility)
 
     val initialFinalizedFeatures = initialFinalizedVersionRange.map(
-      versionRange => Features.finalizedFeatures(Utils.mkMap(Utils.mkEntry(featureName, versionRange)))
-    ).getOrElse(Features.emptyFinalizedFeatures())
+      versionRange => Utils.mkMap(Utils.mkEntry(featureName, versionRange)).asScala.toMap
+    ).getOrElse(Map.empty[String, Short])
     val versionBefore = updateFeatureZNode(initialFinalizedFeatures)
 
-    val invalidUpdate = new FeatureUpdate(supportedVersionRange.max(), false)
+    val invalidUpdate = new FeatureUpdate(supportedVersionRange.max(),  FeatureUpdate.UpgradeType.UPGRADE)
     val nodeBefore = getFeatureZNode()
     val adminClient = createAdminClient()
     val result = adminClient.updateFeatures(
@@ -358,7 +356,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
     testShouldFailRequestDuringBrokerMaxVersionLevelIncompatibility(
       feature,
       defaultSupportedFeatures().get(feature),
-      Some(defaultFinalizedFeatures().get(feature)))
+      Some(defaultFinalizedFeatures()(feature)))
   }
 
   /**
@@ -389,14 +387,13 @@ class UpdateFeaturesTest extends BaseRequestTest {
           Utils.mkEntry("feature_1", new SupportedVersionRange(1, 3)),
           Utils.mkEntry("feature_2", new SupportedVersionRange(2, 5))))
     updateSupportedFeaturesInAllBrokers(supportedFeatures)
-    val versionBefore = updateFeatureZNode(Features.emptyFinalizedFeatures())
+    val versionBefore = updateFeatureZNode(Map.empty)
 
-    val targetFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 3)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 3))))
-    val update1 = new FeatureUpdate(targetFinalizedFeatures.get("feature_1").max(), false)
-    val update2 = new FeatureUpdate(targetFinalizedFeatures.get("feature_2").max(), false)
+    val targetFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 3.toShort),
+      Utils.mkEntry("feature_2", 3.toShort)).asScala.toMap
+    val update1 = new FeatureUpdate(targetFinalizedFeatures("feature_1"),  FeatureUpdate.UpgradeType.UPGRADE)
+    val update2 = new FeatureUpdate(targetFinalizedFeatures("feature_2"),  FeatureUpdate.UpgradeType.UPGRADE)
 
     val adminClient = createAdminClient()
     adminClient.updateFeatures(
@@ -406,7 +403,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
 
     checkFeatures(
       adminClient,
-      new FeatureZNode(FeatureZNodeStatus.Enabled, targetFinalizedFeatures),
+      FeatureZNode(IBP_2_7_IV0, FeatureZNodeStatus.Enabled, targetFinalizedFeatures),
       targetFinalizedFeatures,
       versionBefore + 1,
       supportedFeatures)
@@ -425,21 +422,19 @@ class UpdateFeaturesTest extends BaseRequestTest {
         Utils.mkEntry("feature_1", new SupportedVersionRange(1, 3)),
         Utils.mkEntry("feature_2", new SupportedVersionRange(2, 5))))
     updateSupportedFeaturesInAllBrokers(supportedFeatures)
-    val initialFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 2)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 4))))
+    val initialFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 2.toShort),
+      Utils.mkEntry("feature_2", 4.toShort)).asScala.toMap
     val versionBefore = updateFeatureZNode(initialFinalizedFeatures)
 
     // Below we aim to do the following:
     // - Valid upgrade of feature_1 maxVersionLevel from 2 to 3
     // - Valid downgrade of feature_2 maxVersionLevel from 4 to 3
-    val targetFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 3)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 3))))
-    val update1 = new FeatureUpdate(targetFinalizedFeatures.get("feature_1").max(), false)
-    val update2 = new FeatureUpdate(targetFinalizedFeatures.get("feature_2").max(), true)
+    val targetFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 3.toShort),
+      Utils.mkEntry("feature_2", 3.toShort)).asScala.toMap
+    val update1 = new FeatureUpdate(targetFinalizedFeatures("feature_1"),  FeatureUpdate.UpgradeType.UPGRADE)
+    val update2 = new FeatureUpdate(targetFinalizedFeatures("feature_2"),  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE)
 
     val adminClient = createAdminClient()
     adminClient.updateFeatures(
@@ -449,7 +444,7 @@ class UpdateFeaturesTest extends BaseRequestTest {
 
     checkFeatures(
       adminClient,
-      new FeatureZNode(FeatureZNodeStatus.Enabled, targetFinalizedFeatures),
+      FeatureZNode(IBP_2_7_IV0, FeatureZNodeStatus.Enabled, targetFinalizedFeatures),
       targetFinalizedFeatures,
       versionBefore + 1,
       supportedFeatures)
@@ -469,22 +464,20 @@ class UpdateFeaturesTest extends BaseRequestTest {
         Utils.mkEntry("feature_1", new SupportedVersionRange(1, 3)),
         Utils.mkEntry("feature_2", new SupportedVersionRange(2, 5))))
     updateSupportedFeaturesInAllBrokers(supportedFeatures)
-    val initialFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 2)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 4))))
+    val initialFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 2.toShort),
+      Utils.mkEntry("feature_2", 4.toShort)).asScala.toMap
     val versionBefore = updateFeatureZNode(initialFinalizedFeatures)
 
     // Below we aim to do the following:
     // - Valid upgrade of feature_1 maxVersionLevel from 2 to 3
     // - Invalid downgrade of feature_2 maxVersionLevel from 4 to 3
     //   (because we intentionally do not set the allowDowngrade flag)
-    val targetFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 3)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 3))))
-    val validUpdate = new FeatureUpdate(targetFinalizedFeatures.get("feature_1").max(), false)
-    val invalidUpdate = new FeatureUpdate(targetFinalizedFeatures.get("feature_2").max(), false)
+    val targetFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 3.toShort),
+      Utils.mkEntry("feature_2", 3.toShort)).asScala.toMap
+    val validUpdate = new FeatureUpdate(targetFinalizedFeatures("feature_1"),  FeatureUpdate.UpgradeType.UPGRADE)
+    val invalidUpdate = new FeatureUpdate(targetFinalizedFeatures("feature_2"),  FeatureUpdate.UpgradeType.UPGRADE)
 
     val adminClient = createAdminClient()
     val result = adminClient.updateFeatures(
@@ -495,14 +488,13 @@ class UpdateFeaturesTest extends BaseRequestTest {
     result.values().get("feature_1").get()
     // Expect update for "feature_2" to have failed.
     checkException[InvalidRequestException](
-      result, Map("feature_2" -> ".*Can not downgrade finalized feature.*allowDowngrade.*".r))
-    val expectedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", targetFinalizedFeatures.get("feature_1")),
-        Utils.mkEntry("feature_2", initialFinalizedFeatures.get("feature_2"))))
+      result, Map("feature_2" -> ".*Can not downgrade finalized feature.*".r))
+    val expectedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", targetFinalizedFeatures("feature_1")),
+      Utils.mkEntry("feature_2", initialFinalizedFeatures("feature_2"))).asScala.toMap
     checkFeatures(
       adminClient,
-      FeatureZNode(FeatureZNodeStatus.Enabled, expectedFeatures),
+      FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, expectedFeatures),
       expectedFeatures,
       versionBefore + 1,
       supportedFeatures)
@@ -537,22 +529,20 @@ class UpdateFeaturesTest extends BaseRequestTest {
         Utils.mkEntry("feature_2", supportedFeatures.get("feature_2"))))
     updateSupportedFeatures(supportedFeaturesWithVersionIncompatibility, brokersWithVersionIncompatibility)
 
-    val initialFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 2)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 4))))
+    val initialFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 2.toShort),
+      Utils.mkEntry("feature_2", 4.toShort)).asScala.toMap
     val versionBefore = updateFeatureZNode(initialFinalizedFeatures)
 
     // Below we aim to do the following:
     // - Invalid upgrade of feature_1 maxVersionLevel from 2 to 3
     //   (because one of the brokers does not support the max version: 3)
     // - Valid downgrade of feature_2 maxVersionLevel from 4 to 3
-    val targetFinalizedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", new FinalizedVersionRange(1, 3)),
-        Utils.mkEntry("feature_2", new FinalizedVersionRange(2, 3))))
-    val invalidUpdate = new FeatureUpdate(targetFinalizedFeatures.get("feature_1").max(), false)
-    val validUpdate = new FeatureUpdate(targetFinalizedFeatures.get("feature_2").max(), true)
+    val targetFinalizedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", 3.toShort),
+      Utils.mkEntry("feature_2", 3.toShort)).asScala.toMap
+    val invalidUpdate = new FeatureUpdate(targetFinalizedFeatures("feature_1"),  FeatureUpdate.UpgradeType.UPGRADE)
+    val validUpdate = new FeatureUpdate(targetFinalizedFeatures("feature_2"),  FeatureUpdate.UpgradeType.SAFE_DOWNGRADE)
 
     val adminClient = createAdminClient()
     val result = adminClient.updateFeatures(
@@ -563,13 +553,12 @@ class UpdateFeaturesTest extends BaseRequestTest {
     result.values().get("feature_2").get()
     // Expect update for "feature_1" to have failed.
     checkException[InvalidRequestException](result, Map("feature_1" -> ".*brokers.*incompatible.*".r))
-    val expectedFeatures = Features.finalizedFeatures(
-      Utils.mkMap(
-        Utils.mkEntry("feature_1", initialFinalizedFeatures.get("feature_1")),
-        Utils.mkEntry("feature_2", targetFinalizedFeatures.get("feature_2"))))
+    val expectedFeatures = Utils.mkMap(
+      Utils.mkEntry("feature_1", initialFinalizedFeatures("feature_1")),
+      Utils.mkEntry("feature_2", targetFinalizedFeatures("feature_2"))).asScala.toMap
     checkFeatures(
       adminClient,
-      FeatureZNode(FeatureZNodeStatus.Enabled, expectedFeatures),
+      FeatureZNode(IBP_3_2_IV0, FeatureZNodeStatus.Enabled, expectedFeatures),
       expectedFeatures,
       versionBefore + 1,
       supportedFeatures)
diff --git a/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceTest.scala b/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceTest.scala
index f02f87ba85cd8..c8c7a89df779f 100644
--- a/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceTest.scala
+++ b/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceTest.scala
@@ -19,7 +19,6 @@ package kafka.server.epoch
 
 import java.io.{File, RandomAccessFile}
 import java.util.Properties
-import kafka.api.ApiVersion
 import kafka.log.{UnifiedLog, LogLoader}
 import kafka.server.KafkaConfig._
 import kafka.server.{KafkaConfig, KafkaServer}
@@ -32,6 +31,7 @@ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
 import org.apache.kafka.common.TopicPartition
 import org.apache.kafka.common.record.RecordBatch
 import org.apache.kafka.common.serialization.ByteArrayDeserializer
+import org.apache.kafka.server.common.MetadataVersion
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 
@@ -49,8 +49,8 @@ import scala.collection.Seq
   */
 class EpochDrivenReplicationProtocolAcceptanceTest extends QuorumTestHarness with Logging {
 
-  // Set this to KAFKA_0_11_0_IV1 to demonstrate the tests failing in the pre-KIP-101 case
-  val apiVersion = ApiVersion.latestVersion
+  // Set this to IBP_0_11_0_IV1 to demonstrate the tests failing in the pre-KIP-101 case
+  override def metadataVersion = MetadataVersion.latest
   val topic = "topic1"
   val msg = new Array[Byte](1000)
   val msgBigger = new Array[Byte](10000)
@@ -178,7 +178,7 @@ class EpochDrivenReplicationProtocolAcceptanceTest extends QuorumTestHarness wit
     assertEquals(getLogFile(brokers(0), 0).length, getLogFile(brokers(1), 0).length, "Log files should match Broker0 vs Broker 1")
   }
 
-  //We can reproduce the pre-KIP-101 failure of this test by setting KafkaConfig.InterBrokerProtocolVersionProp = KAFKA_0_11_0_IV1
+  //We can reproduce the pre-KIP-101 failure of this test by setting KafkaConfig.InterBrokerProtocolVersionProp = IBP_0_11_0_IV1
   @Test
   def offsetsShouldNotGoBackwards(): Unit = {
 
@@ -465,7 +465,7 @@ class EpochDrivenReplicationProtocolAcceptanceTest extends QuorumTestHarness wit
 
   private def createBrokerForId(id: Int, enableUncleanLeaderElection: Boolean = false): KafkaServer = {
     val config = createBrokerConfig(id, zkConnect)
-    TestUtils.setIbpAndMessageFormatVersions(config, apiVersion)
+    TestUtils.setIbpAndMessageFormatVersions(config, metadataVersion)
     config.setProperty(KafkaConfig.UncleanLeaderElectionEnableProp, enableUncleanLeaderElection.toString)
     createServer(fromProps(config))
   }
diff --git a/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceWithIbp26Test.scala b/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceWithIbp26Test.scala
index 2ad4776bb2ca3..9b0eb4c676a2e 100644
--- a/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceWithIbp26Test.scala
+++ b/core/src/test/scala/unit/kafka/server/epoch/EpochDrivenReplicationProtocolAcceptanceWithIbp26Test.scala
@@ -17,7 +17,7 @@
 
 package kafka.server.epoch
 
-import kafka.api.KAFKA_2_6_IV0
+import org.apache.kafka.server.common.MetadataVersion.IBP_2_6_IV0
 
 /**
  * With IBP 2.7 onwards, we truncate based on diverging epochs returned in fetch responses.
@@ -25,5 +25,5 @@ import kafka.api.KAFKA_2_6_IV0
  * verifies that we handle older IBP versions with truncation on leader/follower change correctly.
  */
 class EpochDrivenReplicationProtocolAcceptanceWithIbp26Test extends EpochDrivenReplicationProtocolAcceptanceTest {
-  override val apiVersion = KAFKA_2_6_IV0
+  override val metadataVersion = IBP_2_6_IV0
 }
diff --git a/core/src/test/scala/unit/kafka/server/epoch/LeaderEpochIntegrationTest.scala b/core/src/test/scala/unit/kafka/server/epoch/LeaderEpochIntegrationTest.scala
index 3205606c81462..ef2e8200107de 100644
--- a/core/src/test/scala/unit/kafka/server/epoch/LeaderEpochIntegrationTest.scala
+++ b/core/src/test/scala/unit/kafka/server/epoch/LeaderEpochIntegrationTest.scala
@@ -18,7 +18,7 @@ package kafka.server.epoch
 
 import kafka.cluster.BrokerEndPoint
 import kafka.server.KafkaConfig._
-import kafka.server.{BlockingSend, KafkaServer, ReplicaFetcherBlockingSend}
+import kafka.server.{BlockingSend, KafkaServer, BrokerBlockingSender}
 import kafka.utils.Implicits._
 import kafka.utils.TestUtils._
 import kafka.utils.{Logging, TestUtils}
@@ -231,7 +231,7 @@ class LeaderEpochIntegrationTest extends QuorumTestHarness with Logging {
     val node = from.metadataCache.getAliveBrokerNode(to.config.brokerId,
       from.config.interBrokerListenerName).get
     val endPoint = new BrokerEndPoint(node.id(), node.host(), node.port())
-    new ReplicaFetcherBlockingSend(endPoint, from.config, new Metrics(), new SystemTime(), 42, "TestFetcher", new LogContext())
+    new BrokerBlockingSender(endPoint, from.config, new Metrics(), new SystemTime(), 42, "TestFetcher", new LogContext())
   }
 
   private def waitForEpochChangeTo(topic: String, partition: Int, epoch: Int): Unit = {
diff --git a/core/src/test/scala/unit/kafka/server/epoch/OffsetsForLeaderEpochTest.scala b/core/src/test/scala/unit/kafka/server/epoch/OffsetsForLeaderEpochTest.scala
index 44e17ebb4cc2d..4c6d74652cf83 100644
--- a/core/src/test/scala/unit/kafka/server/epoch/OffsetsForLeaderEpochTest.scala
+++ b/core/src/test/scala/unit/kafka/server/epoch/OffsetsForLeaderEpochTest.scala
@@ -70,9 +70,9 @@ class OffsetsForLeaderEpochTest {
       scheduler = null,
       logManager = logManager,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
     val partition = replicaManager.createPartition(tp)
     partition.setLog(mockLog, isFutureLog = false)
     partition.leaderReplicaIdOpt = Some(config.brokerId)
@@ -99,9 +99,9 @@ class OffsetsForLeaderEpochTest {
       scheduler = null,
       logManager = logManager,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
     replicaManager.createPartition(tp)
 
     //Given
@@ -130,9 +130,9 @@ class OffsetsForLeaderEpochTest {
       scheduler = null,
       logManager = logManager,
       quotaManagers = quotaManager,
-      metadataCache = MetadataCache.zkMetadataCache(config.brokerId),
+      metadataCache = MetadataCache.zkMetadataCache(config.brokerId, config.interBrokerProtocolVersion),
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
-      alterIsrManager = alterIsrManager)
+      alterPartitionManager = alterIsrManager)
 
     //Given
     val epochRequested: Integer = 5
diff --git a/core/src/test/scala/unit/kafka/server/epoch/util/ReplicaFetcherMockBlockingSend.scala b/core/src/test/scala/unit/kafka/server/epoch/util/MockBlockingSender.scala
similarity index 95%
rename from core/src/test/scala/unit/kafka/server/epoch/util/ReplicaFetcherMockBlockingSend.scala
rename to core/src/test/scala/unit/kafka/server/epoch/util/MockBlockingSender.scala
index 8f3fcff371c41..ac1d8b575479d 100644
--- a/core/src/test/scala/unit/kafka/server/epoch/util/ReplicaFetcherMockBlockingSend.scala
+++ b/core/src/test/scala/unit/kafka/server/epoch/util/MockBlockingSender.scala
@@ -16,19 +16,19 @@
   */
 package kafka.server.epoch.util
 
-import java.net.SocketTimeoutException
-import java.util
 import kafka.cluster.BrokerEndPoint
 import kafka.server.BlockingSend
 import org.apache.kafka.clients.{ClientRequest, ClientResponse, MockClient, NetworkClientUtils}
-import org.apache.kafka.common.message.{FetchResponseData, OffsetForLeaderEpochResponseData}
 import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.{EpochEndOffset, OffsetForLeaderTopicResult}
+import org.apache.kafka.common.message.{FetchResponseData, OffsetForLeaderEpochResponseData}
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.requests.AbstractRequest.Builder
 import org.apache.kafka.common.requests.{AbstractRequest, FetchResponse, OffsetsForLeaderEpochResponse, FetchMetadata => JFetchMetadata}
 import org.apache.kafka.common.utils.{SystemTime, Time}
 import org.apache.kafka.common.{Node, TopicIdPartition, TopicPartition, Uuid}
 
+import java.net.SocketTimeoutException
+import java.util
 import scala.collection.Map
 
 /**
@@ -39,9 +39,9 @@ import scala.collection.Map
   * OFFSET_FOR_LEADER_EPOCH with different offsets in response, it should update offsets using
   * setOffsetsForNextResponse
   */
-class ReplicaFetcherMockBlockingSend(offsets: java.util.Map[TopicPartition, EpochEndOffset],
-                                     sourceBroker: BrokerEndPoint,
-                                     time: Time)
+class MockBlockingSender(offsets: java.util.Map[TopicPartition, EpochEndOffset],
+                         sourceBroker: BrokerEndPoint,
+                         time: Time)
   extends BlockingSend {
 
   private val client = new MockClient(new SystemTime)
@@ -70,6 +70,8 @@ class ReplicaFetcherMockBlockingSend(offsets: java.util.Map[TopicPartition, Epoc
     this.topicIds = topicIds
   }
 
+  override def brokerEndPoint(): BrokerEndPoint = sourceBroker
+
   override def sendRequest(requestBuilder: Builder[_ <: AbstractRequest]): ClientResponse = {
     if (!NetworkClientUtils.awaitReady(client, sourceNode, time, 500))
       throw new SocketTimeoutException(s"Failed to connect within 500 ms")
diff --git a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataListenerTest.scala b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataListenerTest.scala
index d04377a21c241..6c8c2599d296a 100644
--- a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataListenerTest.scala
+++ b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataListenerTest.scala
@@ -20,20 +20,29 @@ package kafka.server.metadata
 import java.util
 import java.util.concurrent.atomic.AtomicReference
 import java.util.{Collections, Optional}
-
-import org.apache.kafka.common.metadata.{PartitionChangeRecord, PartitionRecord, RegisterBrokerRecord, TopicRecord}
+import org.apache.kafka.common.metadata.{FeatureLevelRecord, PartitionChangeRecord, PartitionRecord, RegisterBrokerRecord, TopicRecord}
+import org.apache.kafka.common.metrics.Metrics
 import org.apache.kafka.common.utils.Time
 import org.apache.kafka.common.{Endpoint, Uuid}
 import org.apache.kafka.image.{MetadataDelta, MetadataImage}
 import org.apache.kafka.metadata.{BrokerRegistration, RecordTestUtils, VersionRange}
-import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.server.common.{ApiMessageAndVersion, MetadataVersion}
+import org.apache.kafka.server.fault.MockFaultHandler
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
-import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.{AfterEach, Test}
 
 import scala.jdk.CollectionConverters._
 
 class BrokerMetadataListenerTest {
+  private val metadataLoadingFaultHandler = new MockFaultHandler("metadata loading")
+
+  @AfterEach
+  def verifyNoFaults(): Unit = {
+    metadataLoadingFaultHandler.maybeRethrowFirstException()
+  }
+
   private def newBrokerMetadataListener(
+    metrics: BrokerServerMetrics = BrokerServerMetrics(new Metrics()),
     snapshotter: Option[MetadataSnapshotter] = None,
     maxBytesBetweenSnapshots: Long = 1000000L,
   ): BrokerMetadataListener = {
@@ -42,7 +51,10 @@ class BrokerMetadataListenerTest {
       time = Time.SYSTEM,
       threadNamePrefix = None,
       maxBytesBetweenSnapshots = maxBytesBetweenSnapshots,
-      snapshotter = snapshotter)
+      snapshotter = snapshotter,
+      brokerMetrics = metrics,
+      metadataLoadingFaultHandler = metadataLoadingFaultHandler
+    )
   }
 
   @Test
@@ -53,38 +65,64 @@ class BrokerMetadataListenerTest {
 
   @Test
   def testPublish(): Unit = {
-    val listener = newBrokerMetadataListener()
+    val metrics = BrokerServerMetrics(new Metrics())
+    val listener = newBrokerMetadataListener(metrics = metrics)
     try {
-      listener.handleCommit(RecordTestUtils.mockBatchReader(100L,
-        util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
-          setBrokerId(0).
-          setBrokerEpoch(100L).
-          setFenced(false).
-          setRack(null).
-          setIncarnationId(Uuid.fromString("GFBwlTcpQUuLYQ2ig05CSg")), 0.toShort))))
+      val unfencedTimestamp = 300L
+      listener.handleCommit(
+        RecordTestUtils.mockBatchReader(
+          100,
+          unfencedTimestamp,
+          util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
+            setBrokerId(0).
+            setBrokerEpoch(100L).
+            setFenced(false).
+            setRack(null).
+            setIncarnationId(Uuid.fromString("GFBwlTcpQUuLYQ2ig05CSg")), 0.toShort))
+        )
+      )
       val imageRecords = listener.getImageRecords().get()
-      assertEquals(0, imageRecords.size())
+      assertEquals(1, imageRecords.size())
       assertEquals(100L, listener.highestMetadataOffset)
-      listener.handleCommit(RecordTestUtils.mockBatchReader(200L,
-        util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
-          setBrokerId(1).
-          setBrokerEpoch(200L).
-          setFenced(true).
-          setRack(null).
-          setIncarnationId(Uuid.fromString("QkOQtNKVTYatADcaJ28xDg")), 0.toShort))))
+      assertEquals(0L, metrics.lastAppliedRecordOffset.get)
+      assertEquals(0L, metrics.lastAppliedRecordTimestamp.get)
+      assertEquals(0L, metrics.metadataLoadErrorCount.get)
+      assertEquals(0L, metrics.metadataApplyErrorCount.get)
+
+      val fencedTimestamp = 500L
+      val fencedLastOffset = 200L
+      listener.handleCommit(
+        RecordTestUtils.mockBatchReader(
+          fencedLastOffset,
+          fencedTimestamp,
+          util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
+            setBrokerId(1).
+            setBrokerEpoch(200L).
+            setFenced(true).
+            setRack(null).
+            setIncarnationId(Uuid.fromString("QkOQtNKVTYatADcaJ28xDg")), 0.toShort))
+        )
+      )
       listener.startPublishing(new MetadataPublisher {
         override def publish(delta: MetadataDelta, newImage: MetadataImage): Unit = {
           assertEquals(200L, newImage.highestOffsetAndEpoch().offset)
           assertEquals(new BrokerRegistration(0, 100L,
             Uuid.fromString("GFBwlTcpQUuLYQ2ig05CSg"), Collections.emptyList[Endpoint](),
-            Collections.emptyMap[String, VersionRange](), Optional.empty[String](), false),
+            Collections.emptyMap[String, VersionRange](), Optional.empty[String](), false, false),
             delta.clusterDelta().broker(0))
           assertEquals(new BrokerRegistration(1, 200L,
             Uuid.fromString("QkOQtNKVTYatADcaJ28xDg"), Collections.emptyList[Endpoint](),
-            Collections.emptyMap[String, VersionRange](), Optional.empty[String](), true),
+            Collections.emptyMap[String, VersionRange](), Optional.empty[String](), true, false),
             delta.clusterDelta().broker(1))
         }
+
+        override def publishedOffset: Long = -1
       }).get()
+
+      assertEquals(fencedLastOffset, metrics.lastAppliedRecordOffset.get)
+      assertEquals(fencedTimestamp, metrics.lastAppliedRecordTimestamp.get)
+      assertEquals(0L, metrics.metadataLoadErrorCount.get)
+      assertEquals(0L, metrics.metadataApplyErrorCount.get)
     } finally {
       listener.close()
     }
@@ -125,6 +163,8 @@ class BrokerMetadataListenerTest {
     override def publish(delta: MetadataDelta, newImage: MetadataImage): Unit = {
       image = newImage
     }
+
+    override def publishedOffset: Long = -1
   }
 
   private val FOO_ID = Uuid.fromString("jj1G9utnTuCegi_gpnRgYw")
@@ -132,15 +172,22 @@ class BrokerMetadataListenerTest {
   private def generateManyRecords(listener: BrokerMetadataListener,
                                   endOffset: Long): Unit = {
     (0 to 10000).foreach { _ =>
-      listener.handleCommit(RecordTestUtils.mockBatchReader(endOffset,
-        util.Arrays.asList(new ApiMessageAndVersion(new PartitionChangeRecord().
-          setPartitionId(0).
-          setTopicId(FOO_ID).
-          setRemovingReplicas(Collections.singletonList(1)), 0.toShort),
-          new ApiMessageAndVersion(new PartitionChangeRecord().
-            setPartitionId(0).
-            setTopicId(FOO_ID).
-            setRemovingReplicas(Collections.emptyList()), 0.toShort))))
+      listener.handleCommit(
+        RecordTestUtils.mockBatchReader(
+          endOffset,
+          0,
+          util.Arrays.asList(
+            new ApiMessageAndVersion(new PartitionChangeRecord().
+              setPartitionId(0).
+              setTopicId(FOO_ID).
+              setRemovingReplicas(Collections.singletonList(1)), 0.toShort),
+            new ApiMessageAndVersion(new PartitionChangeRecord().
+              setPartitionId(0).
+              setTopicId(FOO_ID).
+              setRemovingReplicas(Collections.emptyList()), 0.toShort)
+          )
+        )
+      )
     }
     listener.getImageRecords().get()
   }
@@ -205,19 +252,61 @@ class BrokerMetadataListenerTest {
     }
   }
 
+  @Test
+  def testNotSnapshotAfterMetadataVersionChangeBeforePublishing(): Unit = {
+    val snapshotter = new MockMetadataSnapshotter()
+    val listener = newBrokerMetadataListener(snapshotter = Some(snapshotter),
+      maxBytesBetweenSnapshots = 1000L)
+
+    updateFeature(listener, feature = MetadataVersion.FEATURE_NAME, MetadataVersion.latest.featureLevel(), 100L)
+    listener.getImageRecords().get()
+    assertEquals(-1L, snapshotter.activeSnapshotOffset, "We won't generate snapshot on metadata version change before starting publishing")
+  }
+
+  @Test
+  def testSnapshotAfterMetadataVersionChangeWhenStarting(): Unit = {
+    val snapshotter = new MockMetadataSnapshotter()
+    val listener = newBrokerMetadataListener(snapshotter = Some(snapshotter),
+      maxBytesBetweenSnapshots = 1000L)
+
+    val endOffset = 100L
+    updateFeature(listener, feature = MetadataVersion.FEATURE_NAME, MetadataVersion.latest.featureLevel(), endOffset)
+    listener.startPublishing(new MockMetadataPublisher()).get()
+    assertEquals(endOffset, snapshotter.activeSnapshotOffset, "We should try to generate snapshot when starting publishing")
+  }
+
+  @Test
+  def testSnapshotAfterMetadataVersionChange(): Unit = {
+    val snapshotter = new MockMetadataSnapshotter()
+    val listener = newBrokerMetadataListener(snapshotter = Some(snapshotter),
+      maxBytesBetweenSnapshots = 1000L)
+    listener.startPublishing(new MockMetadataPublisher()).get()
+
+    val endOffset = 100L
+    updateFeature(listener, feature = MetadataVersion.FEATURE_NAME, (MetadataVersion.latest().featureLevel() - 1).toShort, endOffset)
+    // Waiting for the metadata version update to get processed
+    listener.getImageRecords().get()
+    assertEquals(endOffset, snapshotter.activeSnapshotOffset, "We should generate snapshot on feature update")
+  }
+
   private def registerBrokers(
     listener: BrokerMetadataListener,
     brokerIds: Iterable[Int],
     endOffset: Long
   ): Unit = {
     brokerIds.foreach { brokerId =>
-      listener.handleCommit(RecordTestUtils.mockBatchReader(endOffset,
-        util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
-          setBrokerId(brokerId).
-          setBrokerEpoch(100L).
-          setFenced(false).
-          setRack(null).
-          setIncarnationId(Uuid.fromString("GFBwlTcpQUuLYQ2ig05CS" + brokerId)), 0.toShort))))
+      listener.handleCommit(
+        RecordTestUtils.mockBatchReader(
+          endOffset,
+          0,
+          util.Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
+            setBrokerId(brokerId).
+            setBrokerEpoch(100L).
+            setFenced(false).
+            setRack(null).
+            setIncarnationId(Uuid.fromString("GFBwlTcpQUuLYQ2ig05CS" + brokerId)), 0.toShort))
+        )
+      )
     }
   }
 
@@ -226,17 +315,41 @@ class BrokerMetadataListenerTest {
     replicas: Seq[Int],
     endOffset: Long
   ): Unit = {
-    listener.handleCommit(RecordTestUtils.mockBatchReader(endOffset,
-      util.Arrays.asList(
-        new ApiMessageAndVersion(new TopicRecord().
-          setName("foo").
-          setTopicId(FOO_ID), 0.toShort),
-        new ApiMessageAndVersion(new PartitionRecord().
-          setPartitionId(0).
-          setTopicId(FOO_ID).
-          setIsr(replicas.map(Int.box).asJava).
-          setLeader(0).
-          setReplicas(replicas.map(Int.box).asJava), 0.toShort)))
+    listener.handleCommit(
+      RecordTestUtils.mockBatchReader(
+        endOffset,
+        0,
+        util.Arrays.asList(
+          new ApiMessageAndVersion(new TopicRecord().
+            setName("foo").
+            setTopicId(FOO_ID), 0.toShort),
+          new ApiMessageAndVersion(new PartitionRecord().
+            setPartitionId(0).
+            setTopicId(FOO_ID).
+            setIsr(replicas.map(Int.box).asJava).
+            setLeader(0).
+            setReplicas(replicas.map(Int.box).asJava), 0.toShort)
+        )
+      )
+    )
+  }
+
+  private def updateFeature(
+    listener: BrokerMetadataListener,
+    feature: String,
+    version: Short,
+    endOffset: Long
+  ): Unit = {
+    listener.handleCommit(
+      RecordTestUtils.mockBatchReader(
+        endOffset,
+        0,
+        util.Arrays.asList(
+          new ApiMessageAndVersion(new FeatureLevelRecord().
+            setName(feature).
+            setFeatureLevel(version), 0.toShort)
+        )
+      )
     )
   }
 
diff --git a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala
index a8c5002a05d9d..652b8b3a0c29f 100644
--- a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala
+++ b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala
@@ -17,19 +17,53 @@
 
 package unit.kafka.server.metadata
 
+import java.util.Collections.{singleton, singletonList, singletonMap}
+import java.util.Properties
+import java.util.concurrent.atomic.{AtomicInteger, AtomicReference}
 import kafka.log.UnifiedLog
+import kafka.server.{BrokerServer, KafkaConfig}
 import kafka.server.metadata.BrokerMetadataPublisher
+import kafka.testkit.{KafkaClusterTestKit, TestKitNodes}
+import kafka.utils.TestUtils
+import org.apache.kafka.clients.admin.AlterConfigOp.OpType.SET
+import org.apache.kafka.clients.admin.{Admin, AlterConfigOp, ConfigEntry, NewTopic}
+import org.apache.kafka.common.config.ConfigResource
+import org.apache.kafka.common.config.ConfigResource.Type.BROKER
+import org.apache.kafka.common.utils.Exit
 import org.apache.kafka.common.{TopicPartition, Uuid}
 import org.apache.kafka.image.{MetadataImageTest, TopicImage, TopicsImage}
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.apache.kafka.metadata.PartitionRegistration
-import org.junit.jupiter.api.Test
-import org.junit.jupiter.api.Assertions.assertEquals
-
+import org.apache.kafka.server.fault.{FaultHandler, MockFaultHandler}
+import org.junit.jupiter.api.Assertions.{assertEquals, assertNotNull, assertTrue}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
+import org.mockito.ArgumentMatchers.any
 import org.mockito.Mockito
+import org.mockito.Mockito.doThrow
+import org.mockito.invocation.InvocationOnMock
+import org.mockito.stubbing.Answer
 
 import scala.jdk.CollectionConverters._
 
 class BrokerMetadataPublisherTest {
+  val exitException = new AtomicReference[Throwable](null)
+
+  @BeforeEach
+  def setUp(): Unit = {
+    Exit.setExitProcedure((code, _) => exitException.set(new RuntimeException(s"Exit ${code}")))
+    Exit.setHaltProcedure((code, _) => exitException.set(new RuntimeException(s"Halt ${code}")))
+  }
+
+  @AfterEach
+  def tearDown(): Unit = {
+    Exit.resetExitProcedure();
+    Exit.resetHaltProcedure();
+    val exception = exitException.get()
+    if (exception != null) {
+      throw exception
+    }
+  }
+
   @Test
   def testGetTopicDelta(): Unit = {
     assert(BrokerMetadataPublisher.getTopicDelta(
@@ -127,6 +161,7 @@ class BrokerMetadataPublisherTest {
         Array.empty[Int],
         Array.empty[Int],
         replicas.head,
+        LeaderRecoveryState.RECOVERED,
         0,
         0
       )
@@ -142,4 +177,99 @@ class BrokerMetadataPublisherTest {
     new TopicsImage(idsMap.asJava, namesMap.asJava)
   }
 
+  private def newMockPublisher(
+    broker: BrokerServer,
+    errorHandler: FaultHandler = new MockFaultHandler("publisher")
+  ): BrokerMetadataPublisher = {
+    Mockito.spy(new BrokerMetadataPublisher(
+      conf = broker.config,
+      metadataCache = broker.metadataCache,
+      logManager = broker.logManager,
+      replicaManager = broker.replicaManager,
+      groupCoordinator = broker.groupCoordinator,
+      txnCoordinator = broker.transactionCoordinator,
+      clientQuotaMetadataManager = broker.clientQuotaMetadataManager,
+      dynamicConfigHandlers = broker.dynamicConfigHandlers.toMap,
+      _authorizer = Option.empty,
+      errorHandler,
+      errorHandler
+    ))
+  }
+
+  @Test
+  def testReloadUpdatedFilesWithoutConfigChange(): Unit = {
+    val cluster = new KafkaClusterTestKit.Builder(
+      new TestKitNodes.Builder().
+        setNumBrokerNodes(1).
+        setNumControllerNodes(1).build()).build()
+    try {
+      cluster.format()
+      cluster.startup()
+      cluster.waitForReadyBrokers()
+      val broker = cluster.brokers().values().iterator().next()
+      val publisher = newMockPublisher(broker)
+      val numTimesReloadCalled = new AtomicInteger(0)
+      Mockito.when(publisher.reloadUpdatedFilesWithoutConfigChange(any[Properties]())).
+        thenAnswer(new Answer[Unit]() {
+          override def answer(invocation: InvocationOnMock): Unit = numTimesReloadCalled.addAndGet(1)
+        })
+      broker.metadataListener.alterPublisher(publisher).get()
+      val admin = Admin.create(cluster.clientProperties())
+      try {
+        assertEquals(0, numTimesReloadCalled.get())
+        admin.incrementalAlterConfigs(singletonMap(
+          new ConfigResource(BROKER, ""),
+          singleton(new AlterConfigOp(new ConfigEntry(KafkaConfig.MaxConnectionsProp, "123"), SET)))).all().get()
+        TestUtils.waitUntilTrue(() => numTimesReloadCalled.get() == 0,
+          "numTimesConfigured never reached desired value")
+
+        // Setting the foo.bar.test.configuration to 1 will still trigger reconfiguration because
+        // reloadUpdatedFilesWithoutConfigChange will be called.
+        admin.incrementalAlterConfigs(singletonMap(
+          new ConfigResource(BROKER, broker.config.nodeId.toString),
+          singleton(new AlterConfigOp(new ConfigEntry(KafkaConfig.MaxConnectionsProp, "123"), SET)))).all().get()
+        TestUtils.waitUntilTrue(() => numTimesReloadCalled.get() == 1,
+          "numTimesConfigured never reached desired value")
+      } finally {
+        admin.close()
+      }
+    } finally {
+      cluster.close()
+    }
+  }
+
+  @Test
+  def testExceptionInUpdateCoordinator(): Unit = {
+    val errorHandler = new MockFaultHandler("publisher")
+    val cluster = new KafkaClusterTestKit.Builder(
+      new TestKitNodes.Builder().
+        setNumBrokerNodes(1).
+        setNumControllerNodes(1).build()).
+      setMetadataFaultHandler(errorHandler).build()
+    try {
+      cluster.format()
+      cluster.startup()
+      cluster.waitForReadyBrokers()
+      val broker = cluster.brokers().values().iterator().next()
+      TestUtils.retry(60000) {
+        assertNotNull(broker.metadataPublisher)
+      }
+      val publisher = Mockito.spy(broker.metadataPublisher)
+      doThrow(new RuntimeException("injected failure")).when(publisher).updateCoordinator(any(), any(), any(), any(), any())
+      broker.metadataListener.alterPublisher(publisher).get()
+      val admin = Admin.create(cluster.clientProperties())
+      try {
+        admin.createTopics(singletonList(new NewTopic("foo", 1, 1.toShort))).all().get()
+      } finally {
+        admin.close()
+      }
+      TestUtils.retry(60000) {
+        assertTrue(Option(errorHandler.firstException()).
+          flatMap(e => Option(e.getMessage())).getOrElse("(none)").contains("injected failure"))
+      }
+    } finally {
+      errorHandler.setIgnore(true)
+      cluster.close()
+    }
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataSnapshotterTest.scala b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataSnapshotterTest.scala
index 82426611425a1..ff2326c92fa37 100644
--- a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataSnapshotterTest.scala
+++ b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataSnapshotterTest.scala
@@ -20,8 +20,8 @@ package kafka.server.metadata
 import java.nio.ByteBuffer
 import java.util.Optional
 import java.util.concurrent.{CompletableFuture, CountDownLatch}
-
 import org.apache.kafka.common.memory.MemoryPool
+import org.apache.kafka.common.metadata.FenceBrokerRecord
 import org.apache.kafka.common.protocol.ByteBufferAccessor
 import org.apache.kafka.common.record.{CompressionType, MemoryRecords}
 import org.apache.kafka.common.utils.Time
@@ -34,6 +34,9 @@ import org.apache.kafka.snapshot.{MockRawSnapshotWriter, RecordsSnapshotWriter,
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
 import org.junit.jupiter.api.Test
 
+import java.util
+import java.util.Arrays.asList
+import scala.compat.java8.OptionConverters._
 
 class BrokerMetadataSnapshotterTest {
   @Test
@@ -48,7 +51,7 @@ class BrokerMetadataSnapshotterTest {
 
     override def build(committedOffset: Long,
                        committedEpoch: Int,
-                       lastContainedLogTime: Long): SnapshotWriter[ApiMessageAndVersion] = {
+                       lastContainedLogTime: Long): Option[SnapshotWriter[ApiMessageAndVersion]] = {
       val offsetAndEpoch = new OffsetAndEpoch(committedOffset, committedEpoch)
       RecordsSnapshotWriter.createWithHeader(
         () => {
@@ -62,7 +65,7 @@ class BrokerMetadataSnapshotterTest {
         lastContainedLogTime,
         CompressionType.NONE,
         MetadataRecordSerde.INSTANCE
-      ).get();
+      ).asScala
     }
 
     def consumeSnapshotBuffer(committedOffset: Long, committedEpoch: Int)(buffer: ByteBuffer): Unit = {
@@ -104,4 +107,31 @@ class BrokerMetadataSnapshotterTest {
       snapshotter.close()
     }
   }
+
+  class MockSnapshotWriter extends SnapshotWriter[ApiMessageAndVersion] {
+    val batches = new util.ArrayList[util.List[ApiMessageAndVersion]]
+    override def snapshotId(): OffsetAndEpoch = new OffsetAndEpoch(0, 0)
+    override def lastContainedLogOffset(): Long = 0
+    override def lastContainedLogEpoch(): Int = 0
+    override def isFrozen: Boolean = false
+    override def append(batch: util.List[ApiMessageAndVersion]): Unit = batches.add(batch)
+    override def freeze(): Unit = {}
+    override def close(): Unit = {}
+  }
+
+  @Test
+  def testRecordListConsumer(): Unit = {
+    val writer = new MockSnapshotWriter()
+    val consumer = new RecordListConsumer(3, writer)
+    val m = new ApiMessageAndVersion(new FenceBrokerRecord().setId(1).setEpoch(1), 0.toShort)
+    consumer.accept(asList(m, m))
+    assertEquals(asList(asList(m, m)), writer.batches)
+    consumer.accept(asList(m))
+    assertEquals(asList(asList(m, m), asList(m)), writer.batches)
+    consumer.accept(asList(m, m, m, m))
+    assertEquals(asList(asList(m, m), asList(m), asList(m, m, m), asList(m)), writer.batches)
+    consumer.accept(asList(m, m, m, m, m, m, m, m))
+    assertEquals(asList(asList(m, m), asList(m), asList(m, m, m), asList(m), asList(m, m, m), asList(m, m, m), asList(m, m)),
+      writer.batches)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/tools/ConsoleProducerTest.scala b/core/src/test/scala/unit/kafka/tools/ConsoleProducerTest.scala
index 84aafa18ea064..f136c62b5ff6c 100644
--- a/core/src/test/scala/unit/kafka/tools/ConsoleProducerTest.scala
+++ b/core/src/test/scala/unit/kafka/tools/ConsoleProducerTest.scala
@@ -67,6 +67,46 @@ class ConsoleProducerTest {
     "--producer-property",
     "client.id=producer-1"
   )
+  val batchSizeOverriddenByMaxPartitionMemoryBytesValue: Array[String] = Array(
+    "--broker-list",
+    "localhost:1001",
+    "--bootstrap-server",
+    "localhost:1002",
+    "--topic",
+    "t3",
+    "--batch-size",
+    "123",
+    "--max-partition-memory-bytes",
+    "456"
+  )
+  val btchSizeSetAndMaxPartitionMemoryBytesNotSet: Array[String] = Array(
+    "--broker-list",
+    "localhost:1001",
+    "--bootstrap-server",
+    "localhost:1002",
+    "--topic",
+    "t3",
+    "--batch-size",
+    "123"
+  )
+  val batchSizeNotSetAndMaxPartitionMemoryBytesSet: Array[String] = Array(
+    "--broker-list",
+    "localhost:1001",
+    "--bootstrap-server",
+    "localhost:1002",
+    "--topic",
+    "t3",
+    "--max-partition-memory-bytes",
+    "456"
+  )
+  val batchSizeDefault: Array[String] = Array(
+    "--broker-list",
+    "localhost:1001",
+    "--bootstrap-server",
+    "localhost:1002",
+    "--topic",
+    "t3"
+  )
 
   @Test
   def testValidConfigsBrokerList(): Unit = {
@@ -123,4 +163,37 @@ class ConsoleProducerTest {
     assertEquals("console-producer",
       producerConfig.getString(ProducerConfig.CLIENT_ID_CONFIG))
   }
+
+  @Test
+  def testBatchSizeOverriddenByMaxPartitionMemoryBytesValue(): Unit = {
+    val config = new ConsoleProducer.ProducerConfig(batchSizeOverriddenByMaxPartitionMemoryBytesValue)
+    val producerConfig = new ProducerConfig(ConsoleProducer.producerProps(config))
+    assertEquals(456,
+      producerConfig.getInt(ProducerConfig.BATCH_SIZE_CONFIG))
+  }
+
+  @Test
+  def testBatchSizeSetAndMaxPartitionMemoryBytesNotSet(): Unit = {
+    val config = new ConsoleProducer.ProducerConfig(btchSizeSetAndMaxPartitionMemoryBytesNotSet)
+    val producerConfig = new ProducerConfig(ConsoleProducer.producerProps(config))
+    assertEquals(123,
+      producerConfig.getInt(ProducerConfig.BATCH_SIZE_CONFIG))
+  }
+
+  @Test
+  def testDefaultBatchSize(): Unit = {
+    val config = new ConsoleProducer.ProducerConfig(batchSizeDefault)
+    val producerConfig = new ProducerConfig(ConsoleProducer.producerProps(config))
+    assertEquals(16*1024,
+      producerConfig.getInt(ProducerConfig.BATCH_SIZE_CONFIG))
+  }
+
+  @Test
+  def testBatchSizeNotSetAndMaxPartitionMemoryBytesSet (): Unit = {
+    val config = new ConsoleProducer.ProducerConfig(batchSizeNotSetAndMaxPartitionMemoryBytesSet)
+    val producerConfig = new ProducerConfig(ConsoleProducer.producerProps(config))
+    assertEquals(456,
+      producerConfig.getInt(ProducerConfig.BATCH_SIZE_CONFIG))
+  }
+
 }
diff --git a/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala b/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala
index 04556aa97af9e..5d5e462b5a8cd 100644
--- a/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala
+++ b/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala
@@ -22,23 +22,28 @@ import java.nio.ByteBuffer
 import java.util
 import java.util.Properties
 
-import kafka.log.{AppendOrigin, UnifiedLog, LogConfig, LogManager, LogTestUtils}
-import kafka.server.{BrokerTopicStats, FetchLogEnd, LogDirFailureChannel}
+import kafka.log.{AppendOrigin, Defaults, LogConfig, LogManager, LogTestUtils, UnifiedLog}
+import kafka.raft.{KafkaMetadataLog, MetadataLogConfig}
+import kafka.server.{BrokerTopicStats, FetchLogEnd, KafkaRaftServer, LogDirFailureChannel}
 import kafka.tools.DumpLogSegments.TimeIndexDumpErrors
 import kafka.utils.{MockTime, TestUtils}
 import org.apache.kafka.common.Uuid
+import org.apache.kafka.common.memory.MemoryPool
 import org.apache.kafka.common.metadata.{PartitionChangeRecord, RegisterBrokerRecord, TopicRecord}
 import org.apache.kafka.common.protocol.{ByteBufferAccessor, ObjectSerializationCache}
 import org.apache.kafka.common.record.{CompressionType, ControlRecordType, EndTransactionMarker, MemoryRecords, RecordVersion, SimpleRecord}
 import org.apache.kafka.common.utils.Utils
 import org.apache.kafka.metadata.MetadataRecordSerde
+import org.apache.kafka.raft.{KafkaRaftClient, OffsetAndEpoch}
 import org.apache.kafka.server.common.ApiMessageAndVersion
+import org.apache.kafka.snapshot.RecordsSnapshotWriter
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 
 import scala.jdk.CollectionConverters._
 import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
+import scala.util.matching.Regex
 
 case class BatchInfo(records: Seq[SimpleRecord], hasKeys: Boolean, hasValues: Boolean)
 
@@ -48,6 +53,7 @@ class DumpLogSegmentsTest {
   val logDir = TestUtils.randomPartitionLogDir(tmpDir)
   val segmentName = "00000000000000000000"
   val logFilePath = s"$logDir/$segmentName.log"
+  val snapshotPath = s"$logDir/00000000000000000000-0000000000.checkpoint"
   val indexFilePath = s"$logDir/$segmentName.index"
   val timeIndexFilePath = s"$logDir/$segmentName.timeindex"
   val time = new MockTime(0, 0)
@@ -255,13 +261,14 @@ class DumpLogSegmentsTest {
     log.appendAsLeader(MemoryRecords.withRecords(CompressionType.NONE, records:_*), leaderEpoch = 1)
     log.flush(false)
 
-    var output = runDumpLogSegments(Array("--cluster-metadata-decoder", "false", "--files", logFilePath))
-    assert(output.contains("TOPIC_RECORD"))
-    assert(output.contains("BROKER_RECORD"))
+    var output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--files", logFilePath))
+    assertTrue(output.contains("Log starting offset: 0"))
+    assertTrue(output.contains("TOPIC_RECORD"))
+    assertTrue(output.contains("BROKER_RECORD"))
 
-    output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--skip-record-metadata", "false", "--files", logFilePath))
-    assert(output.contains("TOPIC_RECORD"))
-    assert(output.contains("BROKER_RECORD"))
+    output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--skip-record-metadata", "--files", logFilePath))
+    assertTrue(output.contains("TOPIC_RECORD"))
+    assertTrue(output.contains("BROKER_RECORD"))
 
     // Bogus metadata record
     val buf = ByteBuffer.allocate(4)
@@ -271,10 +278,77 @@ class DumpLogSegmentsTest {
     log.appendAsLeader(MemoryRecords.withRecords(CompressionType.NONE, new SimpleRecord(null, buf.array)), leaderEpoch = 2)
     log.appendAsLeader(MemoryRecords.withRecords(CompressionType.NONE, records:_*), leaderEpoch = 2)
 
-    output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--skip-record-metadata", "false", "--files", logFilePath))
-    assert(output.contains("TOPIC_RECORD"))
-    assert(output.contains("BROKER_RECORD"))
-    assert(output.contains("skipping"))
+    output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--skip-record-metadata", "--files", logFilePath))
+    assertTrue(output.contains("TOPIC_RECORD"))
+    assertTrue(output.contains("BROKER_RECORD"))
+    assertTrue(output.contains("skipping"))
+  }
+
+  @Test
+  def testDumpMetadataSnapshot(): Unit = {
+    val metadataRecords = Seq(
+      new ApiMessageAndVersion(
+        new RegisterBrokerRecord().setBrokerId(0).setBrokerEpoch(10), 0.toShort),
+      new ApiMessageAndVersion(
+        new RegisterBrokerRecord().setBrokerId(1).setBrokerEpoch(20), 0.toShort),
+      new ApiMessageAndVersion(
+        new TopicRecord().setName("test-topic").setTopicId(Uuid.randomUuid()), 0.toShort),
+      new ApiMessageAndVersion(
+        new PartitionChangeRecord().setTopicId(Uuid.randomUuid()).setLeader(1).
+          setPartitionId(0).setIsr(util.Arrays.asList(0, 1, 2)), 0.toShort)
+    )
+
+    val metadataLog = KafkaMetadataLog(
+      KafkaRaftServer.MetadataPartition,
+      KafkaRaftServer.MetadataTopicId,
+      logDir,
+      time,
+      time.scheduler,
+      MetadataLogConfig(
+        logSegmentBytes = 100 * 1024,
+        logSegmentMinBytes = 100 * 1024,
+        logSegmentMillis = 10 * 1000,
+        retentionMaxBytes = 100 * 1024,
+        retentionMillis = 60 * 1000,
+        maxBatchSizeInBytes = KafkaRaftClient.MAX_BATCH_SIZE_BYTES,
+        maxFetchSizeInBytes = KafkaRaftClient.MAX_FETCH_SIZE_BYTES,
+        fileDeleteDelayMs = Defaults.FileDeleteDelayMs,
+        nodeId = 1
+      )
+    )
+
+    val lastContainedLogTimestamp = 10000
+
+    TestUtils.resource(
+      RecordsSnapshotWriter.createWithHeader(
+        () => metadataLog.createNewSnapshot(new OffsetAndEpoch(0, 0)),
+        1024,
+        MemoryPool.NONE,
+        new MockTime,
+        lastContainedLogTimestamp,
+        CompressionType.NONE,
+        new MetadataRecordSerde
+      ).get()
+    ) { snapshotWriter =>
+      snapshotWriter.append(metadataRecords.asJava)
+      snapshotWriter.freeze()
+    }
+
+    var output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--files", snapshotPath))
+    assertTrue(output.contains("Snapshot end offset: 0, epoch: 0"))
+    assertTrue(output.contains("TOPIC_RECORD"))
+    assertTrue(output.contains("BROKER_RECORD"))
+    assertTrue(output.contains("SnapshotHeader"))
+    assertTrue(output.contains("SnapshotFooter"))
+    assertTrue(output.contains(s""""lastContainedLogTimestamp":$lastContainedLogTimestamp"""))
+
+    output = runDumpLogSegments(Array("--cluster-metadata-decoder", "--skip-record-metadata", "--files", snapshotPath))
+    assertTrue(output.contains("Snapshot end offset: 0, epoch: 0"))
+    assertTrue(output.contains("TOPIC_RECORD"))
+    assertTrue(output.contains("BROKER_RECORD"))
+    assertFalse(output.contains("SnapshotHeader"))
+    assertFalse(output.contains("SnapshotFooter"))
+    assertFalse(output.contains(s""""lastContainedLogTimestamp": $lastContainedLogTimestamp"""))
   }
 
   @Test
@@ -298,6 +372,29 @@ class DumpLogSegmentsTest {
     outContent.toString
   }
 
+  @Test
+  def testPrintDataLogPartialBatches(): Unit = {
+    addSimpleRecords()
+    val totalBatches = batches.size
+    val partialBatches = totalBatches / 2
+
+    // Get all the batches
+    val output = runDumpLogSegments(Array("--files", logFilePath))
+    val lines = util.Arrays.asList(output.split("\n"): _*).listIterator()
+
+    // Get total bytes of the partial batches
+    val partialBatchesBytes = readPartialBatchesBytes(lines, partialBatches)
+
+    // Request only the partial batches by bytes
+    val partialOutput = runDumpLogSegments(Array("--max-bytes", partialBatchesBytes.toString, "--files", logFilePath))
+    val partialLines = util.Arrays.asList(partialOutput.split("\n"): _*).listIterator()
+
+    // Count the total of partial batches limited by bytes
+    val partialBatchesCount = countBatches(partialLines)
+
+    assertEquals(partialBatches, partialBatchesCount)
+  }
+
   private def readBatchMetadata(lines: util.ListIterator[String]): Option[String] = {
     while (lines.hasNext) {
       val line = lines.next()
@@ -310,6 +407,38 @@ class DumpLogSegmentsTest {
     None
   }
 
+  // Returns the total bytes of the batches specified
+  private def readPartialBatchesBytes(lines: util.ListIterator[String], limit: Int): Int = {
+    val sizePattern: Regex = raw".+?size:\s(\d+).+".r
+    var batchesBytes = 0
+    var batchesCounter = 0
+    while (lines.hasNext) {
+      if (batchesCounter >= limit){
+        return batchesBytes
+      }
+      val line = lines.next()
+      if (line.startsWith("baseOffset")) {
+        line match {
+          case sizePattern(size) => batchesBytes += size.toInt
+          case _ => throw new IllegalStateException(s"Failed to parse and find size value for batch line: $line")
+        }
+        batchesCounter += 1
+      }
+    }
+    batchesBytes
+  }
+
+  private def countBatches(lines: util.ListIterator[String]): Int = {
+    var countBatches = 0
+    while (lines.hasNext) {
+      val line = lines.next()
+      if (line.startsWith("baseOffset")) {
+        countBatches += 1
+      }
+    }
+    countBatches
+  }
+
   private def readBatchRecords(lines: util.ListIterator[String]): Seq[String] = {
     val records = mutable.ArrayBuffer.empty[String]
     while (lines.hasNext) {
diff --git a/core/src/test/scala/unit/kafka/tools/StorageToolTest.scala b/core/src/test/scala/unit/kafka/tools/StorageToolTest.scala
index 0242c33dab94d..0e11471527ae3 100644
--- a/core/src/test/scala/unit/kafka/tools/StorageToolTest.scala
+++ b/core/src/test/scala/unit/kafka/tools/StorageToolTest.scala
@@ -22,13 +22,15 @@ import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.util
 import java.util.Properties
-
 import kafka.server.{KafkaConfig, MetaProperties}
 import kafka.utils.TestUtils
 import org.apache.kafka.common.utils.Utils
-import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows}
+import org.apache.kafka.server.common.MetadataVersion
+import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows, assertTrue}
 import org.junit.jupiter.api.{Test, Timeout}
 
+import scala.collection.mutable
+
 
 @Timeout(value = 40)
 class StorageToolTest {
@@ -160,11 +162,11 @@ Found problem:
         clusterId = "XcZZOzUqS4yHOjhMQB6JLQ", nodeId = 2)
       val stream = new ByteArrayOutputStream()
       assertEquals(0, StorageTool.
-        formatCommand(new PrintStream(stream), Seq(tempDir.toString), metaProperties, false))
-      assertEquals("Formatting %s%n".format(tempDir), stream.toString())
+        formatCommand(new PrintStream(stream), Seq(tempDir.toString), metaProperties, MetadataVersion.latest(), ignoreFormatted = false))
+      assertTrue(stream.toString().startsWith("Formatting %s".format(tempDir)))
 
       try assertEquals(1, StorageTool.
-        formatCommand(new PrintStream(new ByteArrayOutputStream()), Seq(tempDir.toString), metaProperties, false)) catch {
+        formatCommand(new PrintStream(new ByteArrayOutputStream()), Seq(tempDir.toString), metaProperties, MetadataVersion.latest(), ignoreFormatted = false)) catch {
         case e: TerseFailure => assertEquals(s"Log directory ${tempDir} is already " +
           "formatted. Use --ignore-formatted to ignore this directory and format the " +
           "others.", e.getMessage)
@@ -172,7 +174,7 @@ Found problem:
 
       val stream2 = new ByteArrayOutputStream()
       assertEquals(0, StorageTool.
-        formatCommand(new PrintStream(stream2), Seq(tempDir.toString), metaProperties, true))
+        formatCommand(new PrintStream(stream2), Seq(tempDir.toString), metaProperties, MetadataVersion.latest(), ignoreFormatted = true))
       assertEquals("All of the log directories are already formatted.%n".format(), stream2.toString())
     } finally Utils.delete(tempDir)
   }
@@ -185,4 +187,30 @@ Found problem:
         "16 bytes of a base64-encoded UUID", assertThrows(classOf[TerseFailure],
           () => StorageTool.buildMetadataProperties("invalid", config)).getMessage)
   }
+
+  @Test
+  def testDefaultMetadataVersion(): Unit = {
+    val namespace = StorageTool.parseArguments(Array("format", "-c", "config.props", "-t", "XcZZOzUqS4yHOjhMQB6JLQ"))
+    val mv = StorageTool.getMetadataVersion(namespace)
+    assertEquals(MetadataVersion.latest().featureLevel(), mv.featureLevel(),
+      "Expected the default metadata.version to be the latest version")
+  }
+
+  @Test
+  def testMetadataVersionFlags(): Unit = {
+    def parseMetadataVersion(strings: String*): MetadataVersion = {
+      var args = mutable.Seq("format", "-c", "config.props", "-t", "XcZZOzUqS4yHOjhMQB6JLQ")
+      args ++= strings
+      val namespace = StorageTool.parseArguments(args.toArray)
+      StorageTool.getMetadataVersion(namespace)
+    }
+
+    var mv = parseMetadataVersion("--release-version", "3.0")
+    assertEquals("3.0", mv.shortVersion())
+
+    mv = parseMetadataVersion("--release-version", "3.0-IV1")
+    assertEquals(MetadataVersion.IBP_3_0_IV1, mv)
+
+    assertThrows(classOf[IllegalArgumentException], () => parseMetadataVersion("--release-version", "0.0"))
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/utils/PasswordEncoderTest.scala b/core/src/test/scala/unit/kafka/utils/PasswordEncoderTest.scala
index 0a5d5ac029814..50cdceabbca66 100755
--- a/core/src/test/scala/unit/kafka/utils/PasswordEncoderTest.scala
+++ b/core/src/test/scala/unit/kafka/utils/PasswordEncoderTest.scala
@@ -30,7 +30,7 @@ class PasswordEncoderTest {
 
   @Test
   def testEncodeDecode(): Unit = {
-    val encoder = new PasswordEncoder(new Password("password-encoder-secret"),
+    val encoder = PasswordEncoder.encrypting(new Password("password-encoder-secret"),
       None,
       Defaults.PasswordEncoderCipherAlgorithm,
       Defaults.PasswordEncoderKeyLength,
@@ -54,7 +54,7 @@ class PasswordEncoderTest {
 
   @Test
   def testEncoderConfigChange(): Unit = {
-    val encoder = new PasswordEncoder(new Password("password-encoder-secret"),
+    val encoder = PasswordEncoder.encrypting(new Password("password-encoder-secret"),
       Some("PBKDF2WithHmacSHA1"),
       "DES/CBC/PKCS5Padding",
       64,
@@ -68,7 +68,7 @@ class PasswordEncoderTest {
     assertEquals("DES/CBC/PKCS5Padding", encodedMap(PasswordEncoder.CipherAlgorithmProp))
 
     // Test that decoding works even if PasswordEncoder algorithm, iterations etc. are altered
-    val decoder = new PasswordEncoder(new Password("password-encoder-secret"),
+    val decoder = PasswordEncoder.encrypting(new Password("password-encoder-secret"),
       Some("PBKDF2WithHmacSHA1"),
       "AES/CBC/PKCS5Padding",
       128,
@@ -76,7 +76,7 @@ class PasswordEncoderTest {
     assertEquals(password, decoder.decode(encoded).value)
 
     // Test that decoding fails if secret is altered
-    val decoder2 = new PasswordEncoder(new Password("secret-2"),
+    val decoder2 = PasswordEncoder.encrypting(new Password("secret-2"),
       Some("PBKDF2WithHmacSHA1"),
       "AES/CBC/PKCS5Padding",
       128,
@@ -92,7 +92,7 @@ class PasswordEncoderTest {
   def testEncodeDecodeAlgorithms(): Unit = {
 
     def verifyEncodeDecode(keyFactoryAlg: Option[String], cipherAlg: String, keyLength: Int): Unit = {
-      val encoder = new PasswordEncoder(new Password("password-encoder-secret"),
+      val encoder = PasswordEncoder.encrypting(new Password("password-encoder-secret"),
         keyFactoryAlg,
         cipherAlg,
         keyLength,
diff --git a/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala b/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala
index a610956d7ca9d..ea3d369d87b47 100644
--- a/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala
+++ b/core/src/test/scala/unit/kafka/utils/ReplicationUtilsTest.scala
@@ -22,6 +22,7 @@ import kafka.controller.LeaderIsrAndControllerEpoch
 import kafka.server.QuorumTestHarness
 import kafka.zk._
 import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.metadata.LeaderRecoveryState
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{BeforeEach, Test, TestInfo}
 
@@ -39,7 +40,7 @@ class ReplicationUtilsTest extends QuorumTestHarness {
     super.setUp(testInfo)
     zkClient.makeSurePersistentPathExists(TopicZNode.path(topic))
     val topicPartition = new TopicPartition(topic, partition)
-    val leaderAndIsr = LeaderAndIsr(leader, leaderEpoch, isr, 1)
+    val leaderAndIsr = LeaderAndIsr(leader, leaderEpoch, isr, LeaderRecoveryState.RECOVERED, 1)
     val leaderIsrAndControllerEpoch = LeaderIsrAndControllerEpoch(leaderAndIsr, controllerEpoch)
     zkClient.createTopicPartitionStatesRaw(Map(topicPartition -> leaderIsrAndControllerEpoch), ZkVersion.MatchAnyVersion)
   }
@@ -51,14 +52,14 @@ class ReplicationUtilsTest extends QuorumTestHarness {
     val replicas = List(0, 1)
 
     // regular update
-    val newLeaderAndIsr1 = new LeaderAndIsr(leader, leaderEpoch, replicas, 0)
+    val newLeaderAndIsr1 = LeaderAndIsr(leader, leaderEpoch, replicas, LeaderRecoveryState.RECOVERED, 0)
     val (updateSucceeded1, newZkVersion1) = ReplicationUtils.updateLeaderAndIsr(zkClient,
       new TopicPartition(topic, partition), newLeaderAndIsr1, controllerEpoch)
     assertTrue(updateSucceeded1)
     assertEquals(newZkVersion1, 1)
 
     // mismatched zkVersion with the same data
-    val newLeaderAndIsr2 = new LeaderAndIsr(leader, leaderEpoch, replicas, zkVersion + 1)
+    val newLeaderAndIsr2 = LeaderAndIsr(leader, leaderEpoch, replicas, LeaderRecoveryState.RECOVERED, zkVersion + 1)
     val (updateSucceeded2, newZkVersion2) = ReplicationUtils.updateLeaderAndIsr(zkClient,
       new TopicPartition(topic, partition), newLeaderAndIsr2, controllerEpoch)
     assertTrue(updateSucceeded2)
@@ -66,7 +67,7 @@ class ReplicationUtilsTest extends QuorumTestHarness {
     assertEquals(newZkVersion2, 1)
 
     // mismatched zkVersion and leaderEpoch
-    val newLeaderAndIsr3 = new LeaderAndIsr(leader, leaderEpoch + 1, replicas, zkVersion + 1)
+    val newLeaderAndIsr3 = LeaderAndIsr(leader, leaderEpoch + 1, replicas, LeaderRecoveryState.RECOVERED, zkVersion + 1)
     val (updateSucceeded3, newZkVersion3) = ReplicationUtils.updateLeaderAndIsr(zkClient,
       new TopicPartition(topic, partition), newLeaderAndIsr3, controllerEpoch)
     assertFalse(updateSucceeded3)
diff --git a/core/src/test/scala/unit/kafka/utils/TestUtils.scala b/core/src/test/scala/unit/kafka/utils/TestUtils.scala
index 37819d2961852..1e0d5981daca4 100755
--- a/core/src/test/scala/unit/kafka/utils/TestUtils.scala
+++ b/core/src/test/scala/unit/kafka/utils/TestUtils.scala
@@ -28,26 +28,26 @@ import java.util
 import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger}
 import java.util.concurrent.{Callable, CompletableFuture, ExecutionException, Executors, TimeUnit}
 import java.util.{Arrays, Collections, Optional, Properties}
-
 import com.yammer.metrics.core.{Gauge, Meter}
+
 import javax.net.ssl.X509TrustManager
 import kafka.api._
-import kafka.cluster.{Broker, EndPoint, IsrChangeListener}
+import kafka.cluster.{AlterPartitionListener, Broker, EndPoint}
 import kafka.controller.{ControllerEventManager, LeaderIsrAndControllerEpoch}
 import kafka.log._
-import kafka.metrics.KafkaYammerMetrics
 import kafka.network.RequestChannel
 import kafka.server._
 import kafka.server.checkpoints.OffsetCheckpointFile
 import kafka.server.metadata.{ConfigRepository, MockConfigRepository}
 import kafka.utils.Implicits._
 import kafka.zk._
-import org.apache.kafka.clients.CommonClientConfigs
+import org.apache.kafka.clients.{ClientResponse, CommonClientConfigs}
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType
 import org.apache.kafka.clients.admin._
 import org.apache.kafka.clients.consumer._
 import org.apache.kafka.clients.consumer.internals.AbstractCoordinator
 import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
+import org.apache.kafka.common.{KafkaFuture, Node, TopicIdPartition, TopicPartition, Uuid}
 import org.apache.kafka.common.acl.{AccessControlEntry, AccessControlEntryFilter, AclBinding, AclBindingFilter}
 import org.apache.kafka.common.config.{ConfigException, ConfigResource}
 import org.apache.kafka.common.config.ConfigResource.Type.TOPIC
@@ -61,21 +61,21 @@ import org.apache.kafka.common.network.{ClientInformation, ListenerName, Mode}
 import org.apache.kafka.common.protocol.{ApiKeys, Errors}
 import org.apache.kafka.common.quota.{ClientQuotaAlteration, ClientQuotaEntity}
 import org.apache.kafka.common.record._
-import org.apache.kafka.common.requests.{AbstractRequest, EnvelopeRequest, RequestContext, RequestHeader}
+import org.apache.kafka.common.requests.{AbstractRequest, AbstractResponse, EnvelopeRequest, RequestContext, RequestHeader}
 import org.apache.kafka.common.resource.ResourcePattern
 import org.apache.kafka.common.security.auth.{KafkaPrincipal, KafkaPrincipalSerde, SecurityProtocol}
 import org.apache.kafka.common.serialization.{ByteArrayDeserializer, ByteArraySerializer, Deserializer, IntegerSerializer, Serializer}
 import org.apache.kafka.common.utils.Utils._
 import org.apache.kafka.common.utils.{Time, Utils}
-import org.apache.kafka.common.{KafkaFuture, TopicPartition}
 import org.apache.kafka.controller.QuorumController
-import org.apache.kafka.server.authorizer.{Authorizer => JAuthorizer}
+import org.apache.kafka.server.authorizer.{AuthorizableRequestContext, Authorizer => JAuthorizer}
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.kafka.test.{TestSslUtils, TestUtils => JTestUtils}
 import org.apache.zookeeper.KeeperException.SessionExpiredException
 import org.apache.zookeeper.ZooDefs._
 import org.apache.zookeeper.data.ACL
 import org.junit.jupiter.api.Assertions._
-import org.mockito.Mockito
 
 import scala.annotation.nowarn
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}
@@ -361,7 +361,7 @@ object TestUtils extends Logging {
   }
 
   @nowarn("cat=deprecation")
-  def setIbpAndMessageFormatVersions(config: Properties, version: ApiVersion): Unit = {
+  def setIbpAndMessageFormatVersions(config: Properties, version: MetadataVersion): Unit = {
     config.setProperty(KafkaConfig.InterBrokerProtocolVersionProp, version.version)
     // for clarity, only set the log message format version if it's not ignored
     if (!LogConfig.shouldIgnoreMessageFormatVersion(version))
@@ -381,6 +381,34 @@ object TestUtils extends Logging {
     Admin.create(adminClientProperties)
   }
 
+  def createTopicWithAdminRaw[B <: KafkaBroker](
+    admin: Admin,
+    topic: String,
+    numPartitions: Int = 1,
+    replicationFactor: Int = 1,
+    replicaAssignment: collection.Map[Int, Seq[Int]] = Map.empty,
+    topicConfig: Properties = new Properties,
+  ): Uuid = {
+    val configsMap = new util.HashMap[String, String]()
+    topicConfig.forEach((k, v) => configsMap.put(k.toString, v.toString))
+
+    val result = if (replicaAssignment.isEmpty) {
+      admin.createTopics(Collections.singletonList(new NewTopic(
+        topic, numPartitions, replicationFactor.toShort).configs(configsMap)))
+    } else {
+      val assignment = new util.HashMap[Integer, util.List[Integer]]()
+      replicaAssignment.forKeyValue { case (k, v) =>
+        val replicas = new util.ArrayList[Integer]
+        v.foreach(r => replicas.add(r.asInstanceOf[Integer]))
+        assignment.put(k.asInstanceOf[Integer], replicas)
+      }
+      admin.createTopics(Collections.singletonList(new NewTopic(
+        topic, assignment).configs(configsMap)))
+    }
+
+    result.topicId(topic).get()
+}
+
   def createTopicWithAdmin[B <: KafkaBroker](
     admin: Admin,
     topic: String,
@@ -396,23 +424,15 @@ object TestUtils extends Logging {
       replicaAssignment.size
     }
 
-    val configsMap = new util.HashMap[String, String]()
-    topicConfig.forEach((k, v) => configsMap.put(k.toString, v.toString))
     try {
-      val result = if (replicaAssignment.isEmpty) {
-        admin.createTopics(Collections.singletonList(new NewTopic(
-          topic, numPartitions, replicationFactor.toShort).configs(configsMap)))
-      } else {
-        val assignment = new util.HashMap[Integer, util.List[Integer]]()
-        replicaAssignment.forKeyValue { case (k, v) =>
-          val replicas = new util.ArrayList[Integer]
-          v.foreach(r => replicas.add(r.asInstanceOf[Integer]))
-          assignment.put(k.asInstanceOf[Integer], replicas)
-        }
-        admin.createTopics(Collections.singletonList(new NewTopic(
-          topic, assignment).configs(configsMap)))
-      }
-      result.all().get()
+      createTopicWithAdminRaw(
+        admin,
+        topic,
+        numPartitions,
+        replicationFactor,
+        replicaAssignment,
+        topicConfig
+      )
     } catch {
       case e: ExecutionException => if (!(e.getCause != null &&
           e.getCause.isInstanceOf[TopicExistsException] &&
@@ -431,16 +451,24 @@ object TestUtils extends Logging {
     }.toMap
   }
 
+  def describeTopic(
+    admin: Admin,
+    topic: String
+  ): TopicDescription = {
+    val describedTopics = admin.describeTopics(
+      Collections.singleton(topic)
+    ).allTopicNames().get()
+    describedTopics.get(topic)
+  }
+
   def topicHasSameNumPartitionsAndReplicationFactor(adminClient: Admin,
                                                     topic: String,
                                                     numPartitions: Int,
                                                     replicationFactor: Int): Boolean = {
-    val describedTopics = adminClient.describeTopics(Collections.
-      singleton(topic)).allTopicNames().get()
-    val description = describedTopics.get(topic)
-    (description != null &&
+    val description = describeTopic(adminClient, topic)
+    description != null &&
       description.partitions().size() == numPartitions &&
-      description.partitions().iterator().next().replicas().size() == replicationFactor)
+      description.partitions().iterator().next().replicas().size() == replicationFactor
   }
 
   def createOffsetsTopicWithAdmin[B <: KafkaBroker](
@@ -482,7 +510,7 @@ object TestUtils extends Logging {
                   topic: String,
                   numPartitions: Int = 1,
                   replicationFactor: Int = 1,
-                  servers: Seq[KafkaServer],
+                  servers: Seq[KafkaBroker],
                   topicConfig: Properties = new Properties): scala.collection.immutable.Map[Int, Int] = {
     val adminZkClient = new AdminZkClient(zkClient)
     // create topic
@@ -514,7 +542,7 @@ object TestUtils extends Logging {
   def createTopic(zkClient: KafkaZkClient,
                   topic: String,
                   partitionReplicaAssignment: collection.Map[Int, Seq[Int]],
-                  servers: Seq[KafkaServer]): scala.collection.immutable.Map[Int, Int] = {
+                  servers: Seq[KafkaBroker]): scala.collection.immutable.Map[Int, Int] = {
     createTopic(zkClient, topic, partitionReplicaAssignment, servers, new Properties())
   }
 
@@ -526,7 +554,7 @@ object TestUtils extends Logging {
   def createTopic(zkClient: KafkaZkClient,
                   topic: String,
                   partitionReplicaAssignment: collection.Map[Int, Seq[Int]],
-                  servers: Seq[KafkaServer],
+                  servers: Seq[KafkaBroker],
                   topicConfig: Properties): scala.collection.immutable.Map[Int, Int] = {
     val adminZkClient = new AdminZkClient(zkClient)
     // create topic
@@ -554,7 +582,7 @@ object TestUtils extends Logging {
     * Create the consumer offsets/group metadata topic and wait until the leader is elected and metadata is propagated
     * to all brokers.
     */
-  def createOffsetsTopic(zkClient: KafkaZkClient, servers: Seq[KafkaServer]): Unit = {
+  def createOffsetsTopic(zkClient: KafkaZkClient, servers: Seq[KafkaBroker]): Unit = {
     val server = servers.head
     createTopic(zkClient, Topic.GROUP_METADATA_TOPIC_NAME,
       server.config.getInt(KafkaConfig.OffsetsTopicPartitionsProp),
@@ -808,7 +836,7 @@ object TestUtils extends Logging {
       Broker(b.id, Seq(EndPoint("localhost", 6667, listenerName, protocol)), b.rack)
     }
     brokers.foreach(b => zkClient.registerBroker(BrokerInfo(Broker(b.id, b.endPoints, rack = b.rack),
-      ApiVersion.latestVersion, jmxPort = -1)))
+      MetadataVersion.latest, jmxPort = -1)))
     brokers
   }
 
@@ -842,8 +870,59 @@ object TestUtils extends Logging {
    *         LeaderDuringDelete).
    * @throws AssertionError if the expected condition is not true within the timeout.
    */
-  def waitUntilLeaderIsElectedOrChanged(zkClient: KafkaZkClient, topic: String, partition: Int, timeoutMs: Long = 30000L,
-                                        oldLeaderOpt: Option[Int] = None, newLeaderOpt: Option[Int] = None): Int = {
+  def waitUntilLeaderIsElectedOrChanged(
+    zkClient: KafkaZkClient,
+    topic: String,
+    partition: Int,
+    timeoutMs: Long = 30000L,
+    oldLeaderOpt: Option[Int] = None,
+    newLeaderOpt: Option[Int] = None
+  ): Int = {
+    def getPartitionLeader(topic: String, partition: Int): Option[Int] = {
+      zkClient.getLeaderForPartition(new TopicPartition(topic, partition))
+    }
+    doWaitUntilLeaderIsElectedOrChanged(getPartitionLeader, topic, partition, timeoutMs, oldLeaderOpt, newLeaderOpt)
+  }
+
+  /**
+   *  If neither oldLeaderOpt nor newLeaderOpt is defined, wait until the leader of a partition is elected.
+   *  If oldLeaderOpt is defined, it waits until the new leader is different from the old leader.
+   *  If newLeaderOpt is defined, it waits until the new leader becomes the expected new leader.
+   *
+   * @return The new leader (note that negative values are used to indicate conditions like NoLeader and
+   *         LeaderDuringDelete).
+   * @throws AssertionError if the expected condition is not true within the timeout.
+   */
+  def waitUntilLeaderIsElectedOrChangedWithAdmin(
+    admin: Admin,
+    topic: String,
+    partition: Int,
+    timeoutMs: Long = 30000L,
+    oldLeaderOpt: Option[Int] = None,
+    newLeaderOpt: Option[Int] = None
+  ): Int = {
+    def getPartitionLeader(topic: String, partition: Int): Option[Int] = {
+      admin.describeTopics(Collections.singletonList(topic)).allTopicNames().get().get(topic).partitions().asScala.
+        find(_.partition() == partition).
+        flatMap { p =>
+          if (p.leader().id() == Node.noNode().id()) {
+            None
+          } else {
+            Some(p.leader().id())
+          }
+        }
+    }
+    doWaitUntilLeaderIsElectedOrChanged(getPartitionLeader, topic, partition, timeoutMs, oldLeaderOpt, newLeaderOpt)
+  }
+
+  private def doWaitUntilLeaderIsElectedOrChanged(
+    getPartitionLeader: (String, Int) => Option[Int],
+    topic: String,
+    partition: Int,
+    timeoutMs: Long,
+    oldLeaderOpt: Option[Int],
+    newLeaderOpt: Option[Int]
+  ): Int = {
     require(!(oldLeaderOpt.isDefined && newLeaderOpt.isDefined), "Can't define both the old and the new leader")
     val startTime = System.currentTimeMillis()
     val topicPartition = new TopicPartition(topic, partition)
@@ -855,7 +934,7 @@ object TestUtils extends Logging {
     var electedOrChangedLeader: Option[Int] = None
     while (electedOrChangedLeader.isEmpty && System.currentTimeMillis() < startTime + timeoutMs) {
       // check if leader is elected
-      leader = zkClient.getLeaderForPartition(topicPartition)
+      leader = getPartitionLeader(topic, partition)
       leader match {
         case Some(l) => (newLeaderOpt, oldLeaderOpt) match {
           case (Some(newLeader), _) if newLeader == l =>
@@ -1014,18 +1093,19 @@ object TestUtils extends Logging {
    * otherwise difficult to poll for. `computeUntilTrue` and `waitUntilTrue` should be preferred in cases where we can
    * easily wait on a condition before evaluating the assertions.
    */
-  def tryUntilNoAssertionError(waitTime: Long = JTestUtils.DEFAULT_MAX_WAIT_MS, pause: Long = 100L)(assertions: => Unit) = {
-    val (error, success) = TestUtils.computeUntilTrue({
+  def tryUntilNoAssertionError[T](waitTime: Long = JTestUtils.DEFAULT_MAX_WAIT_MS, pause: Long = 100L)(assertions: => T): T = {
+    val (either, success) = TestUtils.computeUntilTrue({
       try {
-        assertions
-        None
+        val res = assertions
+        Left(res)
       } catch {
-        case ae: AssertionError => Some(ae)
+        case ae: AssertionError => Right(ae)
       }
-    }, waitTime = waitTime, pause = pause)(_.isEmpty)
+    }, waitTime = waitTime, pause = pause)(_.isLeft)
 
-    if (!success) {
-      throw error.get
+    either match {
+      case Left(res) => res
+      case Right(err) => throw err
     }
   }
 
@@ -1126,6 +1206,25 @@ object TestUtils extends Logging {
       throw new IllegalStateException(s"Cannot get topic: $topic, partition: $partition in server metadata cache"))
   }
 
+  /**
+   * Wait until the kraft broker metadata have caught up to the controller, before calling this, we should make sure
+   * the related metadata message has already been committed to the controller metadata log.
+   */
+  def ensureConsistentKRaftMetadata(
+      brokers: Seq[KafkaBroker],
+      controllerServer: ControllerServer,
+      msg: String = "Timeout waiting for controller metadata propagating to brokers"
+  ): Unit = {
+    val controllerOffset = controllerServer.raftManager.replicatedLog.endOffset().offset - 1
+    TestUtils.waitUntilTrue(
+      () => {
+        brokers.forall { broker =>
+          val metadataOffset = broker.asInstanceOf[BrokerServer].metadataPublisher.publishedOffset
+          metadataOffset >= controllerOffset
+        }
+      }, msg)
+  }
+
   def waitUntilControllerElected(zkClient: KafkaZkClient, timeout: Long = JTestUtils.DEFAULT_MAX_WAIT_MS): Int = {
     val (controllerId, _) = computeUntilTrue(zkClient.getControllerId, waitTime = timeout)(_.isDefined)
     controllerId.getOrElse(throw new AssertionError(s"Controller not elected after $timeout ms"))
@@ -1233,13 +1332,14 @@ object TestUtils extends Logging {
                        configRepository: ConfigRepository = new MockConfigRepository,
                        cleanerConfig: CleanerConfig = CleanerConfig(enableCleaner = false),
                        time: MockTime = new MockTime(),
-                       interBrokerProtocolVersion: ApiVersion = ApiVersion.latestVersion): LogManager = {
+                       interBrokerProtocolVersion: MetadataVersion = MetadataVersion.latest,
+                       recoveryThreadsPerDataDir: Int = 4): LogManager = {
     new LogManager(logDirs = logDirs.map(_.getAbsoluteFile),
                    initialOfflineDirs = Array.empty[File],
                    configRepository = configRepository,
                    initialDefaultConfig = defaultConfig,
                    cleanerConfig = cleanerConfig,
-                   recoveryThreadsPerDataDir = 4,
+                   recoveryThreadsPerDataDir = recoveryThreadsPerDataDir,
                    flushCheckMs = 1000L,
                    flushRecoveryOffsetCheckpointMs = 10000L,
                    flushStartOffsetCheckpointMs = 10000L,
@@ -1254,19 +1354,24 @@ object TestUtils extends Logging {
                    interBrokerProtocolVersion = interBrokerProtocolVersion)
   }
 
-  class MockAlterIsrManager extends AlterIsrManager {
-    val isrUpdates: mutable.Queue[AlterIsrItem] = new mutable.Queue[AlterIsrItem]()
+  class MockAlterPartitionManager extends AlterPartitionManager {
+    val isrUpdates: mutable.Queue[AlterPartitionItem] = new mutable.Queue[AlterPartitionItem]()
     val inFlight: AtomicBoolean = new AtomicBoolean(false)
 
 
     override def submit(
-      topicPartition: TopicPartition,
+      topicPartition: TopicIdPartition,
       leaderAndIsr: LeaderAndIsr,
       controllerEpoch: Int
     ): CompletableFuture[LeaderAndIsr]= {
       val future = new CompletableFuture[LeaderAndIsr]()
       if (inFlight.compareAndSet(false, true)) {
-        isrUpdates += AlterIsrItem(topicPartition, leaderAndIsr, future, controllerEpoch)
+        isrUpdates += AlterPartitionItem(
+          topicPartition,
+          leaderAndIsr,
+          future,
+          controllerEpoch
+        )
       } else {
         future.completeExceptionally(new OperationNotAttemptedException(
           s"Failed to enqueue AlterIsr request for $topicPartition since there is already an inflight request"))
@@ -1274,10 +1379,10 @@ object TestUtils extends Logging {
       future
     }
 
-    def completeIsrUpdate(newZkVersion: Int): Unit = {
+    def completeIsrUpdate(newPartitionEpoch: Int): Unit = {
       if (inFlight.compareAndSet(true, false)) {
         val item = isrUpdates.dequeue()
-        item.future.complete(item.leaderAndIsr.withZkVersion(newZkVersion))
+        item.future.complete(item.leaderAndIsr.withPartitionEpoch(newPartitionEpoch))
       } else {
         fail("Expected an in-flight ISR update, but there was none")
       }
@@ -1293,18 +1398,18 @@ object TestUtils extends Logging {
     }
   }
 
-  def createAlterIsrManager(): MockAlterIsrManager = {
-    new MockAlterIsrManager()
+  def createAlterIsrManager(): MockAlterPartitionManager = {
+    new MockAlterPartitionManager()
   }
 
-  class MockIsrChangeListener extends IsrChangeListener {
+  class MockAlterPartitionListener extends AlterPartitionListener {
     val expands: AtomicInteger = new AtomicInteger(0)
     val shrinks: AtomicInteger = new AtomicInteger(0)
     val failures: AtomicInteger = new AtomicInteger(0)
 
-    override def markExpand(): Unit = expands.incrementAndGet()
+    override def markIsrExpand(): Unit = expands.incrementAndGet()
 
-    override def markShrink(): Unit = shrinks.incrementAndGet()
+    override def markIsrShrink(): Unit = shrinks.incrementAndGet()
 
     override def markFailed(): Unit = failures.incrementAndGet()
 
@@ -1315,8 +1420,8 @@ object TestUtils extends Logging {
     }
   }
 
-  def createIsrChangeListener(): MockIsrChangeListener = {
-    new MockIsrChangeListener()
+  def createIsrChangeListener(): MockAlterPartitionListener = {
+    new MockAlterPartitionListener()
   }
 
   def produceMessages[B <: KafkaBroker](
@@ -1417,7 +1522,7 @@ object TestUtils extends Logging {
     val localLog = leaderBroker.replicaManager.localLogOrException(partition)
     val logDir = localLog.dir.getParentFile
     CoreUtils.swallow(Utils.delete(logDir), this)
-    logDir.createNewFile()
+    Files.createFile(logDir.toPath)
     assertTrue(logDir.isFile)
 
     if (failureType == Roll) {
@@ -2071,6 +2176,17 @@ object TestUtils extends Logging {
     }
   }
 
+  val anonymousAuthorizableContext = new AuthorizableRequestContext() {
+    override def listenerName(): String = ""
+    override def securityProtocol(): SecurityProtocol = SecurityProtocol.PLAINTEXT
+    override def principal(): KafkaPrincipal = KafkaPrincipal.ANONYMOUS
+    override def clientAddress(): InetAddress = null
+    override def requestType(): Int = 0
+    override def requestVersion(): Int = 0
+    override def clientId(): String = ""
+    override def correlationId(): Int = 0
+  }
+
   def addAndVerifyAcls[B <: KafkaBroker](
     brokers: Seq[B],
     acls: Set[AccessControlEntry],
@@ -2079,7 +2195,7 @@ object TestUtils extends Logging {
   ): Unit = {
     val authorizerForWrite = pickAuthorizerForWrite(brokers, controllers)
     val aclBindings = acls.map { acl => new AclBinding(resource, acl) }
-    authorizerForWrite.createAcls(null, aclBindings.toList.asJava).asScala
+    authorizerForWrite.createAcls(anonymousAuthorizableContext, aclBindings.toList.asJava).asScala
       .map(_.toCompletableFuture.get)
       .foreach { result =>
         result.exception.ifPresent { e => throw e }
@@ -2100,7 +2216,7 @@ object TestUtils extends Logging {
   ): Unit = {
     val authorizerForWrite = pickAuthorizerForWrite(brokers, controllers)
     val aclBindingFilters = acls.map { acl => new AclBindingFilter(resource.toFilter, acl.toFilter) }
-    authorizerForWrite.deleteAcls(null, aclBindingFilters.toList.asJava).asScala
+    authorizerForWrite.deleteAcls(anonymousAuthorizableContext, aclBindingFilters.toList.asJava).asScala
       .map(_.toCompletableFuture.get)
       .foreach { result =>
         result.exception.ifPresent { e => throw e }
@@ -2113,14 +2229,14 @@ object TestUtils extends Logging {
     }
   }
 
-  def buildRequestWithEnvelope(request: AbstractRequest,
-                               principalSerde: KafkaPrincipalSerde,
-                               requestChannelMetrics: RequestChannel.Metrics,
-                               startTimeNanos: Long,
-                               fromPrivilegedListener: Boolean = true,
-                               shouldSpyRequestContext: Boolean = false,
-                               envelope: Option[RequestChannel.Request] = None
-                              ): RequestChannel.Request = {
+  def buildEnvelopeRequest(
+    request: AbstractRequest,
+    principalSerde: KafkaPrincipalSerde,
+    requestChannelMetrics: RequestChannel.Metrics,
+    startTimeNanos: Long,
+    dequeueTimeNanos: Long = -1,
+    fromPrivilegedListener: Boolean = true
+  ): RequestChannel.Request = {
     val clientId = "id"
     val listenerName = ListenerName.forSecurityProtocol(SecurityProtocol.PLAINTEXT)
 
@@ -2136,23 +2252,21 @@ object TestUtils extends Logging {
 
     RequestHeader.parse(envelopeBuffer)
 
-    var requestContext = new RequestContext(envelopeHeader, "1", InetAddress.getLocalHost,
+    val envelopeContext = new RequestContext(envelopeHeader, "1", InetAddress.getLocalHost,
       KafkaPrincipal.ANONYMOUS, listenerName, SecurityProtocol.PLAINTEXT, ClientInformation.EMPTY,
       fromPrivilegedListener, Optional.of(principalSerde))
 
-    if (shouldSpyRequestContext) {
-      requestContext = Mockito.spy(requestContext)
-    }
-
-    new RequestChannel.Request(
+    val envelopRequest = new RequestChannel.Request(
       processor = 1,
-      context = requestContext,
+      context = envelopeContext,
       startTimeNanos = startTimeNanos,
       memoryPool = MemoryPool.NONE,
       buffer = envelopeBuffer,
       metrics = requestChannelMetrics,
-      envelope = envelope
+      envelope = None
     )
+    envelopRequest.requestDequeueTimeNanos = dequeueTimeNanos
+    envelopRequest
   }
 
   def verifyNoUnexpectedThreads(context: String): Unit = {
@@ -2180,4 +2294,22 @@ object TestUtils extends Logging {
         s"${unexpected.mkString("`", ",", "`")}")
   }
 
-}
+  class TestControllerRequestCompletionHandler(expectedResponse: Option[AbstractResponse] = None)
+    extends ControllerRequestCompletionHandler {
+    var actualResponse: Option[ClientResponse] = Option.empty
+    val completed: AtomicBoolean = new AtomicBoolean(false)
+    val timedOut: AtomicBoolean = new AtomicBoolean(false)
+
+    override def onComplete(response: ClientResponse): Unit = {
+      actualResponse = Some(response)
+      expectedResponse.foreach { expected =>
+        assertEquals(expected, response.responseBody())
+      }
+      completed.set(true)
+    }
+
+    override def onTimeout(): Unit = {
+      timedOut.set(true)
+    }
+  }
+}
\ No newline at end of file
diff --git a/core/src/test/scala/unit/kafka/utils/ThrottlerTest.scala b/core/src/test/scala/unit/kafka/utils/ThrottlerTest.scala
index 1591cbad90083..80ebde4fcd752 100755
--- a/core/src/test/scala/unit/kafka/utils/ThrottlerTest.scala
+++ b/core/src/test/scala/unit/kafka/utils/ThrottlerTest.scala
@@ -58,4 +58,50 @@ class ThrottlerTest {
     val actualCountPerSec = 4 * desiredCountPerInterval * 1000 / elapsedTimeMs
     assertTrue(actualCountPerSec <= desiredCountPerSec)
   }
+
+  @Test
+  def testUpdateThrottleDesiredRate(): Unit = {
+    val throttleCheckIntervalMs = 100
+    val desiredCountPerSec = 1000.0
+    val desiredCountPerInterval = desiredCountPerSec * throttleCheckIntervalMs / 1000.0
+    val updatedDesiredCountPerSec = 1500.0;
+    val updatedDesiredCountPerInterval = updatedDesiredCountPerSec * throttleCheckIntervalMs / 1000.0
+
+    val mockTime = new MockTime()
+    val throttler = new Throttler(desiredRatePerSec = desiredCountPerSec,
+      checkIntervalMs = throttleCheckIntervalMs,
+      time = mockTime)
+
+    // Observe desiredCountPerInterval at t1
+    val t1 = mockTime.milliseconds()
+    throttler.maybeThrottle(desiredCountPerInterval)
+    assertEquals(t1, mockTime.milliseconds())
+
+    // Observe desiredCountPerInterval at t1 + throttleCheckIntervalMs + 1,
+    mockTime.sleep(throttleCheckIntervalMs + 1)
+    throttler.maybeThrottle(desiredCountPerInterval)
+    val t2 = mockTime.milliseconds()
+    assertTrue(t2 >= t1 + 2 * throttleCheckIntervalMs)
+
+    val elapsedTimeMs = t2 - t1
+    val actualCountPerSec = 2 * desiredCountPerInterval * 1000 / elapsedTimeMs
+    assertTrue(actualCountPerSec <= desiredCountPerSec)
+
+    // Update ThrottleDesiredRate
+    throttler.updateDesiredRatePerSec(updatedDesiredCountPerSec);
+
+    // Observe updatedDesiredCountPerInterval at t2
+    throttler.maybeThrottle(updatedDesiredCountPerInterval)
+    assertEquals(t2, mockTime.milliseconds())
+
+    // Observe updatedDesiredCountPerInterval at t2 + throttleCheckIntervalMs + 1
+    mockTime.sleep(throttleCheckIntervalMs + 1)
+    throttler.maybeThrottle(updatedDesiredCountPerInterval)
+    val t3 = mockTime.milliseconds()
+    assertTrue(t3 >= t2 + 2 * throttleCheckIntervalMs)
+
+    val updatedElapsedTimeMs = t3 - t2
+    val updatedActualCountPerSec = 2 * updatedDesiredCountPerInterval * 1000 / updatedElapsedTimeMs
+    assertTrue(updatedActualCountPerSec <= updatedDesiredCountPerSec)
+  }
 }
diff --git a/core/src/test/scala/unit/kafka/zk/KafkaZkClientTest.scala b/core/src/test/scala/unit/kafka/zk/KafkaZkClientTest.scala
index 6be954d5e7ad2..7b7ddfbc56fb3 100644
--- a/core/src/test/scala/unit/kafka/zk/KafkaZkClientTest.scala
+++ b/core/src/test/scala/unit/kafka/zk/KafkaZkClientTest.scala
@@ -16,46 +16,49 @@
 */
 package kafka.zk
 
-import java.util.{Collections, Properties}
 import java.nio.charset.StandardCharsets.UTF_8
 import java.util.concurrent.{CountDownLatch, TimeUnit}
-import kafka.api.{ApiVersion, LeaderAndIsr}
+import java.util.{Collections, Properties}
+
+import kafka.api.LeaderAndIsr
 import kafka.cluster.{Broker, EndPoint}
+import kafka.controller.{LeaderIsrAndControllerEpoch, ReplicaAssignment}
 import kafka.log.LogConfig
+import kafka.security.authorizer.AclEntry
 import kafka.server.{ConfigType, KafkaConfig, QuorumTestHarness}
 import kafka.utils.CoreUtils
-import org.apache.kafka.common.{TopicPartition, Uuid}
-import org.apache.kafka.common.network.ListenerName
-import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
-import org.apache.kafka.common.security.token.delegation.TokenInformation
-import org.apache.kafka.common.utils.{SecurityUtils, Time}
-import org.apache.zookeeper.KeeperException.{Code, NoAuthException, NoNodeException, NodeExistsException}
-import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
-
-import scala.jdk.CollectionConverters._
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.{Seq, mutable}
-import scala.util.Random
-import kafka.controller.{LeaderIsrAndControllerEpoch, ReplicaAssignment}
-import kafka.security.authorizer.AclEntry
 import kafka.zk.KafkaZkClient.UpdateLeaderAndIsrResult
 import kafka.zookeeper._
 import org.apache.kafka.common.acl.AclOperation.READ
 import org.apache.kafka.common.acl.AclPermissionType.{ALLOW, DENY}
 import org.apache.kafka.common.errors.ControllerMovedException
-import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
 import org.apache.kafka.common.feature.Features._
+import org.apache.kafka.common.feature.{Features, SupportedVersionRange}
+import org.apache.kafka.common.network.ListenerName
 import org.apache.kafka.common.resource.ResourcePattern
 import org.apache.kafka.common.resource.ResourceType.{GROUP, TOPIC}
 import org.apache.kafka.common.security.JaasUtils
+import org.apache.kafka.common.security.auth.{KafkaPrincipal, SecurityProtocol}
+import org.apache.kafka.common.security.token.delegation.TokenInformation
+import org.apache.kafka.common.utils.{SecurityUtils, Time}
+import org.apache.kafka.common.{TopicPartition, Uuid}
+import org.apache.kafka.metadata.LeaderRecoveryState
+import org.apache.kafka.server.common.MetadataVersion
+import org.apache.zookeeper.KeeperException.{Code, NoAuthException, NoNodeException, NodeExistsException}
 import org.apache.zookeeper.ZooDefs
 import org.apache.zookeeper.client.ZKClientConfig
 import org.apache.zookeeper.common.ZKConfig
 import org.apache.zookeeper.data.Stat
+import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Test, TestInfo}
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.ValueSource
 
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.{Seq, mutable}
+import scala.jdk.CollectionConverters._
+import scala.util.Random
+
 class KafkaZkClientTest extends QuorumTestHarness {
 
   private val group = "my-group"
@@ -808,7 +811,7 @@ class KafkaZkClientTest extends QuorumTestHarness {
         Seq(new EndPoint(host, port, ListenerName.forSecurityProtocol(securityProtocol), securityProtocol)),
         rack = rack,
         features = features),
-      ApiVersion.latestVersion, jmxPort = port + 10)
+      MetadataVersion.latest, jmxPort = port + 10)
 
   @Test
   def testRegisterBrokerInfo(): Unit = {
@@ -919,13 +922,13 @@ class KafkaZkClientTest extends QuorumTestHarness {
     stat
   }
 
-  private def leaderIsrAndControllerEpochs(state: Int, zkVersion: Int): Map[TopicPartition, LeaderIsrAndControllerEpoch] =
+  private def leaderIsrAndControllerEpochs(state: Int, partitionEpoch: Int): Map[TopicPartition, LeaderIsrAndControllerEpoch] =
     Map(
       topicPartition10 -> LeaderIsrAndControllerEpoch(
-        LeaderAndIsr(leader = 1, leaderEpoch = state, isr = List(2 + state, 3 + state), zkVersion = zkVersion),
+        LeaderAndIsr(leader = 1, leaderEpoch = state, isr = List(2 + state, 3 + state), LeaderRecoveryState.RECOVERED, partitionEpoch = partitionEpoch),
         controllerEpoch = 4),
       topicPartition11 -> LeaderIsrAndControllerEpoch(
-        LeaderAndIsr(leader = 0, leaderEpoch = state + 1, isr = List(1 + state, 2 + state), zkVersion = zkVersion),
+        LeaderAndIsr(leader = 0, leaderEpoch = state + 1, isr = List(1 + state, 2 + state), LeaderRecoveryState.RECOVERED, partitionEpoch = partitionEpoch),
         controllerEpoch = 4))
 
   val initialLeaderIsrAndControllerEpochs: Map[TopicPartition, LeaderIsrAndControllerEpoch] =
@@ -934,8 +937,8 @@ class KafkaZkClientTest extends QuorumTestHarness {
   val initialLeaderIsrs: Map[TopicPartition, LeaderAndIsr] =
     initialLeaderIsrAndControllerEpochs.map { case (k, v) => k -> v.leaderAndIsr }
 
-  private def leaderIsrs(state: Int, zkVersion: Int): Map[TopicPartition, LeaderAndIsr] =
-    leaderIsrAndControllerEpochs(state, zkVersion).map { case (k, v) => k -> v.leaderAndIsr }
+  private def leaderIsrs(state: Int, partitionEpoch: Int): Map[TopicPartition, LeaderAndIsr] =
+    leaderIsrAndControllerEpochs(state, partitionEpoch).map { case (k, v) => k -> v.leaderAndIsr }
 
   private def checkUpdateLeaderAndIsrResult(
                   expectedSuccessfulPartitions: Map[TopicPartition, LeaderAndIsr],
@@ -1000,26 +1003,26 @@ class KafkaZkClientTest extends QuorumTestHarness {
 
     // successful updates
     checkUpdateLeaderAndIsrResult(
-      leaderIsrs(state = 1, zkVersion = 1),
+      leaderIsrs(state = 1, partitionEpoch = 1),
       mutable.ArrayBuffer.empty,
       Map.empty,
-      zkClient.updateLeaderAndIsr(leaderIsrs(state = 1, zkVersion = 0),controllerEpoch = 4, controllerEpochZkVersion))
+      zkClient.updateLeaderAndIsr(leaderIsrs(state = 1, partitionEpoch = 0),controllerEpoch = 4, controllerEpochZkVersion))
 
     // Try to update with wrong ZK version
     checkUpdateLeaderAndIsrResult(
       Map.empty,
       ArrayBuffer(topicPartition10, topicPartition11),
       Map.empty,
-      zkClient.updateLeaderAndIsr(leaderIsrs(state = 1, zkVersion = 0),controllerEpoch = 4, controllerEpochZkVersion))
+      zkClient.updateLeaderAndIsr(leaderIsrs(state = 1, partitionEpoch = 0),controllerEpoch = 4, controllerEpochZkVersion))
 
     // Trigger successful, to be retried and failed partitions in same call
     val mixedState = Map(
-      topicPartition10 -> LeaderAndIsr(leader = 1, leaderEpoch = 2, isr = List(4, 5), zkVersion = 1),
-      topicPartition11 -> LeaderAndIsr(leader = 0, leaderEpoch = 2, isr = List(3, 4), zkVersion = 0),
-      topicPartition20 -> LeaderAndIsr(leader = 0, leaderEpoch = 2, isr = List(3, 4), zkVersion = 0))
+      topicPartition10 -> LeaderAndIsr(leader = 1, leaderEpoch = 2, isr = List(4, 5), LeaderRecoveryState.RECOVERED, partitionEpoch = 1),
+      topicPartition11 -> LeaderAndIsr(leader = 0, leaderEpoch = 2, isr = List(3, 4), LeaderRecoveryState.RECOVERED, partitionEpoch = 0),
+      topicPartition20 -> LeaderAndIsr(leader = 0, leaderEpoch = 2, isr = List(3, 4), LeaderRecoveryState.RECOVERED, partitionEpoch = 0))
 
     checkUpdateLeaderAndIsrResult(
-      leaderIsrs(state = 2, zkVersion = 2).filter { case (tp, _) => tp == topicPartition10 },
+      leaderIsrs(state = 2, partitionEpoch = 2).filter { case (tp, _) => tp == topicPartition10 },
       ArrayBuffer(topicPartition11),
       Map(
         topicPartition20 -> (classOf[NoNodeException], "KeeperErrorCode = NoNode for /brokers/topics/topic2/partitions/0/state")),
@@ -1030,7 +1033,7 @@ class KafkaZkClientTest extends QuorumTestHarness {
       leaderIsrAndControllerEpochs: Map[TopicPartition,LeaderIsrAndControllerEpoch],
       topicPartition: TopicPartition,
       response: GetDataResponse): Unit = {
-    val zkVersion = leaderIsrAndControllerEpochs(topicPartition).leaderAndIsr.zkVersion
+    val zkVersion = leaderIsrAndControllerEpochs(topicPartition).leaderAndIsr.partitionEpoch
     assertEquals(Code.OK, response.resultCode)
     assertEquals(TopicPartitionStateZNode.path(topicPartition), response.path)
     assertEquals(Some(topicPartition), response.ctx)
@@ -1106,20 +1109,20 @@ class KafkaZkClientTest extends QuorumTestHarness {
 
     assertEquals(
       expectedSetDataResponses(topicPartition10, topicPartition11)(Code.OK, statWithVersion(1)),
-      zkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 1, zkVersion = 0), controllerEpochZkVersion).map {
+      zkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 1, partitionEpoch = 0), controllerEpochZkVersion).map {
         eraseMetadataAndStat}.toList)
 
     // Mismatch controller epoch zkVersion
-    assertThrows(classOf[ControllerMovedException], () => zkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 1, zkVersion = 0), controllerEpochZkVersion + 1))
+    assertThrows(classOf[ControllerMovedException], () => zkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 1, partitionEpoch = 0), controllerEpochZkVersion + 1))
 
     val getResponses = zkClient.getTopicPartitionStatesRaw(topicPartitions10_11)
     assertEquals(2, getResponses.size)
-    topicPartitions10_11.zip(getResponses) foreach {case (tp, r) => checkGetDataResponse(leaderIsrAndControllerEpochs(state = 1, zkVersion = 0), tp, r)}
+    topicPartitions10_11.zip(getResponses) foreach {case (tp, r) => checkGetDataResponse(leaderIsrAndControllerEpochs(state = 1, partitionEpoch = 0), tp, r)}
 
     // Other ZK client can also write the state of a partition
     assertEquals(
       expectedSetDataResponses(topicPartition10, topicPartition11)(Code.OK, statWithVersion(2)),
-      otherZkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 2, zkVersion = 1), controllerEpochZkVersion).map {
+      otherZkClient.setTopicPartitionStatesRaw(leaderIsrAndControllerEpochs(state = 2, partitionEpoch = 1), controllerEpochZkVersion).map {
         eraseMetadataAndStat}.toList)
   }
 
diff --git a/core/src/test/scala/unit/kafka/zookeeper/ZooKeeperClientTest.scala b/core/src/test/scala/unit/kafka/zookeeper/ZooKeeperClientTest.scala
index 5af2ba8db044a..40a8f1e60ff06 100644
--- a/core/src/test/scala/unit/kafka/zookeeper/ZooKeeperClientTest.scala
+++ b/core/src/test/scala/unit/kafka/zookeeper/ZooKeeperClientTest.scala
@@ -24,11 +24,11 @@ import java.util.concurrent.{ArrayBlockingQueue, ConcurrentLinkedQueue, CountDow
 import scala.collection.Seq
 import com.yammer.metrics.core.{Gauge, Meter, MetricName}
 import kafka.server.KafkaConfig
-import kafka.metrics.KafkaYammerMetrics
 import kafka.utils.TestUtils
 import kafka.server.QuorumTestHarness
 import org.apache.kafka.common.security.JaasUtils
 import org.apache.kafka.common.utils.Time
+import org.apache.kafka.server.metrics.KafkaYammerMetrics
 import org.apache.zookeeper.KeeperException.{Code, NoNodeException}
 import org.apache.zookeeper.Watcher.Event.{EventType, KeeperState}
 import org.apache.zookeeper.ZooKeeper.States
@@ -658,7 +658,7 @@ class ZooKeeperClientTest extends QuorumTestHarness {
 
     connectionStateOverride = Some(States.CONNECTED)
     zooKeeperClient.ZooKeeperClientWatcher.process(new WatchedEvent(EventType.None, KeeperState.AuthFailed, null))
-    assertFalse(sessionInitializedCountDownLatch.await(10, TimeUnit.MILLISECONDS), "Unexpected session initialization when connection is alive")
+    assertFalse(sessionInitializedCountDownLatch.await(1200, TimeUnit.MILLISECONDS), "Unexpected session initialization when connection is alive")
 
     connectionStateOverride = Some(States.AUTH_FAILED)
     zooKeeperClient.ZooKeeperClientWatcher.process(new WatchedEvent(EventType.None, KeeperState.AuthFailed, null))
diff --git a/docs/configuration.html b/docs/configuration.html
index 0782c83790eb5..ceb671ca7500d 100644
--- a/docs/configuration.html
+++ b/docs/configuration.html
@@ -215,7 +215,7 @@ <h5>Adding and Removing Listeners</h5>
     <li><code>listener.security.protocol.map</code></li>
   </ul>
   Inter-broker listener must be configured using the static broker configuration <code>inter.broker.listener.name</code>
-  or <code>inter.broker.security.protocol</code>.
+  or <code>security.inter.broker.protocol</code>.
 
   <h3 class="anchor-heading"><a id="topicconfigs" class="anchor-link"></a><a href="#topicconfigs">3.2 Topic-Level Configs</a></h3>
 
diff --git a/docs/connect.html b/docs/connect.html
index 66d621248dec5..d13d25d31393c 100644
--- a/docs/connect.html
+++ b/docs/connect.html
@@ -48,6 +48,7 @@ <h4><a id="connect_running" href="#connect_running">Running Kafka Connect</a></h
         <li><code>bootstrap.servers</code> - List of Kafka servers used to bootstrap connections to Kafka</li>
         <li><code>key.converter</code> - Converter class used to convert between Kafka Connect format and the serialized form that is written to Kafka. This controls the format of the keys in messages written to or read from Kafka, and since this is independent of connectors it allows any connector to work with any serialization format. Examples of common formats include JSON and Avro.</li>
         <li><code>value.converter</code> - Converter class used to convert between Kafka Connect format and the serialized form that is written to Kafka. This controls the format of the values in messages written to or read from Kafka, and since this is independent of connectors it allows any connector to work with any serialization format. Examples of common formats include JSON and Avro.</li>
+        <li><code>plugin.path</code> (default <code>empty</code>) - a list of paths that contain Connect plugins (connectors, converters, transformations). Before running quick starts, users must add the absolute path that contains the example FileStreamSourceConnector and FileStreamSinkConnector packaged in <code>connect-file-"version".jar</code>, because these connectors are not included by default to the <code>CLASSPATH</code> or the <code>plugin.path</code> of the Connect worker (see <a href="#connectconfigs_plugin.path">plugin.path</a> property for examples).</li>
     </ul>
 
     <p>The important configuration options specific to standalone mode are:</p>
@@ -326,6 +327,8 @@ <h4><a id="connect_rest" href="#connect_rest">REST API</a></h4>
         <li><code>GET /</code>- return basic information about the Kafka Connect cluster such as the version of the Connect worker that serves the REST request (including git commit ID of the source code) and the Kafka cluster ID that is connected to.
     </ul>
 
+    <p>For the complete specification of the REST API, see the <a href="generated/connect_rest.yaml">OpenAPI documentation</a></p>
+
     <h4><a id="connect_errorreporting" href="#connect_errorreporting">Error Reporting in Connect</a></h4>
 
     <p>Kafka Connect provides error reporting to handle errors encountered along various stages of processing. By default, any error encountered during conversion or within transformations will cause the connector to fail. Each connector configuration can also enable tolerating such errors by skipping them, optionally writing each error and the details of the failed operation and problematic record (with various levels of detail) to the Connect application log. These mechanisms also capture errors when a sink connector is processing the messages consumed from its Kafka topics, and all of the errors can be written to a configurable "dead letter queue" (DLQ) Kafka topic.</p>
@@ -710,7 +713,7 @@ <h4><a id="connect_administration" href="#connect_administration">Kafka Connect
     <li><b>RUNNING:</b> The connector/task is running.</li>
     <li><b>PAUSED:</b> The connector/task has been administratively paused.</li>
     <li><b>FAILED:</b> The connector/task has failed (usually by raising an exception, which is reported in the status output).</li>
-    <li><b>DESTROYED:</b> The connector/task has been administratively removed and will stop appearing in the Connect cluster.</li>
+    <li><b>RESTARTING:</b> The connector/task is either actively restarting or is expected to restart soon</li>
     </ul>
 
     <p>
diff --git a/docs/design.html b/docs/design.html
index db71b6524551b..6e32b2d7f6fdf 100644
--- a/docs/design.html
+++ b/docs/design.html
@@ -125,6 +125,9 @@ <h3 class="anchor-heading"><a id="maximizingefficiency" class="anchor-link"></a>
     <p>
     This combination of pagecache and sendfile means that on a Kafka cluster where the consumers are mostly caught up you will see no read activity on the disks whatsoever as they will be serving data entirely from cache.
     <p>
+    TLS/SSL libraries operate at the user space (in-kernel <code>SSL_sendfile</code> is currently not supported by Kafka). Due to this restriction, <code>sendfile</code> is not used when SSL is enabled. For enabling
+    SSL configuration, refer to <code>security.protocol</code> and <code>security.inter.broker.protocol</code>
+    <p>
     For more background on the sendfile and zero-copy support in Java, see this <a href="https://developer.ibm.com/articles/j-zerocopy/">article</a>.
 
     <h4 class="anchor-heading"><a id="design_compression" class="anchor-link"></a><a href="#design_compression">End-to-end Batch Compression</a></h4>
diff --git a/docs/documentation.html b/docs/documentation.html
index d13f691209f3b..07014db7af4b9 100644
--- a/docs/documentation.html
+++ b/docs/documentation.html
@@ -33,7 +33,7 @@
     <!--//#include virtual="../includes/_docs_banner.htm" -->
     
     <h1>Documentation</h1>
-    <h3>Kafka 3.1 Documentation</h3>
+    <h3>Kafka 3.2 Documentation</h3>
     Prior releases: <a href="/07/documentation.html">0.7.x</a>, 
                     <a href="/08/documentation.html">0.8.0</a>, 
                     <a href="/081/documentation.html">0.8.1.X</a>, 
@@ -55,6 +55,7 @@ <h3>Kafka 3.1 Documentation</h3>
                     <a href="/27/documentation.html">2.7.X</a>,
                     <a href="/28/documentation.html">2.8.X</a>,
                     <a href="/30/documentation.html">3.0.X</a>.
+                    <a href="/31/documentation.html">3.1.X</a>.
 
    <h2 class="anchor-heading"><a id="gettingStarted" class="anchor-link"></a><a href="#gettingStarted">1. Getting Started</a></h2>
       <h3 class="anchor-heading"><a id="introduction" class="anchor-link"></a><a href="#introduction">1.1 Introduction</a></h3>
diff --git a/docs/images/kafka_log.png b/docs/images/kafka_log.png
index 75abd96babc17..6658b3f43cc9b 100644
Binary files a/docs/images/kafka_log.png and b/docs/images/kafka_log.png differ
diff --git a/docs/implementation.html b/docs/implementation.html
index 773d510680be5..11cf365750ba3 100644
--- a/docs/implementation.html
+++ b/docs/implementation.html
@@ -164,7 +164,7 @@ <h4 class="anchor-heading"><a id="messageset" class="anchor-link"></a><a href="#
 
     <h3 class="anchor-heading"><a id="log" class="anchor-link"></a><a href="#log">5.4 Log</a></h3>
     <p>
-    A log for a topic named "my_topic" with two partitions consists of two directories (namely <code>my_topic_0</code> and <code>my_topic_1</code>) populated with data files containing the messages for that topic. The format of the log files is a sequence of "log entries""; each log entry is a 4 byte integer <i>N</i> storing the message length which is followed by the <i>N</i> message bytes. Each message is uniquely identified by a 64-bit integer <i>offset</i> giving the byte position of the start of this message in the stream of all messages ever sent to that topic on that partition. The on-disk format of each message is given below. Each log file is named with the offset of the first message it contains. So the first file created will be 00000000000.kafka, and each additional file will have an integer name roughly <i>S</i> bytes from the previous file where <i>S</i> is the max log file size given in the configuration.
+    A log for a topic named "my-topic" with two partitions consists of two directories (namely <code>my-topic-0</code> and <code>my-topic-1</code>) populated with data files containing the messages for that topic. The format of the log files is a sequence of "log entries"; each log entry is a 4 byte integer <i>N</i> storing the message length which is followed by the <i>N</i> message bytes. Each message is uniquely identified by a 64-bit integer <i>offset</i> giving the byte position of the start of this message in the stream of all messages ever sent to that topic on that partition. The on-disk format of each message is given below. Each log file is named with the offset of the first message it contains. So the first file created will be 00000000000000000000.log, and each additional file will have an integer name roughly <i>S</i> bytes from the previous file where <i>S</i> is the max log file size given in the configuration.
     </p>
     <p>
     The exact binary format for records is versioned and maintained as a standard interface so record batches can be transferred between producer, broker, and client without recopying or conversion when desirable. The previous section included details about the on-disk format of records.
diff --git a/docs/js/templateData.js b/docs/js/templateData.js
index b7b23e8487f88..e0ad3e9532577 100644
--- a/docs/js/templateData.js
+++ b/docs/js/templateData.js
@@ -17,8 +17,8 @@ limitations under the License.
 
 // Define variables for doc templates
 var context={
-    "version": "32",
-    "dotVersion": "3.2",
-    "fullDotVersion": "3.2.0",
+    "version": "34",
+    "dotVersion": "3.4",
+    "fullDotVersion": "3.4.0",
     "scalaVersion": "2.13"
 };
diff --git a/docs/ops.html b/docs/ops.html
index e5b2fb8e94678..1854cf057c2fc 100644
--- a/docs/ops.html
+++ b/docs/ops.html
@@ -78,7 +78,7 @@ <h4 class="anchor-heading"><a id="basic_ops_leader_balancing" class="anchor-link
 
   <pre class="line-numbers"><code class="language-text">      auto.leader.rebalance.enable=true</code></pre>
     You can also set this to false, but you will then need to manually restore leadership to the restored replicas by running the command:
-  <pre class="line-numbers"><code class="language-bash">  &gt; bin/kafka-preferred-replica-election.sh --bootstrap-server broker_host:port</code></pre>
+  <pre class="line-numbers"><code class="language-bash">  &gt; bin/kafka-leader-election.sh --bootstrap-server broker_host:port --election-type preferred --all-topic-partitions</code></pre>
 
   <h4 class="anchor-heading"><a id="basic_ops_racks" class="anchor-link"></a><a href="#basic_ops_racks">Balancing Replicas Across Racks</a></h4>
   The rack awareness feature spreads replicas of the same partition across different racks. This extends the guarantees Kafka provides for broker-failure to cover rack-failure, limiting the risk of data loss should all the brokers on a rack fail at once. The feature can also be applied to other broker groupings such as availability zones in EC2.
@@ -258,23 +258,23 @@ <h5 class="anchor-heading"><a id="basic_ops_automigrate" class="anchor-link"></a
   Current partition replica assignment
 
   {"version":1,
-  "partitions":[{"topic":"foo1","partition":2,"replicas":[1,2]},
-                {"topic":"foo1","partition":0,"replicas":[3,4]},
-                {"topic":"foo2","partition":2,"replicas":[1,2]},
-                {"topic":"foo2","partition":0,"replicas":[3,4]},
-                {"topic":"foo1","partition":1,"replicas":[2,3]},
-                {"topic":"foo2","partition":1,"replicas":[2,3]}]
+  "partitions":[{"topic":"foo1","partition":0,"replicas":[2,1]},
+                {"topic":"foo1","partition":1,"replicas":[1,3]},
+                {"topic":"foo1","partition":2,"replicas":[3,4]},
+                {"topic":"foo2","partition":0,"replicas":[4,2]},
+                {"topic":"foo2","partition":1,"replicas":[2,1]},
+                {"topic":"foo2","partition":2,"replicas":[1,3]}]
   }
 
   Proposed partition reassignment configuration
 
   {"version":1,
-  "partitions":[{"topic":"foo1","partition":2,"replicas":[5,6]},
-                {"topic":"foo1","partition":0,"replicas":[5,6]},
-                {"topic":"foo2","partition":2,"replicas":[5,6]},
-                {"topic":"foo2","partition":0,"replicas":[5,6]},
+  "partitions":[{"topic":"foo1","partition":0,"replicas":[6,5]},
                 {"topic":"foo1","partition":1,"replicas":[5,6]},
-                {"topic":"foo2","partition":1,"replicas":[5,6]}]
+                {"topic":"foo1","partition":2,"replicas":[6,5]},
+                {"topic":"foo2","partition":0,"replicas":[5,6]},
+                {"topic":"foo2","partition":1,"replicas":[6,5]},
+                {"topic":"foo2","partition":2,"replicas":[5,6]}]
   }</code></pre>
   <p>
   The tool generates a candidate assignment that will move all partitions from topics foo1,foo2 to brokers 5,6. Note, however, that at this point, the partition movement has not started, it merely tells you the current assignment and the proposed new assignment. The current assignment should be saved in case you want to rollback to it. The new assignment should be saved in a json file (e.g. expand-cluster-reassignment.json) to be input to the tool with the --execute option as follows:
@@ -282,34 +282,27 @@ <h5 class="anchor-heading"><a id="basic_ops_automigrate" class="anchor-link"></a
   Current partition replica assignment
 
   {"version":1,
-  "partitions":[{"topic":"foo1","partition":2,"replicas":[1,2]},
-                {"topic":"foo1","partition":0,"replicas":[3,4]},
-                {"topic":"foo2","partition":2,"replicas":[1,2]},
-                {"topic":"foo2","partition":0,"replicas":[3,4]},
-                {"topic":"foo1","partition":1,"replicas":[2,3]},
-                {"topic":"foo2","partition":1,"replicas":[2,3]}]
+  "partitions":[{"topic":"foo1","partition":0,"replicas":[2,1]},
+                {"topic":"foo1","partition":1,"replicas":[1,3]},
+                {"topic":"foo1","partition":2,"replicas":[3,4]},
+                {"topic":"foo2","partition":0,"replicas":[4,2]},
+                {"topic":"foo2","partition":1,"replicas":[2,1]},
+                {"topic":"foo2","partition":2,"replicas":[1,3]}]
   }
 
   Save this to use as the --reassignment-json-file option during rollback
-  Successfully started reassignment of partitions
-  {"version":1,
-  "partitions":[{"topic":"foo1","partition":2,"replicas":[5,6]},
-                {"topic":"foo1","partition":0,"replicas":[5,6]},
-                {"topic":"foo2","partition":2,"replicas":[5,6]},
-                {"topic":"foo2","partition":0,"replicas":[5,6]},
-                {"topic":"foo1","partition":1,"replicas":[5,6]},
-                {"topic":"foo2","partition":1,"replicas":[5,6]}]
-  }</code></pre>
+  Successfully started partition reassignments for foo1-0,foo1-1,foo1-2,foo2-0,foo2-1,foo2-2
+  </code></pre>
   <p>
   Finally, the --verify option can be used with the tool to check the status of the partition reassignment. Note that the same expand-cluster-reassignment.json (used with the --execute option) should be used with the --verify option:
   <pre class="line-numbers"><code class="language-bash">  > bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 --reassignment-json-file expand-cluster-reassignment.json --verify
   Status of partition reassignment:
-  Reassignment of partition [foo1,0] completed successfully
-  Reassignment of partition [foo1,1] is in progress
-  Reassignment of partition [foo1,2] is in progress
-  Reassignment of partition [foo2,0] completed successfully
-  Reassignment of partition [foo2,1] completed successfully
-  Reassignment of partition [foo2,2] completed successfully</code></pre>
+  Reassignment of partition [foo1,0] is completed
+  Reassignment of partition [foo1,1] is still in progress
+  Reassignment of partition [foo1,2] is still in progress
+  Reassignment of partition [foo2,0] is completed
+  Reassignment of partition [foo2,1] is completed
+  Reassignment of partition [foo2,2] is completed</code></pre>
 
   <h5 class="anchor-heading"><a id="basic_ops_partitionassignment" class="anchor-link"></a><a href="#basic_ops_partitionassignment">Custom partition assignment and migration</a></h5>
   The partition reassignment tool can also be used to selectively move replicas of a partition to a specific set of brokers. When used in this manner, it is assumed that the user knows the reassignment plan and does not require the tool to generate a candidate reassignment, effectively skipping the --generate step and moving straight to the --execute step
@@ -329,17 +322,14 @@ <h5 class="anchor-heading"><a id="basic_ops_partitionassignment" class="anchor-l
   }
 
   Save this to use as the --reassignment-json-file option during rollback
-  Successfully started reassignment of partitions
-  {"version":1,
-  "partitions":[{"topic":"foo1","partition":0,"replicas":[5,6]},
-                {"topic":"foo2","partition":1,"replicas":[2,3]}]
-  }</code></pre>
+  Successfully started partition reassignments for foo1-0,foo2-1
+  </code></pre>
   <p>
   The --verify option can be used with the tool to check the status of the partition reassignment. Note that the same custom-reassignment.json (used with the --execute option) should be used with the --verify option:
   <pre class="line-numbers"><code class="language-bash">  > bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 --reassignment-json-file custom-reassignment.json --verify
   Status of partition reassignment:
-  Reassignment of partition [foo1,0] completed successfully
-  Reassignment of partition [foo2,1] completed successfully</code></pre>
+  Reassignment of partition [foo1,0] is completed
+  Reassignment of partition [foo2,1] is completed</code></pre>
 
   <h4 class="anchor-heading"><a id="basic_ops_decommissioning_brokers" class="anchor-link"></a><a href="#basic_ops_decommissioning_brokers">Decommissioning brokers</a></h4>
   The partition reassignment tool does not have the ability to automatically generate a reassignment plan for decommissioning brokers yet. As such, the admin has to come up with a reassignment plan to move the replica for all partitions hosted on the broker to be decommissioned, to the rest of the brokers. This can be relatively tedious as the reassignment needs to ensure that all the replicas are not moved from the decommissioned broker to only one other broker. To make this process effortless, we plan to add tooling support for decommissioning brokers in the future.
@@ -361,14 +351,12 @@ <h4 class="anchor-heading"><a id="basic_ops_increase_replication_factor" class="
   "partitions":[{"topic":"foo","partition":0,"replicas":[5]}]}
 
   Save this to use as the --reassignment-json-file option during rollback
-  Successfully started reassignment of partitions
-  {"version":1,
-  "partitions":[{"topic":"foo","partition":0,"replicas":[5,6,7]}]}</code></pre>
+  Successfully started partition reassignment for foo-0</code></pre>
   <p>
   The --verify option can be used with the tool to check the status of the partition reassignment. Note that the same increase-replication-factor.json (used with the --execute option) should be used with the --verify option:
   <pre class="line-numbers"><code class="language-bash">  > bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 --reassignment-json-file increase-replication-factor.json --verify
   Status of partition reassignment:
-  Reassignment of partition [foo,0] completed successfully</code></pre>
+  Reassignment of partition [foo,0] is completed</code></pre>
   You can also verify the increase in replication factor with the kafka-topics tool:
   <pre class="line-numbers"><code class="language-bash">  > bin/kafka-topics.sh --bootstrap-server localhost:9092 --topic foo --describe
   Topic:foo	PartitionCount:1	ReplicationFactor:3	Configs:
@@ -382,12 +370,11 @@ <h4 class="anchor-heading"><a id="rep-throttle" class="anchor-link"></a><a href=
   So for example, if you were to execute a rebalance, with the below command, it would move partitions at no more than 50MB/s.
   <pre class="language-bash">$ bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092 --execute --reassignment-json-file bigger-cluster.json --throttle 50000000</code></pre>
   When you execute this script you will see the throttle engage:
-  <pre class="line-numbers"><code class="language-bash">  The throttle limit was set to 50000000 B/s
-  Successfully started reassignment of partitions.</code></pre>
-  <p>Should you wish to alter the throttle, during a rebalance, say to increase the throughput so it completes quicker, you can do this by re-running the execute command passing the same reassignment-json-file:</p>
-  <pre class="language-bash">$ bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092  --execute --reassignment-json-file bigger-cluster.json --throttle 700000000
-  There is an existing assignment running.
-  The throttle limit was set to 700000000 B/s</code></pre>
+  <pre class="line-numbers"><code class="language-bash">  The inter-broker throttle limit was set to 50000000 B/s
+  Successfully started partition reassignment for foo1-0</code></pre>
+  <p>Should you wish to alter the throttle, during a rebalance, say to increase the throughput so it completes quicker, you can do this by re-running the execute command with the --additional option passing the same reassignment-json-file:</p>
+  <pre class="language-bash">$ bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092  --additional --execute --reassignment-json-file bigger-cluster.json --throttle 700000000
+  The inter-broker throttle limit was set to 700000000 B/s</code></pre>
 
   <p>Once the rebalance completes the administrator can check the status of the rebalance using the --verify option.
       If the rebalance has completed, the throttle will be removed via the --verify command. It is important that
@@ -397,9 +384,11 @@ <h4 class="anchor-heading"><a id="rep-throttle" class="anchor-link"></a><a href=
 
   <pre class="line-numbers"><code class="language-bash">  > bin/kafka-reassign-partitions.sh --bootstrap-server localhost:9092  --verify --reassignment-json-file bigger-cluster.json
   Status of partition reassignment:
-  Reassignment of partition [my-topic,1] completed successfully
-  Reassignment of partition [mytopic,0] completed successfully
-  Throttle was removed.</code></pre>
+  Reassignment of partition [my-topic,1] is completed
+  Reassignment of partition [my-topic,0] is completed
+
+  Clearing broker-level throttles on brokers 1,2,3
+  Clearing topic-level throttles on topic my-topic</code></pre>
 
   <p>The administrator can also validate the assigned configs using the kafka-configs.sh. There are two pairs of throttle
       configuration used to manage the throttling process. First pair refers to the throttle value itself. This is configured, at a broker
@@ -1277,7 +1266,8 @@ <h4 class="anchor-heading"><a id="prodconfig" class="anchor-link"></a><a href="#
 
   <h3 class="anchor-heading"><a id="java" class="anchor-link"></a><a href="#java">6.6 Java Version</a></h3>
 
-  Java 8 and Java 11 are supported. Java 11 performs significantly better if TLS is enabled, so it is highly recommended (it also includes a number of other
+  Java 8, Java 11, and Java 17 are supported. Note that Java 8 support has been deprecated since Apache Kafka 3.0 and will be removed in Apache Kafka 4.0.
+  Java 11 and later versions perform significantly better if TLS is enabled, so they are highly recommended (they also include a number of other
   performance improvements: G1GC, CRC32C, Compact Strings, Thread-Local Handshakes and more).
   
   From a security perspective, we recommend the latest released patch version as older freely available versions have disclosed security vulnerabilities.
@@ -1411,18 +1401,18 @@ <h4 class="anchor-heading"><a id="remote_jmx" class="anchor-link"></a><a href="#
       </tr>
       <tr>
         <td>Message in rate</td>
-        <td>kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec</td>
-        <td></td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec,topic=([-.\w]+)</td>
+        <td>Incoming message rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Byte in rate from clients</td>
-        <td>kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec</td>
-        <td></td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec,topic=([-.\w]+)</td>
+        <td>Byte in (from the clients) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Byte in rate from other brokers</td>
-        <td>kafka.server:type=BrokerTopicMetrics,name=ReplicationBytesInPerSec</td>
-        <td></td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=ReplicationBytesInPerSec,topic=([-.\w]+)</td>
+        <td>Byte in (from the other brokers) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Controller Request rate from Broker</td>
@@ -1451,7 +1441,27 @@ <h4 class="anchor-heading"><a id="remote_jmx" class="anchor-link"></a><a href="#
         <td>Error rate</td>
         <td>kafka.network:type=RequestMetrics,name=ErrorsPerSec,request=([-.\w]+),error=([-.\w]+)</td>
         <td>Number of errors in responses counted per-request-type, per-error-code. If a response contains
-            multiple errors, all are counted. error=NONE indicates successful responses.</td>
+          multiple errors, all are counted. error=NONE indicates successful responses.</td>
+      </tr>
+      <tr>
+        <td>Produce request rate</td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=TotalProduceRequestsPerSec,topic=([-.\w]+)</td>
+        <td>Produce request rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
+      </tr>
+      <tr>
+        <td>Fetch request rate</td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=TotalFetchRequestsPerSec,topic=([-.\w]+)</td>
+        <td>Fetch request (from clients or followers) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
+      </tr>
+      <tr>
+        <td>Failed produce request rate</td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec,topic=([-.\w]+)</td>
+        <td>Failed Produce request rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
+      </tr>
+      <tr>
+        <td>Failed fetch request rate</td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec,topic=([-.\w]+)</td>
+        <td>Failed Fetch request (from clients or followers) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Request size in bytes</td>
@@ -1471,7 +1481,7 @@ <h4 class="anchor-heading"><a id="remote_jmx" class="anchor-link"></a><a href="#
       <tr>
         <td>Message conversion rate</td>
         <td>kafka.server:type=BrokerTopicMetrics,name={Produce|Fetch}MessageConversionsPerSec,topic=([-.\w]+)</td>
-        <td>Number of records which required message format conversion.</td>
+        <td>Message format conversion rate, for Produce or Fetch requests, per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Request Queue Size</td>
@@ -1480,33 +1490,38 @@ <h4 class="anchor-heading"><a id="remote_jmx" class="anchor-link"></a><a href="#
       </tr>
       <tr>
         <td>Byte out rate to clients</td>
-        <td>kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec</td>
-        <td></td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=([-.\w]+)</td>
+        <td>Byte out (to the clients) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Byte out rate to other brokers</td>
-        <td>kafka.server:type=BrokerTopicMetrics,name=ReplicationBytesOutPerSec</td>
-        <td></td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=ReplicationBytesOutPerSec,topic=([-.\w]+)</td>
+        <td>Byte out (to the other brokers) rate per topic. Omitting 'topic=(...)' will yield the all-topic rate.</td>
+      </tr>
+      <tr>
+        <td>Rejected byte rate</td>
+        <td>kafka.server:type=BrokerTopicMetrics,name=BytesRejectedPerSec,topic=([-.\w]+)</td>
+        <td>Rejected byte rate per topic, due to the record batch size being greater than max.message.bytes configuration. Omitting 'topic=(...)' will yield the all-topic rate.</td>
       </tr>
       <tr>
         <td>Message validation failure rate due to no key specified for compacted topic</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=NoKeyCompactedTopicRecordsPerSec</td>
-        <td></td>
+        <td>0</td>
       </tr>
       <tr>
         <td>Message validation failure rate due to invalid magic number</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=InvalidMagicNumberRecordsPerSec</td>
-        <td></td>
+        <td>0</td>
       </tr>
       <tr>
         <td>Message validation failure rate due to incorrect crc checksum</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=InvalidMessageCrcRecordsPerSec</td>
-        <td></td>
+        <td>0</td>
       </tr>
       <tr>
         <td>Message validation failure rate due to non-continuous offset or sequence number in batch</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=InvalidOffsetOrSequenceRecordsPerSec</td>
-        <td></td>
+        <td>0</td>
       </tr>
       <tr>
         <td>Log flush rate and time</td>
@@ -1750,12 +1765,12 @@ <h4 class="anchor-heading"><a id="remote_jmx" class="anchor-link"></a><a href="#
       <tr>
         <td>Outgoing byte rate of reassignment traffic</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=ReassignmentBytesOutPerSec</td>
-        <td></td>
+        <td>0; non-zero when a partition reassignment is in progress.</td>
       </tr>
       <tr>
         <td>Incoming byte rate of reassignment traffic</td>
         <td>kafka.server:type=BrokerTopicMetrics,name=ReassignmentBytesInPerSec</td>
-        <td></td>
+        <td>0; non-zero when a partition reassignment is in progress.</td>
       </tr>
       <tr>
         <td>Size of a partition on disk (in bytes)</td>
@@ -2319,7 +2334,7 @@ <h4 class="anchor-heading"><a id="connect_monitoring" class="anchor-link"></a><a
   <h4 class="anchor-heading"><a id="kafka_streams_monitoring" class="anchor-link"></a><a href="#kafka_streams_monitoring">Streams Monitoring</a></h4>
 
   A Kafka Streams instance contains all the producer and consumer metrics as well as additional metrics specific to Streams.
-  By default Kafka Streams has metrics with three recording levels: <code>info</code>, <code>debug</code>, and <code>trace</code>.
+  The metrics have three recording levels: <code>info</code>, <code>debug</code>, and <code>trace</code>.
 
   <p>
     Note that the metrics have a 4-layer hierarchy. At the top level there are client-level metrics for each started
@@ -2589,6 +2604,16 @@ <h5 class="anchor-heading"><a id="kafka_streams_node_monitoring" class="anchor-l
         <th>Description</th>
         <th>Mbean name</th>
       </tr>
+      <tr>
+        <td>bytes-consumed-total</td>
+        <td>The total number of bytes consumed by a source processor node.</td>
+        <td>kafka.streams:type=stream-processor-node-metrics,thread-id=([-.\w]+),task-id=([-.\w]+),processor-node-id=([-.\w]+),topic=([-.\w]+)</td>
+      </tr>
+      <tr>
+        <td>bytes-produced-total</td>
+        <td>The total number of bytes produced by a sink processor node.</td>
+        <td>kafka.streams:type=stream-processor-node-metrics,thread-id=([-.\w]+),task-id=([-.\w]+),processor-node-id=([-.\w]+),topic=([-.\w]+)</td>
+      </tr>
       <tr>
         <td>process-rate</td>
         <td>The average number of records processed by a source processor node per second.</td>
@@ -2624,11 +2649,21 @@ <h5 class="anchor-heading"><a id="kafka_streams_node_monitoring" class="anchor-l
         <td>The minimum end-to-end latency of a record, measured by comparing the record timestamp with the system time when it has been fully processed by the node.</td>
         <td>kafka.streams:type=stream-processor-node-metrics,thread-id=([-.\w]+),task-id=([-.\w]+),processor-node-id=([-.\w]+)</td>
       </tr>
+      <tr>
+        <td>records-consumed-total</td>
+        <td>The total number of records consumed by a source processor node.</td>
+        <td>kafka.streams:type=stream-processor-node-metrics,thread-id=([-.\w]+),task-id=([-.\w]+),processor-node-id=([-.\w]+),topic=([-.\w]+)</td>
+      </tr>
+      <tr>
+        <td>records-produced-total</td>
+        <td>The total number of records produced by a sink processor node.</td>
+        <td>kafka.streams:type=stream-processor-node-metrics,thread-id=([-.\w]+),task-id=([-.\w]+),processor-node-id=([-.\w]+),topic=([-.\w]+)</td>
+      </tr>
       </tbody>
  </table>
 
  <h5 class="anchor-heading"><a id="kafka_streams_store_monitoring" class="anchor-link"></a><a href="#kafka_streams_store_monitoring">State Store Metrics</a></h5>
-All of the following metrics have a recording level of <code>debug</code>, except for the record-e2e-latency-* metrics which have a recording level <code>trace></code>.
+All of the following metrics have a recording level of <code>debug</code>, except for the record-e2e-latency-* metrics which have a recording level <code>trace</code>.
 Note that the <code>store-scope</code> value is specified in <code>StoreSupplier#metricsScope()</code> for user's customized state stores;
 for built-in state stores, currently we have:
   <ul>
diff --git a/docs/quickstart.html b/docs/quickstart.html
index 2ef56c8505745..3a75211d0b188 100644
--- a/docs/quickstart.html
+++ b/docs/quickstart.html
@@ -28,12 +28,12 @@ <h4 class="anchor-heading">
         </h4>
 
         <p>
-            <a href="https://www.apache.org/dyn/closer.cgi?path=/kafka/3.1.0/kafka_2.13-3.1.0.tgz">Download</a>
+            <a href="https://www.apache.org/dyn/closer.cgi?path=/kafka/{{fullDotVersion}}/kafka_{{scalaVersion}}-{{fullDotVersion}}.tgz">Download</a>
             the latest Kafka release and extract it:
         </p>
 
-        <pre class="line-numbers"><code class="language-bash">$ tar -xzf kafka_2.13-3.1.0.tgz
-$ cd kafka_2.13-3.1.0</code></pre>
+        <pre class="line-numbers"><code class="language-bash">$ tar -xzf kafka_{{scalaVersion}}-{{fullDotVersion}}.tgz
+$ cd kafka_{{scalaVersion}}-{{fullDotVersion}}</code></pre>
     </div>
 
     <div class="quickstart-step">
@@ -100,7 +100,7 @@ <h4 class="anchor-heading">
         </p>
 
         <pre class="line-numbers"><code class="language-bash">$ bin/kafka-topics.sh --describe --topic quickstart-events --bootstrap-server localhost:9092
-Topic:quickstart-events  PartitionCount:1    ReplicationFactor:1 Configs:
+Topic: quickstart-events        TopicId: NPmZHyhbR9y00wMglMH2sg PartitionCount: 1       ReplicationFactor: 1	Configs:
     Topic: quickstart-events Partition: 0    Leader: 0   Replicas: 0 Isr: 0</code></pre>
     </div>
 
@@ -173,7 +173,20 @@ <h4 class="anchor-heading">
         </p>
 
         <p>
-            First, we'll start by creating some seed data to test with:
+            First, make sure to add <code class="language-bash">connect-file-{{fullDotVersion}}.jar</code> to the <code>plugin.path</code> property in the Connect worker's configuration.
+            For the purpose of this quickstart we'll use a relative path and consider the connectors' package as an uber jar, which works when the quickstart commands are run from the installation directory.
+            However, it's worth noting that for production deployments using absolute paths is always preferable. See <a href="#connectconfigs_plugin.path">plugin.path</a> for a detailed description of how to set this config.
+        </p>
+
+        <p>
+            Edit the <code class="language-bash">config/connect-standalone.properties</code> file, add or change the <code>plugin.path</code> configuration property match the following, and save the file:
+        </p>
+
+        <pre class="brush: bash;">
+&gt; echo "plugin.path=libs/connect-file-{{fullDotVersion}}.jar"</pre>
+
+        <p>
+            Then, start by creating some seed data to test with:
         </p>
 
         <pre class="brush: bash;">
@@ -264,8 +277,8 @@ <h4 class="anchor-heading">
 wordCounts.toStream().to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));</code></pre>
 
         <p>
-            The <a href="/25/documentation/streams/quickstart">Kafka Streams demo</a>
-            and the <a href="/25/documentation/streams/tutorial">app development tutorial</a>
+            The <a href="/documentation/streams/quickstart">Kafka Streams demo</a>
+            and the <a href="/{{version}}/documentation/streams/tutorial">app development tutorial</a>
             demonstrate how to code and run such a streaming application from start to finish.
         </p>
 
diff --git a/docs/security.html b/docs/security.html
index 8ff9e6d8b6746..d9b26f5d15e66 100644
--- a/docs/security.html
+++ b/docs/security.html
@@ -271,7 +271,7 @@ <h5>SSL key and certificates in PEM format</h5>
 
             <p>Store password configs <code>ssl.keystore.password</code> and <code>ssl.truststore.password</code> are not used for PEM.
             If private key is encrypted using a password, the key password must be provided in <code>ssl.key.password</code>. Private keys may be provided
-            in unencrypted form without a password when PEM is specified directly in the config value. In production deployments, configs should be encrypted or
+            in unencrypted form without a password. In production deployments, configs should be encrypted or
             externalized using password protection feature in Kafka in this case. Note that the default SSL engine factory has limited capabilities for decryption
             of encrypted private keys when external tools like OpenSSL are used for encryption. Third party libraries like BouncyCastle may be integrated witn a
             custom <code>SslEngineFactory</code> to support a wider range of encrypted private keys.</p>
@@ -643,7 +643,7 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
         <li><h4><a id="security_sasl_plain" href="#security_sasl_plain">Authentication using SASL/PLAIN</a></h4>
             <p>SASL/PLAIN is a simple username/password authentication mechanism that is typically used with TLS for encryption to implement secure authentication.
                 Kafka supports a default implementation for SASL/PLAIN which can be extended for production use as described <a href="#security_sasl_plain_production">here</a>.</p>
-            The username is used as the authenticated <code>Principal</code> for configuration of ACLs etc.
+            Under the default implementation of <code>principal.builder.class</code>, the username is used as the authenticated <code>Principal</code> for configuration of ACLs etc.
             <ol>
                 <li><h5 class="anchor-heading"><a id="security_sasl_plain_brokerconfig" class="anchor-link"></a><a href="#security_sasl_plain_brokerconfig">Configuring Kafka Brokers</a></h5>
                     <ol>
@@ -712,7 +712,7 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
                 addresses the security concerns with traditional mechanisms that perform username/password authentication
                 like PLAIN and DIGEST-MD5. The mechanism is defined in <a href="https://tools.ietf.org/html/rfc5802">RFC 5802</a>.
                 Kafka supports <a href="https://tools.ietf.org/html/rfc7677">SCRAM-SHA-256</a> and SCRAM-SHA-512 which
-                can be used with TLS to perform secure authentication. The username is used as the authenticated
+                can be used with TLS to perform secure authentication. Under the default implementation of <code>principal.builder.class</code>, the username is used as the authenticated
                 <code>Principal</code> for configuration of ACLs etc. The default SCRAM implementation in Kafka
                 stores SCRAM credentials in Zookeeper and is suitable for use in Kafka installations where Zookeeper
                 is on a private network. Refer to <a href="#security_sasl_scram_security">Security Considerations</a>
@@ -806,6 +806,7 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
                 The default OAUTHBEARER implementation in Kafka creates and validates <a href="https://tools.ietf.org/html/rfc7515#appendix-A.5">Unsecured JSON Web Tokens</a>
                 and is only suitable for use in non-production Kafka installations. Refer to <a href="#security_sasl_oauthbearer_security">Security Considerations</a>
                 for more details.</p>
+            Under the default implementation of <code>principal.builder.class</code>, the principalName of OAuthBearerToken is used as the authenticated <code>Principal</code> for configuration of ACLs etc.
             <ol>
                 <li><h5 class="anchor-heading"><a id="security_sasl_oauthbearer_brokerconfig" class="anchor-link"></a><a href="#security_sasl_oauthbearer_brokerconfig">Configuring Kafka Brokers</a></h5>
                     <ol>
@@ -1047,6 +1048,7 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
                 frameworks to distribute the workload to available workers in a secure environment without the added cost of distributing
                 Kerberos TGT/keytabs or keystores when 2-way SSL is used. See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-48+Delegation+token+support+for+Kafka">KIP-48</a>
                 for more details.</p>
+            Under the default implementation of <code>principal.builder.class</code>, the owner of delegation token is used as the authenticated <code>Principal</code> for configuration of ACLs etc.
 
             <p>Typical steps for delegation token usage are:</p>
             <ol>
@@ -1078,9 +1080,14 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
                     <p>Tokens can be created by using Admin APIs or using <tt>kafka-delegation-tokens.sh</tt> script.
                         Delegation token requests (create/renew/expire/describe) should be issued only on SASL or SSL authenticated channels.
                         Tokens can not be requests if the initial authentication is done through delegation token.
+                        A token can be created by the user for that user or others as well by specifying the <tt>--owner-principal</tt> parameter.
+                        Owner/Renewers can renew or expire tokens. Owner/renewers can always describe their own tokens.
+                        To describe other tokens, a DESCRIBE_TOKEN permission needs to be added on the User resource representing the owner of the token.
                         <tt>kafka-delegation-tokens.sh</tt> script examples are given below.</p>
                     <p>Create a delegation token:
                     <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-delegation-tokens.sh --bootstrap-server localhost:9092 --create   --max-life-time-period -1 --command-config client.properties --renewer-principal User:user1</code></pre>
+                    <p>Create a delegation token for a different owner:
+                    <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-delegation-tokens.sh --bootstrap-server localhost:9092 --create   --max-life-time-period -1 --command-config client.properties --renewer-principal User:user1 --owner-principal User:owner1</code></pre>
                     <p>Renew a delegation token:
                     <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-delegation-tokens.sh --bootstrap-server localhost:9092 --renew    --renew-time-period -1 --command-config client.properties --hmac ABCDEFGHIJK</code></pre>
                     <p>Expire a delegation token:
@@ -1124,13 +1131,6 @@ <h3 class="anchor-heading"><a id="security_sasl" class="anchor-link"></a><a href
                     </ol>
                     <p>We intend to automate this in a future Kafka release.</p>
                 </li>
-
-                <li><h5 class="anchor-heading"><a id="security_token_notes" class="anchor-link"></a><a href="#security_token_notes">Notes on Delegation Tokens</a></h5>
-                    <ul>
-                        <li>Currently, we only allow a user to create delegation token for that user only. Owner/Renewers can renew or expire tokens.
-                            Owner/renewers can always describe their own tokens. To describe others tokens, we need to add DESCRIBE permission on Token Resource.</li>
-                    </ul>
-                </li>
             </ol>
         </li>
     </ol>
@@ -1264,6 +1264,13 @@ <h4 class="anchor-heading"><a id="security_authz_cli" class="anchor-link"></a><a
             <td></td>
             <td>ResourcePattern</td>
         </tr>
+        <tr>
+            <td>--user-principal [user-principal]</td>
+            <td>A user resource to which ACLs should be added or removed. This is currently supported in relation with delegation tokens.
+                A value of * indicates ACL should apply to all users.</td>
+            <td></td>
+            <td>ResourcePattern</td>
+        </tr>
         <tr>
             <td>--resource-pattern-type [pattern-type]</td>
             <td>Indicates to the script the type of resource pattern, (for --add), or resource pattern filter, (for --list and --remove), the user wishes to use.<br>
@@ -1321,6 +1328,8 @@ <h4 class="anchor-heading"><a id="security_authz_cli" class="anchor-link"></a><a
                     <li>DescribeConfigs</li>
                     <li>AlterConfigs</li>
                     <li>IdempotentWrite</li>
+                    <li>CreateTokens</li>
+                    <li>DescribeTokens</li>
                     <li>All</li>
                 </ul>
             </td>
@@ -1381,7 +1390,7 @@ <h4 class="anchor-heading"><a id="security_authz_examples" class="anchor-link"><
             Above examples add acls to a topic by specifying --topic [topic-name] as the resource pattern option. Similarly user can add acls to cluster by specifying --cluster and to a consumer group by specifying --group [group-name].
             You can add acls on any resource of a certain type, e.g. suppose you wanted to add an acl "Principal User:Peter is allowed to produce to any Topic from IP 198.51.200.0"
             You can do that by using the wildcard resource '*', e.g. by executing the CLI with following options:
-            <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --add --allow-principal User:Peter --allow-host 198.51.200.1 --producer --topic *</code></pre>
+            <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --add --allow-principal User:Peter --allow-host 198.51.200.1 --producer --topic '*'</code></pre>
             You can add acls on prefixed resource patterns, e.g. suppose you want to add an acl "Principal User:Jane is allowed to produce to any Topic whose name starts with 'Test-' from any host".
             You can do that by executing the CLI with following options:
             <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --add --allow-principal User:Jane --producer --topic Test- --resource-pattern-type prefixed</code></pre>
@@ -1398,7 +1407,7 @@ <h4 class="anchor-heading"><a id="security_authz_examples" class="anchor-link"><
             <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --list --topic Test-topic</code></pre>
             However, this will only return the acls that have been added to this exact resource pattern. Other acls can exist that affect access to the topic,
             e.g. any acls on the topic wildcard '*', or any acls on prefixed resource patterns. Acls on the wildcard resource pattern can be queried explicitly:
-            <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --list --topic *</code></pre>
+            <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --list --topic '*'</code></pre>
             However, it is not necessarily possible to explicitly query for acls on prefixed resource patterns that match Test-topic as the name of such patterns may not be known.
             We can list <i>all</i> acls affecting Test-topic by using '--resource-pattern-type match', e.g.
             <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --authorizer-properties zookeeper.connect=localhost:2181 --list --topic Test-topic --resource-pattern-type match</code></pre>
@@ -1416,9 +1425,10 @@ <h4 class="anchor-heading"><a id="security_authz_examples" class="anchor-link"><
             Users having Alter permission on ClusterResource can use Admin API for ACL management. kafka-acls.sh script supports AdminClient API to manage ACLs without interacting with zookeeper/authorizer directly.
             All the above examples can be executed by using <b>--bootstrap-server</b> option. For example:
 
-            <pre class="line-numbers"><code class="language-bash">&gt; bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --add --allow-principal User:Bob --producer --topic Test-topic
+            <pre class="line-numbers"><code class="language-bash">bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --add --allow-principal User:Bob --producer --topic Test-topic
 bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --add --allow-principal User:Bob --consumer --topic Test-topic --group Group-1
-bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --list --topic Test-topic</code></pre></li>
+bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --list --topic Test-topic
+bin/kafka-acls.sh --bootstrap-server localhost:9092 --command-config /tmp/adminclient-configs.conf --add --allow-principal User:tokenRequester --operation CreateTokens --user-principal "owner1"</code></pre></li>
 
     </ul>
 
@@ -1440,6 +1450,8 @@ <h5 class="anchor-heading"><a id="operations_in_kafka" class="anchor-link"></a><
         <li>DescribeConfigs</li>
         <li>AlterConfigs</li>
         <li>IdempotentWrite</li>
+        <li>CreateTokens</li>
+        <li>DescribeTokens</li>
         <li>All</li>
     </ul>
     <h5 class="anchor-heading"><a id="resources_in_kafka" class="anchor-link"></a><a href="#resources_in_kafka">Resources in Kafka</a></h5>
@@ -1461,6 +1473,8 @@ <h5 class="anchor-heading"><a id="resources_in_kafka" class="anchor-link"></a><a
             a little special behavior in Kafka it is recommended to read
             <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-48+Delegation+token+support+for+Kafka#KIP-48DelegationtokensupportforKafka-DescribeDelegationTokenRequest">KIP-48</a>
             and the related upstream documentation at <a href="#security_delegation_token">Authentication using Delegation Tokens</a>.</li>
+        <li><b>User:</b> CreateToken and DescribeToken operations can be granted to User resources to allow creating and describing
+            tokens for other users. More info can be found in <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-373%3A+Allow+users+to+create+delegation+tokens+for+other+users">KIP-373</a>.</li>
     </ul>
     <h5 class="anchor-heading"><a id="operations_resources_and_protocols" class="anchor-link"></a><a href="#operations_resources_and_protocols">Operations and Resources on Protocols</a></h5>
     <p>In the below table we'll list the valid operations on resources that are executed by the Kafka API protocols.</p>
@@ -1836,6 +1850,12 @@ <h5 class="anchor-heading"><a id="operations_resources_and_protocols" class="anc
             <td>Creating delegation tokens has special rules, for this please see the
                 <a id="security_delegation_token_1" href="#security_delegation_token">Authentication using Delegation Tokens</a> section.</td>
         </tr>
+        <tr>
+            <td>CREATE_DELEGATION_TOKEN (38)</td>
+            <td>CreateTokens</td>
+            <td>User</td>
+            <td>Allows creating delegation tokens for the User resource.</td>
+        </tr>
         <tr>
             <td>RENEW_DELEGATION_TOKEN (39)</td>
             <td></td>
@@ -1857,6 +1877,12 @@ <h5 class="anchor-heading"><a id="operations_resources_and_protocols" class="anc
             <td>Describing delegation tokens has special rules, for this please see the
                 <a id="security_delegation_token_4" href="#security_delegation_token">Authentication using Delegation Tokens</a> section.</td>
         </tr>
+        <tr>
+            <td>DESCRIBE_DELEGATION_TOKEN (41)</td>
+            <td>DescribeTokens</td>
+            <td>User</td>
+            <td>Allows describing delegation tokens of the User resource.</td>
+        </tr>
         <tr>
             <td>DELETE_GROUPS (42)</td>
             <td>Delete</td>
diff --git a/docs/streams/architecture.html b/docs/streams/architecture.html
index a1773c5fa1250..e561231c311e6 100644
--- a/docs/streams/architecture.html
+++ b/docs/streams/architecture.html
@@ -161,6 +161,12 @@ <h3 class="anchor-heading"><a id="streams_architecture_recovery" class="anchor-l
         Starting in 2.6, Kafka Streams will guarantee that a task is only ever assigned to an instance with a fully caught-up local copy of the state, if such an instance
         exists. Standby tasks will increase the likelihood that a caught-up instance exists in the case of a failure.
     </p>
+    <p>
+        You can also configure standby replicas with rack awareness. When configured, Kafka Streams will attempt to
+        distribute a standby task on a different "rack" than the active one, thus having a faster recovery time when the
+        rack of the active tasks fails. See <code>rack.aware.assignment.tags</code>
+        in the <a href="/{{version}}/documentation/streams/developer-guide/config-streams.html#rack-aware-assignment-tags"><b>Kafka Streams Developer Guide</b></a> section.
+    </p>
 
     <div class="pagination">
         <a href="/{{version}}/documentation/streams/core-concepts" class="pagination__btn pagination__btn__prev">Previous</a>
diff --git a/docs/streams/developer-guide/config-streams.html b/docs/streams/developer-guide/config-streams.html
index dd9298d053ef5..0aee6b6e1ddb6 100644
--- a/docs/streams/developer-guide/config-streams.html
+++ b/docs/streams/developer-guide/config-streams.html
@@ -84,6 +84,7 @@
               <li><a class="reference internal" href="#partition-grouper" id="id12">partition.grouper</a></li>
               <li><a class="reference internal" href="#probing-rebalance-interval-ms" id="id30">probing.rebalance.interval.ms</a></li>
               <li><a class="reference internal" href="#processing-guarantee" id="id25">processing.guarantee</a></li>
+              <li><a class="reference internal" href="#rack-aware-assignment-tags" id="id34">rack.aware.assignment.tags</a></li>
               <li><a class="reference internal" href="#replication-factor" id="id13">replication.factor</a></li>
               <li><a class="reference internal" href="#rocksdb-config-setter" id="id20">rocksdb.config.setter</a></li>
               <li><a class="reference internal" href="#state-dir" id="id14">state.dir</a></li>
@@ -383,6 +384,13 @@ <h4><a class="toc-backref" href="#id23">num.standby.replicas</a><a class="header
             <td colspan="2">The amount of time in milliseconds to block waiting for input.</td>
             <td>100 milliseconds</td>
           </tr>
+          <tr class="row-even"><td>rack.aware.assignment.tags</td>
+            <td>Medium</td>
+            <td colspan="2">List of tag keys used to distribute standby replicas across Kafka Streams
+              clients. When configured, Kafka Streams will make a best-effort to distribute the standby tasks over
+              clients with different tag values.</td>
+            <td>the empty list</td>
+          </tr>
           <tr class="row-even"><td>replication.factor</td>
             <td>Medium</td>
             <td colspan="2">The replication factor for changelog topics and repartition topics created by the application.
@@ -677,6 +685,40 @@ <h4><a class="toc-backref" href="#id33">default.windowed.value.serde.inner</a><a
               <p>This is discussed in more detail in <a class="reference internal" href="datatypes.html#streams-developer-guide-serdes"><span class="std std-ref">Data types and serialization</span></a>.</p>
             </div></blockquote>
         </div>
+      <div class="section" id="rack-aware-assignment-tags">
+        <h4><a class="toc-backref" href="#id34">rack.aware.assignment.tags</a><a class="headerlink" href="#rack-aware-assignment-tags" title="Permalink to this headline"></a>
+        </h4>
+        <blockquote>
+          <div>
+            <p>
+              This configuration sets a list of tag keys used to distribute standby replicas across Kafka Streams
+              clients. When configured, Kafka Streams will make a best-effort to distribute the standby tasks over
+              clients with different tag values.
+            </p>
+            <p>
+              Tags for the Kafka Streams clients can be set via <code class="docutils literal"><span class="pre">client.tag.</span></code>
+              prefix. Example:
+            </p>
+            <pre><code class="language-text">
+Client-1                                   | Client-2
+_______________________________________________________________________
+client.tag.zone: eu-central-1a             | client.tag.zone: eu-central-1b
+client.tag.cluster: k8s-cluster1           | client.tag.cluster: k8s-cluster1
+rack.aware.assignment.tags: zone,cluster   | rack.aware.assignment.tags: zone,cluster
+
+
+Client-3                                   | Client-4
+_______________________________________________________________________
+client.tag.zone: eu-central-1a             | client.tag.zone: eu-central-1b
+client.tag.cluster: k8s-cluster2           | client.tag.cluster: k8s-cluster2
+rack.aware.assignment.tags: zone,cluster   | rack.aware.assignment.tags: zone,cluster</code></pre>
+            <p>
+              In the above example, we have four Kafka Streams clients across two zones (<code class="docutils literal"><span class="pre">eu-central-1a</span></code>, <code class="docutils literal"><span class="pre">eu-central-1b</span></code>) and across two clusters (<code class="docutils literal"><span class="pre">k8s-cluster1</span></code>, <code class="docutils literal"><span class="pre">k8s-cluster2</span></code>).
+              For an active task located on <code class="docutils literal"><span class="pre">Client-1</span></code>, Kafka Streams will allocate a standby task on <code class="docutils literal"><span class="pre">Client-4</span></code>, since <code class="docutils literal"><span class="pre">Client-4</span></code> has a different <code class="docutils literal"><span class="pre">zone</span></code> and a different <code class="docutils literal"><span class="pre">cluster</span></code> than <code class="docutils literal"><span class="pre">Client-1</span></code>.
+            </p>
+          </div>
+        </blockquote>
+      </div>
         <div class="section" id="max-task-idle-ms">
           <span id="streams-developer-guide-max-task-idle-ms"></span><h4><a class="toc-backref" href="#id28">max.task.idle.ms</a><a class="headerlink" href="#max-task-idle-ms" title="Permalink to this headline"></a></h4>
           <blockquote>
diff --git a/docs/streams/developer-guide/dsl-api.html b/docs/streams/developer-guide/dsl-api.html
index 3d8454abb2011..6b80656ef22ef 100644
--- a/docs/streams/developer-guide/dsl-api.html
+++ b/docs/streams/developer-guide/dsl-api.html
@@ -956,7 +956,7 @@ <h4 class="anchor-heading"><a id="streams_concepts_globalktable" class="anchor-l
 KTable&lt;byte[], Long&gt; aggregatedStream = groupedStream.aggregate(
     () -&gt; 0L, /* initializer */
     (aggKey, newValue, aggValue) -&gt; aggValue + newValue.length(), /* adder */
-    Materialized.as(&quot;aggregated-stream-store&quot;) /* state store name */
+    Materialized.&lt;String, Long, KeyValueStore&lt;Bytes, byte[]&gt;&gt;as(&quot;aggregated-stream-store&quot;) /* state store name */
         .withValueSerde(Serdes.Long()); /* serde for aggregate value */
 
 // Aggregating a KGroupedTable (note how the value type changes from String to Long)
@@ -964,7 +964,7 @@ <h4 class="anchor-heading"><a id="streams_concepts_globalktable" class="anchor-l
     () -&gt; 0L, /* initializer */
     (aggKey, newValue, aggValue) -&gt; aggValue + newValue.length(), /* adder */
     (aggKey, oldValue, aggValue) -&gt; aggValue - oldValue.length(), /* subtractor */
-    Materialized.as(&quot;aggregated-table-store&quot;) /* state store name */
+    Materialized.&lt;String, Long, KeyValueStore&lt;Bytes, byte[]&gt;&gt;as(&quot;aggregated-table-store&quot;) /* state store name */
 	.withValueSerde(Serdes.Long()) /* serde for aggregate value */
 
 
diff --git a/docs/streams/developer-guide/dsl-topology-naming.html b/docs/streams/developer-guide/dsl-topology-naming.html
index 9e687f9a940ee..cd11c132bb794 100644
--- a/docs/streams/developer-guide/dsl-topology-naming.html
+++ b/docs/streams/developer-guide/dsl-topology-naming.html
@@ -41,7 +41,7 @@ <h1>Naming Operators in a Kafka Streams DSL Application<a class="headerlink" hre
 		   you are required to explicitly name each one.
 	    </p>
 		<p>
-		   At the DLS layer, there are operators.  A single DSL operator may
+		   At the DSL layer, there are operators.  A single DSL operator may
 		   compile down to multiple <code>Processors</code> and <code>State Stores</code>, and
 		   if required <code>repartition topics</code>. But with the Kafka Streams
 		   DSL, all these names are generated for you. There is a relationship between
diff --git a/docs/streams/developer-guide/processor-api.html b/docs/streams/developer-guide/processor-api.html
index 90706e508c87e..e5bab51be9db4 100644
--- a/docs/streams/developer-guide/processor-api.html
+++ b/docs/streams/developer-guide/processor-api.html
@@ -375,7 +375,7 @@ <h2>
   Stores.persistentKeyValueStore(&quot;Counts&quot;),
     Serdes.String(),
     Serdes.Long())
-  .withLoggingEnabled(changlogConfig); // enable changelogging, with custom changelog settings</code></pre>
+  .withLoggingEnabled(changelogConfig); // enable changelogging, with custom changelog settings</code></pre>
             </div>
             <div class="section" id="timestamped-state-stores">
                 <span id="streams-developer-guide-state-store-timestamps"></span><h3><a class="toc-backref" href="#id11">Timestamped State Stores</a><a class="headerlink" href="#timestamped-state-stores" title="Permalink to this headline"></a></h3>
diff --git a/docs/streams/index.html b/docs/streams/index.html
index e38b3890af9ce..c24af4c2a3008 100644
--- a/docs/streams/index.html
+++ b/docs/streams/index.html
@@ -35,32 +35,25 @@ <h1>Kafka Streams</h1>
     </div>
     <h3 class="streams_intro">The easiest way to write mission-critical real-time applications and microservices</h3>
        <p class="streams__description">Kafka Streams is a client library for building applications and microservices, where the input and output data are stored in Kafka clusters. It combines the simplicity of writing and deploying standard Java and Scala applications on the client side with the benefits of Kafka's server-side cluster technology.</p>
+       <hr class="separator">
+       <h3>VIDEO TOUR OF THE STREAMS API</h3>
        <div class="video__series__grid">
-          <div class="yt__video__block">
-            <div class="yt__video__inner__block">
-                <iframe  class="yt_series video_1 active" style="display:block" src="https://www.youtube.com/embed/Z3JKCLG3VP4?rel=0&showinfo=0&end=602" frameborder="0" allowfullscreen></iframe>
-                <iframe  class="yt_series video_2" src="https://www.youtube.com/embed/LxxeXI1mPKo?rel=0&showinfo=0&end=622" frameborder="0" allowfullscreen></iframe>
-                <iframe  class="yt_series video_3" src="https://www.youtube.com/embed/7JYEEx7SBuE?rel=0&showinfo=0end=557" frameborder="0" allowfullscreen></iframe>
-                <iframe  class="yt_series video_4" src="https://www.youtube.com/embed/3kJgYIkAeHs?rel=0&showinfo=0&end=564" frameborder="0" allowfullscreen></iframe>
-             </div>
-            </div>
-            <div class="video__block">
-                <h3>TOUR OF THE STREAMS API</h3>
-                <div class="video__list">
-                   <p class="video__item video_list_1 active" onclick="$('.video__item').removeClass('active'); $(this).addClass('active');$('.yt_series').hide();$('.video_1').show();">
-                       <span class="video-number">1</span><span class="video__text">Intro to Streams</span>
-                   </p>
-                   <p class="video__item video_list_2" onclick="$('.video__item').removeClass('active'); $(this).addClass('active');$('.yt_series').hide();$('.video_2').show();">
-                       <span class="video-number">2</span><span class="video__text">Creating a Streams Application</span>
-                   </p>
-                   <p class="video__item video_list_3" onclick="$('.video__item').removeClass('active'); $(this).addClass('active');$('.yt_series').hide();$('.video_3').show();">
-                       <span class="video-number">3</span><span class="video__text">Transforming Data Pt. 1</span>
-                   </p>
-                   <p class="video__item video_list_4" onclick="$('.video__item').removeClass('active'); $(this).addClass('active');$('.yt_series').hide();$('.video_4').show();">
-                      <span class="video-number">4</span><span class="video__text">Transforming Data Pt. 11</span>
-                   </p>
-                </div>
-            </div>
+         <div class="video__block">
+           <div class="video__list">
+               <p class="video__item video_list_1 active">
+                   <span class="video-number">1</span><a href="https://www.youtube.com/embed/Z3JKCLG3VP4"><span class="video__text">Intro to Streams</span></a>
+               </p>
+               <p class="video__item video_list_2 active">
+                   <span class="video-number">2</span><a href="https://www.youtube.com/embed/LxxeXI1mPKo"><span class="video__text">Creating a Streams Application</span></a>
+               </p>
+               <p class="video__item video_list_3 active">
+                   <span class="video-number">3</span><a href="https://www.youtube.com/embed/7JYEEx7SBuE"><span class="video__text">Transforming Data Pt. 1</span></a>
+               </p>
+               <p class="video__item video_list_4 active">
+                   <span class="video-number">4</span><a href="https://www.youtube.com/embed/3kJgYIkAeHs"><span class="video__text">Transforming Data Pt. 2</span></a>
+               </p>
+           </div>
+         </div>
        </div>
        <hr class="separator">
        <div class="use-item-section">
diff --git a/docs/streams/tutorial.html b/docs/streams/tutorial.html
index a526de568abb1..017d7796821fc 100644
--- a/docs/streams/tutorial.html
+++ b/docs/streams/tutorial.html
@@ -452,7 +452,7 @@ <h4><a id="tutorial_code_wordcount" href="#tutorial_code_wordcount">Writing a th
     <p>
         Note that the <code>count</code> operator has a <code>Materialized</code> parameter that specifies that the
         running count should be stored in a state store named <code>counts-store</code>.
-        This <code>Counts</code> store can be queried in real-time, with details described in the <a href="/{{version}}/documentation/streams/developer-guide#streams_interactive_queries">Developer Manual</a>.
+        This <code>counts-store</code> store can be queried in real-time, with details described in the <a href="/{{version}}/documentation/streams/developer-guide#streams_interactive_queries">Developer Manual</a>.
     </p>
 
     <p>
@@ -490,10 +490,10 @@ <h4><a id="tutorial_code_wordcount" href="#tutorial_code_wordcount">Writing a th
     Processor: KSTREAM-FLATMAPVALUES-0000000001(stores: []) --> KSTREAM-KEY-SELECT-0000000002 <-- KSTREAM-SOURCE-0000000000
     Processor: KSTREAM-KEY-SELECT-0000000002(stores: []) --> KSTREAM-FILTER-0000000005 <-- KSTREAM-FLATMAPVALUES-0000000001
     Processor: KSTREAM-FILTER-0000000005(stores: []) --> KSTREAM-SINK-0000000004 <-- KSTREAM-KEY-SELECT-0000000002
-    Sink: KSTREAM-SINK-0000000004(topic: Counts-repartition) <-- KSTREAM-FILTER-0000000005
+    Sink: KSTREAM-SINK-0000000004(topic: counts-store-repartition) <-- KSTREAM-FILTER-0000000005
   Sub-topology: 1
-    Source: KSTREAM-SOURCE-0000000006(topics: Counts-repartition) --> KSTREAM-AGGREGATE-0000000003
-    Processor: KSTREAM-AGGREGATE-0000000003(stores: [Counts]) --> KTABLE-TOSTREAM-0000000007 <-- KSTREAM-SOURCE-0000000006
+    Source: KSTREAM-SOURCE-0000000006(topics: counts-store-repartition) --> KSTREAM-AGGREGATE-0000000003
+    Processor: KSTREAM-AGGREGATE-0000000003(stores: [counts-store]) --> KTABLE-TOSTREAM-0000000007 <-- KSTREAM-SOURCE-0000000006
     Processor: KTABLE-TOSTREAM-0000000007(stores: []) --> KSTREAM-SINK-0000000008 <-- KSTREAM-AGGREGATE-0000000003
     Sink: KSTREAM-SINK-0000000008(topic: streams-wordcount-output) <-- KTABLE-TOSTREAM-0000000007
 Global Stores:
@@ -501,14 +501,14 @@ <h4><a id="tutorial_code_wordcount" href="#tutorial_code_wordcount">Writing a th
 
     <p>
         As we can see above, the topology now contains two disconnected sub-topologies.
-        The first sub-topology's sink node <code>KSTREAM-SINK-0000000004</code> will write to a repartition topic <code>Counts-repartition</code>,
+        The first sub-topology's sink node <code>KSTREAM-SINK-0000000004</code> will write to a repartition topic <code>counts-store-repartition</code>,
         which will be read by the second sub-topology's source node <code>KSTREAM-SOURCE-0000000006</code>.
         The repartition topic is used to "shuffle" the source stream by its aggregation key, which is in this case the value string.
         In addition, inside the first sub-topology a stateless <code>KSTREAM-FILTER-0000000005</code> node is injected between the grouping <code>KSTREAM-KEY-SELECT-0000000002</code> node and the sink node to filter out any intermediate record whose aggregate key is empty.
     </p>
     <p>
-        In the second sub-topology, the aggregation node <code>KSTREAM-AGGREGATE-0000000003</code> is associated with a state store named <code>Counts</code> (the name is specified by the user in the <code>count</code> operator).
-        Upon receiving each record from its upcoming stream source node, the aggregation processor will first query its associated <code>Counts</code> store to get the current count for that key, augment by one, and then write the new count back to the store.
+        In the second sub-topology, the aggregation node <code>KSTREAM-AGGREGATE-0000000003</code> is associated with a state store named <code>counts-store</code> (the name is specified by the user in the <code>count</code> operator).
+        Upon receiving each record from its upcoming stream source node, the aggregation processor will first query its associated <code>counts-store</code> store to get the current count for that key, augment by one, and then write the new count back to the store.
         Each updated count for the key will also be piped downstream to the <code>KTABLE-TOSTREAM-0000000007</code> node, which interpret this update stream as a record stream before further piping to the sink node <code>KSTREAM-SINK-0000000008</code> for writing back to Kafka.
     </p>
 
diff --git a/docs/streams/upgrade-guide.html b/docs/streams/upgrade-guide.html
index febfc65ad5156..7b6075d6adae3 100644
--- a/docs/streams/upgrade-guide.html
+++ b/docs/streams/upgrade-guide.html
@@ -34,9 +34,9 @@ <h1>Upgrade Guide and API Changes</h1>
     </div>
 
     <p>
-        Upgrading from any older version to {{fullDotVersion}} is possible: if upgrading from 2.3 or below, you will need to do two rolling bounces, where during the first rolling bounce phase you set the config <code>upgrade.from="older version"</code>
-        (possible values are <code>"0.10.0" - "2.3"</code>) and during the second you remove it. This is required to safely upgrade to the new cooperative rebalancing protocol of the embedded consumer. Note that you will remain using the old eager
-        rebalancing protocol if you skip or delay the second rolling bounce, but you can safely switch over to cooperative at any time once the entire group is on 2.4+ by removing the config value and bouncing. For more details please refer to
+        Upgrading from any older version to {{fullDotVersion}} is possible: if upgrading from 3.2 or below, you will need to do two rolling bounces, where during the first rolling bounce phase you set the config <code>upgrade.from="older version"</code>
+        (possible values are <code>"0.10.0" - "3.2"</code>) and during the second you remove it. This is required to safely handle 2 changes. The first is introduction of the new cooperative rebalancing protocol of the embedded consumer. The second is a change in foreign-key join serialization format.
+        Note that you will remain using the old eager rebalancing protocol if you skip or delay the second rolling bounce, but you can safely switch over to cooperative at any time once the entire group is on 2.4+ by removing the config value and bouncing. For more details please refer to
         <a href="https://cwiki.apache.org/confluence/x/vAclBg">KIP-429</a>:
     </p>
     <ul>
diff --git a/docs/upgrade.html b/docs/upgrade.html
index ddcee32053eb5..e225fe93e5cbb 100644
--- a/docs/upgrade.html
+++ b/docs/upgrade.html
@@ -19,12 +19,79 @@
 
 <script id="upgrade-template" type="text/x-handlebars-template">
 
+<h5><a id="upgrade_330_notable" href="#upgrade_330_notable">Notable changes in 3.3.0</a></h5>
+    <ul>
+        <li>Introduced a new API <code>addMetricIfAbsent</code> to <code>Metrics</code> which would create a new Metric if not existing or return the same metric
+            if already registered. Note that this behaviour is different from <code>addMetric</code> API which throws an <code>IllegalArgumentException</code> when
+            trying to create an already existing metric. (See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-843%3A+Adding+addMetricIfAbsent+method+to+Metrics">KIP-843</a>
+            for more details).
+        </li>
+    </ul>
+
+<h4><a id="upgrade_3_2_0" href="#upgrade_3_2_0">Upgrading to 3.2.0 from any version 0.8.x through 3.1.x</a></h4>
+
+<p><b>If you are upgrading from a version prior to 2.1.x, please see the note below about the change to the schema used to store consumer offsets.
+    Once you have changed the inter.broker.protocol.version to the latest version, it will not be possible to downgrade to a version prior to 2.1.</b></p>
+
+<p><b>For a rolling upgrade:</b></p>
+
+<ol>
+    <li>Update server.properties on all brokers and add the following properties. CURRENT_KAFKA_VERSION refers to the version you
+        are upgrading from. CURRENT_MESSAGE_FORMAT_VERSION refers to the message format version currently in use. If you have previously
+        overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
+        to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
+        <ul>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>3.1</code>, <code>3.0</code>, etc.)</li>
+            <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
+                following the upgrade</a> for the details on what this configuration does.)</li>
+        </ul>
+        If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
+        the inter-broker protocol version.
+        <ul>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>3.1</code>, <code>3.0</code>, etc.)</li>
+        </ul>
+    </li>
+    <li>Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
+        brokers will be running the latest version and you can verify that the cluster's behavior and performance meets expectations.
+        It is still possible to downgrade at this point if there are any problems.
+    </li>
+    <li>Once the cluster's behavior and performance has been verified, bump the protocol version by editing
+        <code>inter.broker.protocol.version</code> and setting it to <code>3.2</code>.
+    </li>
+    <li>Restart the brokers one by one for the new protocol version to take effect. Once the brokers begin using the latest
+        protocol version, it will no longer be possible to downgrade the cluster to an older version.
+    </li>
+    <li>If you have overridden the message format version as instructed above, then you need to do one more rolling restart to
+        upgrade it to its latest version. Once all (or most) consumers have been upgraded to 0.11.0 or later,
+        change log.message.format.version to 3.2 on each broker and restart them one by one. Note that the older Scala clients,
+        which are no longer maintained, do not support the message format introduced in 0.11, so to avoid conversion costs
+        (or to take advantage of <a href="#upgrade_11_exactly_once_semantics">exactly once semantics</a>),
+        the newer Java clients must be used.
+    </li>
+</ol>
+
 <h5><a id="upgrade_320_notable" href="#upgrade_320_notable">Notable changes in 3.2.0</a></h5>
     <ul>
         <li>Idempotence for the producer is enabled by default if no conflicting configurations are set. In 3.0.0 and 3.1.0, a bug prevented this default from being applied,
             which meant that idempotence remained disabled unless the user had explicitly set <code>enable.idempotence</code> to true
-            (See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a>for more details).
+            (See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a> for more details).
             This issue was fixed and the default is properly applied in 3.0.1, 3.1.1, and 3.2.0.</li>
+        <li>A notable exception is Connect that by default disables idempotent behavior for all of its
+            producers in order to uniformly support using a wide range of Kafka broker versions.
+            Users can change this behavior to enable idempotence for some or all producers
+            via Connect worker and/or connector configuration. Connect may enable idempotent producers
+            by default in a future major release.</li>
+        <li>Kafka has replaced log4j with reload4j due to security concerns.
+            This only affects modules that specify a logging backend (<code>connect-runtime</code> and <code>kafka-tools</code> are two such examples).
+            A number of modules, including <code>kafka-clients</code>, leave it to the application to specify the logging backend.
+            More information can be found at <a href="https://reload4j.qos.ch">reload4j</a>.
+            Projects that depend on the affected modules from the Kafka project should use
+            <a href="https://www.slf4j.org/manual.html#swapping">slf4j-log4j12 version 1.7.35 or above</a> or
+            slf4j-reload4j to avoid
+            <a href="https://www.slf4j.org/codes.html#no_tlm">possible compatibility issues originating from the logging framework</a>.</li>
+        <li>The example connectors, <code>FileStreamSourceConnector</code> and <code>FileStreamSinkConnector</code>, have been
+            removed from the default classpath. To use them in Kafka Connect standalone or distributed mode they need to be
+            explicitly added, for example <code>CLASSPATH=./lib/connect-file-3.2.0.jar ./bin/connect-distributed.sh</code>.</li>
     </ul>
 
 <h4><a id="upgrade_3_1_0" href="#upgrade_3_1_0">Upgrading to 3.1.0 from any version 0.8.x through 3.0.x</a></h4>
@@ -40,14 +107,14 @@ <h4><a id="upgrade_3_1_0" href="#upgrade_3_1_0">Upgrading to 3.1.0 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>3.0</code>, <code>2.8</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>3.0</code>, <code>2.8</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>3.0</code>, <code>2.8</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>3.0</code>, <code>2.8</code>, etc.)</li>
         </ul>
     </li>
     <li>Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -73,8 +140,21 @@ <h5><a id="upgrade_311_notable" href="#upgrade_311_notable">Notable changes in 3
 <ul>
     <li>Idempotence for the producer is enabled by default if no conflicting configurations are set.
         A bug prevented the producer idempotence default from being applied which meant that it remained disabled unless the user had explicitly set
-	<code>enable.idempotence</code> to true. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a>for more details.
+	<code>enable.idempotence</code> to true. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a> for more details.
         This issue was fixed and the default is properly applied.</li>
+    <li>A notable exception is Connect that by default disables idempotent behavior for all of its
+        producers in order to uniformly support using a wide range of Kafka broker versions.
+        Users can change this behavior to enable idempotence for some or all producers
+        via Connect worker and/or connector configuration. Connect may enable idempotent producers
+        by default in a future major release.</li>
+    <li>Kafka has replaced log4j with reload4j due to security concerns.
+        This only affects modules that specify a logging backend (<code>connect-runtime</code> and <code>kafka-tools</code> are two such examples).
+        A number of modules, including <code>kafka-clients</code>, leave it to the application to specify the logging backend.
+        More information can be found at <a href="https://reload4j.qos.ch">reload4j</a>.
+        Projects that depend on the affected modules from the Kafka project should use
+        <a href="https://www.slf4j.org/manual.html#swapping">slf4j-log4j12 version 1.7.35 or above</a> or
+        slf4j-reload4j to avoid
+        <a href="https://www.slf4j.org/codes.html#no_tlm">possible compatibility issues originating from the logging framework</a>.</li>
 </ul>
 
 <h5><a id="upgrade_310_notable" href="#upgrade_310_notable">Notable changes in 3.1.0</a></h5>
@@ -98,7 +178,7 @@ <h5><a id="upgrade_310_notable" href="#upgrade_310_notable">Notable changes in 3
         <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-516%3A+Topic+Identifiers">KIP-516</a>.</li>
 </ul>
 
-<h4><a id="upgrade_3_0_0" href="#upgrade_3_0_0">Upgrading to 3.0.0 from any version 0.8.x through 2.8.x</a></h4>
+<h4><a id="upgrade_3_0_1" href="#upgrade_3_0_1">Upgrading to 3.0.1 from any version 0.8.x through 2.8.x</a></h4>
 
 <p><b>If you are upgrading from a version prior to 2.1.x, please see the note below about the change to the schema used to store consumer offsets.
     Once you have changed the inter.broker.protocol.version to the latest version, it will not be possible to downgrade to a version prior to 2.1.</b></p>
@@ -111,14 +191,14 @@ <h4><a id="upgrade_3_0_0" href="#upgrade_3_0_0">Upgrading to 3.0.0 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.8</code>, <code>2.7</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.8</code>, <code>2.7</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.8</code>, <code>2.7</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.8</code>, <code>2.7</code>, etc.)</li>
         </ul>
     </li>
     <li>Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -144,7 +224,7 @@ <h5><a id="upgrade_301_notable" href="#upgrade_301_notable">Notable changes in 3
 <ul>
     <li>Idempotence for the producer is enabled by default if no conflicting configurations are set.
         A bug prevented the producer idempotence default from being applied which meant that it remained disabled unless the user had explicitly set
-	<code>enable.idempotence</code> to true. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a>for more details.
+	<code>enable.idempotence</code> to true. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a> for more details.
         This issue was fixed and the default is properly applied.</li>
 </ul>
 
@@ -153,8 +233,11 @@ <h5><a id="upgrade_300_notable" href="#upgrade_300_notable">Notable changes in 3
     <li>The producer has stronger delivery guarantees by default: <code>idempotence</code> is enabled and <code>acks</code> is set to <code>all</code> instead of <code>1</code>.
         See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-679%3A+Producer+will+enable+the+strongest+delivery+guarantee+by+default">KIP-679</a> for details.
 	In 3.0.0 and 3.1.0, a bug prevented the idempotence default from being applied which meant that it remained disabled unless the user had explicitly set
-	<code>enable.idempotence</code> to true. Note that the bug did not affect the <code>acks=all</code> change. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a>for more details.
-        This issue was fixed and the default is properly applied in 3.0.1, 3.1.1, and 3.2.0.
+	<code>enable.idempotence</code> to true. Note that the bug did not affect the <code>acks=all</code> change. See <a href="https://issues.apache.org/jira/browse/KAFKA-13598">KAFKA-13598</a> for more details.
+        This issue was fixed and the default is properly applied in 3.0.1, 3.1.1, and 3.2.0.</li>
+    <li>Java 8 and Scala 2.12 support have been deprecated since Apache Kafka 3.0 and will be removed in Apache Kafka 4.0.
+        See <a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181308223">KIP-750</a>
+        and <a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=181308218">KIP-751</a> for more details.</li>
     <li>ZooKeeper has been upgraded to version 3.6.3.</li>
     <li>A preview of KRaft mode is available, though upgrading to it from the 2.8 Early Access release is not possible. See
         the <code>config/kraft/README.md</code> file for details.</li>
@@ -166,7 +249,7 @@ <h5><a id="upgrade_300_notable" href="#upgrade_300_notable">Notable changes in 3
         <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-735%3A+Increase+default+consumer+session+timeout">KIP-735</a> for more details.</li>
     <li>The broker configuration <code>log.message.format.version</code> and topic configuration <code>message.format.version</code> have been deprecated.
         The value of both configurations is always assumed to be <code>3.0</code> if <code>inter.broker.protocol.version</code> is <code>3.0</code> or higher.
-        If <code>log.message.format.version</code> or <<code>message.format.version</code> are set, we recommend clearing them at the same time as the
+        If <code>log.message.format.version</code> or <code>message.format.version</code> are set, we recommend clearing them at the same time as the
         <code>inter.broker.protocol.version</code> upgrade to 3.0. This will avoid potential compatibility issues if the <code>inter.broker.protocol.version</code>
         is downgraded. See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-724%3A+Drop+support+for+message+formats+v0+and+v1">KIP-724</a> for more details.</li>
     <li>The Streams API removed all deprecated APIs that were deprecated in version 2.5.0 or earlier.
@@ -265,14 +348,14 @@ <h4><a id="upgrade_2_8_1" href="#upgrade_2_8_1">Upgrading to 2.8.1 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.7</code>, <code>2.6</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.7</code>, <code>2.6</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.7</code>, <code>2.6</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.7</code>, <code>2.6</code>, etc.)</li>
         </ul>
     </li>
     <li> Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -327,14 +410,14 @@ <h4><a id="upgrade_2_7_0" href="#upgrade_2_7_0">Upgrading to 2.7.0 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.6</code>, <code>2.5</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.6</code>, <code>2.5</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.6</code>, <code>2.5</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.6</code>, <code>2.5</code>, etc.)</li>
         </ul>
     </li>
     <li> Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -441,14 +524,14 @@ <h4><a id="upgrade_2_6_0" href="#upgrade_2_6_0">Upgrading to 2.6.0 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.5</code>, <code>2.4</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.5</code>, <code>2.4</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.5</code>, <code>2.4</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.5</code>, <code>2.4</code>, etc.)</li>
         </ul>
     </li>
     <li> Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -505,14 +588,14 @@ <h4><a id="upgrade_2_5_0" href="#upgrade_2_5_0">Upgrading to 2.5.0 from any vers
         overridden the message format version, you should keep its current value. Alternatively, if you are upgrading from a version prior
         to 0.11.0.x, then CURRENT_MESSAGE_FORMAT_VERSION should be set to match CURRENT_KAFKA_VERSION.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.4</code>, <code>2.3</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.4</code>, <code>2.3</code>, etc.)</li>
             <li>log.message.format.version=CURRENT_MESSAGE_FORMAT_VERSION  (See <a href="#upgrade_10_performance_impact">potential performance impact
                 following the upgrade</a> for the details on what this configuration does.)</li>
         </ul>
         If you are upgrading from version 0.11.0.x or above, and you have not overridden the message format, then you only need to override
         the inter-broker protocol version.
         <ul>
-            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g., <code>2.4</code>, <code>2.3</code>, etc.)</li>
+            <li>inter.broker.protocol.version=CURRENT_KAFKA_VERSION (e.g. <code>2.4</code>, <code>2.3</code>, etc.)</li>
         </ul>
     </li>
     <li> Upgrade the brokers one at a time: shut down the broker, update the code, and restart it. Once you have done so, the
@@ -681,7 +764,7 @@ <h5><a id="upgrade_240_notable" href="#upgrade_240_notable">Notable changes in 2
         can be found in <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-520%3A+Add+overloaded+Consumer%23committed+for+batching+partitions">KIP-520</a>).
     </li>
     <li>We've introduced a new <code>INVALID_RECORD</code> error in the produce response to distinguish from the <code>CORRUPT_MESSAGE</code> error.
-        To be more concrete, previously when a batch of records were sent as part of a single request to the broker and one or more of the records failed
+        To be more concrete, previously when a batch of records was sent as part of a single request to the broker and one or more of the records failed
         the validation due to various causes (mismatch magic bytes, crc checksum errors, null key for log compacted topics, etc), the whole batch would be rejected
         with the same and misleading <code>CORRUPT_MESSAGE</code>, and the caller of the producer client would see the corresponding exception from either
         the future object of <code>RecordMetadata</code> returned from the <code>send</code> call as well as in the <code>Callback#onCompletion(RecordMetadata metadata, Exception exception)</code>
@@ -1830,7 +1913,7 @@ <h5><a id="upgrade_10_breaking" href="#upgrade_10_breaking">Potential breaking c
          To maintain compatibility with old clients, this change only applies to Message format 0.10.0 and later.
          Clients that Produce/Fetch LZ4-compressed messages using v0/v1 (Message format 0.9.0) should continue
          to use the 0.9.0 framing implementation. Clients that use Produce/Fetch protocols v2 or later
-         should use interoperable LZ4f framing. A list of interoperable LZ4 libraries is available at http://www.lz4.org/
+         should use interoperable LZ4f framing. A list of interoperable LZ4 libraries is available at <a href="https://www.lz4.org/">https://www.lz4.org/</a>
 </ul>
 
 <h5><a id="upgrade_10_notable" href="#upgrade_10_notable">Notable changes in 0.10.0.0</a></h5>
diff --git a/generator/src/main/java/org/apache/kafka/message/FieldSpec.java b/generator/src/main/java/org/apache/kafka/message/FieldSpec.java
index d15b03cdb95db..1853458ee9751 100644
--- a/generator/src/main/java/org/apache/kafka/message/FieldSpec.java
+++ b/generator/src/main/java/org/apache/kafka/message/FieldSpec.java
@@ -300,6 +300,7 @@ String fieldDefault(HeaderGenerator headerGenerator,
         } else if ((type instanceof FieldType.Int8FieldType) ||
             (type instanceof FieldType.Int16FieldType) ||
             (type instanceof FieldType.Uint16FieldType) ||
+            (type instanceof FieldType.Uint32FieldType) ||
             (type instanceof FieldType.Int32FieldType) ||
             (type instanceof FieldType.Int64FieldType)) {
             int base = 10;
@@ -338,7 +339,7 @@ String fieldDefault(HeaderGenerator headerGenerator,
                 } else {
                     try {
                         int value = Integer.valueOf(defaultString, base);
-                        if (value < 0 || value > 65535) {
+                        if (value < 0 || value > MessageGenerator.UNSIGNED_SHORT_MAX) {
                             throw new RuntimeException("Invalid default for uint16 field " +
                                     name + ": out of range.");
                         }
@@ -348,6 +349,22 @@ String fieldDefault(HeaderGenerator headerGenerator,
                     }
                     return fieldDefault;
                 }
+            } else if (type instanceof FieldType.Uint32FieldType) {
+                if (defaultString.isEmpty()) {
+                    return "0";
+                } else {
+                    try {
+                        long value = Long.valueOf(defaultString, base);
+                        if (value < 0 || value > MessageGenerator.UNSIGNED_INT_MAX) {
+                            throw new RuntimeException("Invalid default for uint32 field " +
+                                    name + ": out of range.");
+                        }
+                    } catch (NumberFormatException e) {
+                        throw new RuntimeException("Invalid default for uint32 field " +
+                                name + ": " + defaultString, e);
+                    }
+                    return fieldDefault;
+                }
             } else if (type instanceof FieldType.Int32FieldType) {
                 if (defaultString.isEmpty()) {
                     return "0";
@@ -476,6 +493,8 @@ String fieldAbstractJavaType(HeaderGenerator headerGenerator,
             return "short";
         } else if (type instanceof FieldType.Uint16FieldType) {
             return "int";
+        } else if (type instanceof FieldType.Uint32FieldType) {
+            return "long";
         } else if (type instanceof FieldType.Int32FieldType) {
             return "int";
         } else if (type instanceof FieldType.Int64FieldType) {
diff --git a/generator/src/main/java/org/apache/kafka/message/FieldType.java b/generator/src/main/java/org/apache/kafka/message/FieldType.java
index e0009c22eaf09..24b79d47cbaba 100644
--- a/generator/src/main/java/org/apache/kafka/message/FieldType.java
+++ b/generator/src/main/java/org/apache/kafka/message/FieldType.java
@@ -122,6 +122,26 @@ public String toString() {
         }
     }
 
+    final class Uint32FieldType implements FieldType {
+        static final Uint32FieldType INSTANCE = new Uint32FieldType();
+        private static final String NAME = "uint32";
+
+        @Override
+        public String getBoxedJavaType(HeaderGenerator headerGenerator) {
+            return "Long";
+        }
+
+        @Override
+        public Optional<Integer> fixedLength() {
+            return Optional.of(4);
+        }
+
+        @Override
+        public String toString() {
+            return NAME;
+        }
+    }
+
     final class Int64FieldType implements FieldType {
         static final Int64FieldType INSTANCE = new Int64FieldType();
         private static final String NAME = "int64";
@@ -369,6 +389,8 @@ static FieldType parse(String string) {
                 return Int16FieldType.INSTANCE;
             case Uint16FieldType.NAME:
                 return Uint16FieldType.INSTANCE;
+            case Uint32FieldType.NAME:
+                return Uint32FieldType.INSTANCE;
             case Int32FieldType.NAME:
                 return Int32FieldType.INSTANCE;
             case Int64FieldType.NAME:
diff --git a/generator/src/main/java/org/apache/kafka/message/JsonConverterGenerator.java b/generator/src/main/java/org/apache/kafka/message/JsonConverterGenerator.java
index 2df8170b053b1..8ce07b9275d26 100644
--- a/generator/src/main/java/org/apache/kafka/message/JsonConverterGenerator.java
+++ b/generator/src/main/java/org/apache/kafka/message/JsonConverterGenerator.java
@@ -164,6 +164,11 @@ private void generateTargetFromJson(Target target, Versions curVersions) {
             buffer.printf("%s;%n", target.assignmentStatement(
                 String.format("MessageUtil.jsonNodeToUnsignedShort(%s, \"%s\")",
                     target.sourceVariable(), target.humanReadableName())));
+        } else if (target.field().type() instanceof FieldType.Uint32FieldType) {
+            headerGenerator.addImport(MessageGenerator.MESSAGE_UTIL_CLASS);
+            buffer.printf("%s;%n", target.assignmentStatement(
+                 String.format("MessageUtil.jsonNodeToUnsignedInt(%s, \"%s\")",
+                     target.sourceVariable(), target.humanReadableName())));
         } else if (target.field().type() instanceof FieldType.Int32FieldType) {
             headerGenerator.addImport(MessageGenerator.MESSAGE_UTIL_CLASS);
             buffer.printf("%s;%n", target.assignmentStatement(
@@ -346,7 +351,8 @@ private void generateTargetToJson(Target target, Versions versions) {
             headerGenerator.addImport(MessageGenerator.INT_NODE_CLASS);
             buffer.printf("%s;%n", target.assignmentStatement(
                 String.format("new IntNode(%s)", target.sourceVariable())));
-        } else if (target.field().type() instanceof FieldType.Int64FieldType) {
+        } else if (target.field().type() instanceof FieldType.Int64FieldType ||
+                (target.field().type() instanceof FieldType.Uint32FieldType)) {
             headerGenerator.addImport(MessageGenerator.LONG_NODE_CLASS);
             buffer.printf("%s;%n", target.assignmentStatement(
                 String.format("new LongNode(%s)", target.sourceVariable())));
diff --git a/generator/src/main/java/org/apache/kafka/message/MessageDataGenerator.java b/generator/src/main/java/org/apache/kafka/message/MessageDataGenerator.java
index b9923ee572314..235667480cc11 100644
--- a/generator/src/main/java/org/apache/kafka/message/MessageDataGenerator.java
+++ b/generator/src/main/java/org/apache/kafka/message/MessageDataGenerator.java
@@ -541,6 +541,8 @@ private String primitiveReadExpression(FieldType type) {
             return "_readable.readShort()";
         } else if (type instanceof FieldType.Uint16FieldType) {
             return "_readable.readUnsignedShort()";
+        } else if (type instanceof FieldType.Uint32FieldType) {
+            return "_readable.readUnsignedInt()";
         } else if (type instanceof FieldType.Int32FieldType) {
             return "_readable.readInt()";
         } else if (type instanceof FieldType.Int64FieldType) {
@@ -848,6 +850,8 @@ private String primitiveWriteExpression(FieldType type, String name) {
             return String.format("_writable.writeShort(%s)", name);
         } else if (type instanceof FieldType.Uint16FieldType) {
             return String.format("_writable.writeUnsignedShort(%s)", name);
+        } else if (type instanceof FieldType.Uint32FieldType) {
+            return String.format("_writable.writeUnsignedInt(%s)", name);
         } else if (type instanceof FieldType.Int32FieldType) {
             return String.format("_writable.writeInt(%s)", name);
         } else if (type instanceof FieldType.Int64FieldType) {
@@ -1372,7 +1376,8 @@ private void generateFieldHashCode(FieldSpec field) {
                     (field.type() instanceof FieldType.Int32FieldType)) {
             buffer.printf("hashCode = 31 * hashCode + %s;%n",
                 field.camelCaseName());
-        } else if (field.type() instanceof FieldType.Int64FieldType) {
+        } else if (field.type() instanceof FieldType.Int64FieldType ||
+                    (field.type() instanceof FieldType.Uint32FieldType)) {
             buffer.printf("hashCode = 31 * hashCode + ((int) (%s >> 32) ^ (int) %s);%n",
                 field.camelCaseName(), field.camelCaseName());
         } else if (field.type() instanceof FieldType.UUIDFieldType) {
@@ -1427,6 +1432,7 @@ private void generateFieldDuplicate(Target target) {
                 (field.type() instanceof FieldType.Int8FieldType) ||
                 (field.type() instanceof FieldType.Int16FieldType) ||
                 (field.type() instanceof FieldType.Uint16FieldType) ||
+                (field.type() instanceof FieldType.Uint32FieldType) ||
                 (field.type() instanceof FieldType.Int32FieldType) ||
                 (field.type() instanceof FieldType.Int64FieldType) ||
                 (field.type() instanceof FieldType.Float64FieldType) ||
@@ -1514,6 +1520,7 @@ private void generateFieldToString(String prefix, FieldSpec field) {
         } else if ((field.type() instanceof FieldType.Int8FieldType) ||
                 (field.type() instanceof FieldType.Int16FieldType) ||
                 (field.type() instanceof FieldType.Uint16FieldType) ||
+                (field.type() instanceof FieldType.Uint32FieldType) ||
                 (field.type() instanceof FieldType.Int32FieldType) ||
                 (field.type() instanceof FieldType.Int64FieldType) ||
                 (field.type() instanceof FieldType.Float64FieldType)) {
@@ -1576,13 +1583,21 @@ private void generateFieldMutator(String className, FieldSpec field) {
             field.fieldAbstractJavaType(headerGenerator, structRegistry));
         buffer.incrementIndent();
         if (field.type() instanceof FieldType.Uint16FieldType) {
-            buffer.printf("if (v < 0 || v > 65535) {%n");
+            buffer.printf("if (v < 0 || v > %d) {%n", MessageGenerator.UNSIGNED_SHORT_MAX);
             buffer.incrementIndent();
             buffer.printf("throw new RuntimeException(\"Invalid value \" + v + " +
                     "\" for unsigned short field.\");%n");
             buffer.decrementIndent();
             buffer.printf("}%n");
         }
+        if (field.type() instanceof FieldType.Uint32FieldType) {
+            buffer.printf("if (v < 0 || v > %dL) {%n", MessageGenerator.UNSIGNED_INT_MAX);
+            buffer.incrementIndent();
+            buffer.printf("throw new RuntimeException(\"Invalid value \" + v + " +
+                    "\" for unsigned int field.\");%n");
+            buffer.decrementIndent();
+            buffer.printf("}%n");
+        }
         buffer.printf("this.%s = v;%n", field.camelCaseName());
         buffer.printf("return this;%n");
         buffer.decrementIndent();
diff --git a/generator/src/main/java/org/apache/kafka/message/MessageGenerator.java b/generator/src/main/java/org/apache/kafka/message/MessageGenerator.java
index cfbeae84ecf3c..56f3f6ab0b2ce 100644
--- a/generator/src/main/java/org/apache/kafka/message/MessageGenerator.java
+++ b/generator/src/main/java/org/apache/kafka/message/MessageGenerator.java
@@ -156,6 +156,10 @@ public final class MessageGenerator {
 
     static final String DOUBLE_NODE_CLASS = "com.fasterxml.jackson.databind.node.DoubleNode";
 
+    static final long UNSIGNED_INT_MAX = 4294967295L;
+
+    static final int UNSIGNED_SHORT_MAX = 65535;
+
     /**
      * The Jackson serializer we use for JSON objects.
      */
diff --git a/generator/src/main/java/org/apache/kafka/message/SchemaGenerator.java b/generator/src/main/java/org/apache/kafka/message/SchemaGenerator.java
index 5ebd158839b10..a5ae8300d5350 100644
--- a/generator/src/main/java/org/apache/kafka/message/SchemaGenerator.java
+++ b/generator/src/main/java/org/apache/kafka/message/SchemaGenerator.java
@@ -250,6 +250,12 @@ private String fieldTypeToSchemaType(FieldType type,
                 throw new RuntimeException("Type " + type + " cannot be nullable.");
             }
             return "Type.UINT16";
+        } else if (type instanceof FieldType.Uint32FieldType) {
+            headerGenerator.addImport(MessageGenerator.TYPE_CLASS);
+            if (nullable) {
+                throw new RuntimeException("Type " + type + " cannot be nullable.");
+            }
+            return "Type.UNSIGNED_INT32";
         } else if (type instanceof FieldType.Int32FieldType) {
             headerGenerator.addImport(MessageGenerator.TYPE_CLASS);
             if (nullable) {
diff --git a/generator/src/test/java/org/apache/kafka/message/MessageGeneratorTest.java b/generator/src/test/java/org/apache/kafka/message/MessageGeneratorTest.java
index 07766f23f51ec..8eb38e999c76c 100644
--- a/generator/src/test/java/org/apache/kafka/message/MessageGeneratorTest.java
+++ b/generator/src/test/java/org/apache/kafka/message/MessageGeneratorTest.java
@@ -67,4 +67,10 @@ public void stripSuffixTest() throws Exception {
         } catch (RuntimeException e) {
         }
     }
+
+    @Test
+    public void testConstants() {
+        assertEquals(MessageGenerator.UNSIGNED_SHORT_MAX, 0xFFFF);
+        assertEquals(MessageGenerator.UNSIGNED_INT_MAX, 0xFFFFFFFFL);
+    }
 }
diff --git a/gradle.properties b/gradle.properties
index 753c3e6af33c0..8f3a7856ef61c 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -20,8 +20,8 @@ group=org.apache.kafka
 #  - tests/kafkatest/__init__.py
 #  - tests/kafkatest/version.py (variable DEV_VERSION)
 #  - kafka-merge-pr.py
-version=3.2.0-SNAPSHOT
-scalaVersion=2.13.6
+version=3.4.0-SNAPSHOT
+scalaVersion=2.13.8
 task=build
 org.gradle.jvmargs=-Xmx2g -Xss4m -XX:+UseParallelGC
 org.gradle.parallel=true
diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle
index 3581e1c04256a..7025d12eb54fe 100644
--- a/gradle/dependencies.gradle
+++ b/gradle/dependencies.gradle
@@ -62,27 +62,27 @@ versions += [
   checkstyle: "8.36.2",
   commonsCli: "1.4",
   dropwizardMetrics: "4.1.12.1",
-  gradle: "7.3.3",
+  gradle: "7.5.1",
   grgit: "4.1.1",
   httpclient: "4.5.13",
   easymock: "4.3",
-  jackson: "2.12.6",
+  jackson: "2.13.3",
+  jacksonDatabind: "2.13.3",
   jacoco: "0.8.7",
   javassist: "3.27.0-GA",
-  jetty: "9.4.44.v20210927",
+  jetty: "9.4.48.v20220622",
   jersey: "2.34",
   jline: "3.21.0",
-  jmh: "1.34",
+  jmh: "1.35",
   hamcrest: "2.2",
-  log4j: "1.2.17",
   scalaLogging: "3.9.4",
   jaxb: "2.3.0",
   jaxrs: "2.1.1",
   jfreechart: "1.0.0",
   jopt: "5.0.4",
   jose4j: "0.7.9",
-  junit: "5.8.2",
-  jqwik: "1.6.3",
+  junit: "5.9.0",
+  jqwik: "1.6.5",
   kafka_0100: "0.10.0.1",
   kafka_0101: "0.10.1.1",
   kafka_0102: "0.10.2.2",
@@ -98,24 +98,28 @@ versions += [
   kafka_26: "2.6.2",
   kafka_27: "2.7.1",
   kafka_28: "2.8.1",
-  kafka_30: "3.0.0",
-  kafka_31: "3.1.0",
+  kafka_30: "3.0.1",
+  kafka_31: "3.1.1",
+  kafka_32: "3.2.0",
   lz4: "1.8.0",
   mavenArtifact: "3.8.4",
   metrics: "2.2.0",
-  mockito: "4.3.1",
-  netty: "4.1.73.Final",
+  mockito: "4.6.1",
+  netty: "4.1.78.Final",
   powermock: "2.0.9",
   reflections: "0.9.12",
-  rocksDB: "6.27.3",
+  reload4j: "1.2.19",
+  rocksDB: "6.29.4.1",
   scalaCollectionCompat: "2.6.0",
   scalafmt: "2.7.5",
   scalaJava8Compat : "1.0.2",
   scoverage: "1.4.11",
-  slf4j: "1.7.32",
+  slf4j: "1.7.36",
   snappy: "1.1.8.4",
   spotbugs: "4.2.2",
-  zinc: "1.3.5",
+  swaggerAnnotations: "2.2.0",
+  swaggerJaxrs2: "2.2.0",
+  zinc: "1.6.1",
   zookeeper: "3.6.3",
   zstd: "1.5.2-1"
 ]
@@ -135,7 +139,7 @@ libs += [
   commonsCli: "commons-cli:commons-cli:$versions.commonsCli",
   easymock: "org.easymock:easymock:$versions.easymock",
   jacksonAnnotations: "com.fasterxml.jackson.core:jackson-annotations:$versions.jackson",
-  jacksonDatabind: "com.fasterxml.jackson.core:jackson-databind:$versions.jackson",
+  jacksonDatabind: "com.fasterxml.jackson.core:jackson-databind:$versions.jacksonDatabind",
   jacksonDataformatCsv: "com.fasterxml.jackson.dataformat:jackson-dataformat-csv:$versions.jackson",
   jacksonModuleScala: "com.fasterxml.jackson.module:jackson-module-scala_$versions.baseScala:$versions.jackson",
   jacksonJDK8Datatypes: "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:$versions.jackson",
@@ -177,7 +181,8 @@ libs += [
   kafkaStreams_28: "org.apache.kafka:kafka-streams:$versions.kafka_28",
   kafkaStreams_30: "org.apache.kafka:kafka-streams:$versions.kafka_30",
   kafkaStreams_31: "org.apache.kafka:kafka-streams:$versions.kafka_31",
-  log4j: "log4j:log4j:$versions.log4j",
+  kafkaStreams_32: "org.apache.kafka:kafka-streams:$versions.kafka_32",
+  log4j: "ch.qos.reload4j:reload4j:$versions.reload4j",
   lz4: "org.lz4:lz4-java:$versions.lz4",
   metrics: "com.yammer.metrics:metrics-core:$versions.metrics",
   dropwizardMetrics: "io.dropwizard.metrics:metrics-core:$versions.dropwizardMetrics",
@@ -198,6 +203,8 @@ libs += [
   slf4jApi: "org.slf4j:slf4j-api:$versions.slf4j",
   slf4jlog4j: "org.slf4j:slf4j-log4j12:$versions.slf4j",
   snappy: "org.xerial.snappy:snappy-java:$versions.snappy",
+  swaggerAnnotations: "io.swagger.core.v3:swagger-annotations:$versions.swaggerAnnotations", 
+  swaggerJaxrs2: "io.swagger.core.v3:swagger-jaxrs2:$versions.swaggerJaxrs2",
   zookeeper: "org.apache.zookeeper:zookeeper:$versions.zookeeper",
   jfreechart: "jfreechart:jfreechart:$versions.jfreechart",
   mavenArtifact: "org.apache.maven:maven-artifact:$versions.mavenArtifact",
diff --git a/gradle/openapi.template b/gradle/openapi.template
new file mode 100644
index 0000000000000..d15c40c0070a1
--- /dev/null
+++ b/gradle/openapi.template
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+openapi: 3.0.0
+info:
+  version: $kafkaVersion
+  title: Kafka Connect REST API
+  description: "This is the documentation of the [Apache Kafka](https://kafka.apache.org) Connect REST API."
+  contact:
+    email: dev@kafka.apache.org
+  license:
+    name: Apache 2.0
+    url: https://www.apache.org/licenses/LICENSE-2.0.html
diff --git a/gradle/spotbugs-exclude.xml b/gradle/spotbugs-exclude.xml
index 8e09cf926791b..a97831801d955 100644
--- a/gradle/spotbugs-exclude.xml
+++ b/gradle/spotbugs-exclude.xml
@@ -287,7 +287,7 @@ For a detailed description of spotbugs bug categories, see https://spotbugs.read
         <!-- Suppress warnings about ignoring the return value of await.
              This is done intentionally because we use other clues to determine
              if the wait was cut short. -->
-        <Class name="org.apache.kafka.connect.runtime.WorkerSourceTask"/>
+        <Class name="org.apache.kafka.connect.runtime.AbstractWorkerSourceTask"/>
         <Method name="execute"/>
         <Bug pattern="RV_RETURN_VALUE_IGNORED"/>
     </Match>
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index ee6ba9a3ac42d..593bdcca0f337 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,6 +1,6 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionSha256Sum=c9490e938b221daf0094982288e4038deed954a3f12fb54cbf270ddf4e37d879
-distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.3-all.zip
+distributionSha=db9c8211ed63f61f60292c69e80d89196f9eb36665e369e7f00ac4cc841c2219
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-all.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
diff --git a/gradlew b/gradlew
index 08c1fc535dce6..3c53cb3fbb887 100755
--- a/gradlew
+++ b/gradlew
@@ -118,7 +118,7 @@ esac
 # Loop in case we encounter an error.
 for attempt in 1 2 3; do
   if [ ! -e "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" ]; then
-    if ! curl -s -S --retry 3 -L -o "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" "https://raw.githubusercontent.com/gradle/gradle/v7.3.3/gradle/wrapper/gradle-wrapper.jar"; then
+    if ! curl -s -S --retry 3 -L -o "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" "https://raw.githubusercontent.com/gradle/gradle/v7.5.1/gradle/wrapper/gradle-wrapper.jar"; then
       rm -f "$APP_HOME/gradle/wrapper/gradle-wrapper.jar"
       # Pause for a bit before looping in case the server throttled us.
       sleep 5
@@ -218,6 +218,12 @@ set -- \
         org.gradle.wrapper.GradleWrapperMain \
         "$@"
 
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
+
 # Use "xargs" to parse quoted args.
 #
 # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetcher/ReplicaFetcherThreadBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetcher/ReplicaFetcherThreadBenchmark.java
index 7f03788913722..b0f36522f3246 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetcher/ReplicaFetcherThreadBenchmark.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetcher/ReplicaFetcherThreadBenchmark.java
@@ -17,18 +17,17 @@
 
 package org.apache.kafka.jmh.fetcher;
 
-import kafka.api.ApiVersion;
-import kafka.api.ApiVersion$;
 import kafka.cluster.BrokerEndPoint;
 import kafka.cluster.DelayedOperations;
-import kafka.cluster.IsrChangeListener;
+import kafka.cluster.AlterPartitionListener;
 import kafka.cluster.Partition;
 import kafka.log.CleanerConfig;
 import kafka.log.Defaults;
 import kafka.log.LogAppendInfo;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
+import kafka.server.BrokerFeatures;
 import kafka.server.BrokerTopicStats;
 import kafka.server.FailedPartitions;
 import kafka.server.InitialFetchState;
@@ -38,6 +37,8 @@
 import kafka.server.OffsetAndEpoch;
 import kafka.server.OffsetTruncationState;
 import kafka.server.QuotaFactory;
+import kafka.server.RemoteLeaderEndPoint;
+import kafka.server.BrokerBlockingSender;
 import kafka.server.ReplicaFetcherThread;
 import kafka.server.ReplicaManager;
 import kafka.server.ReplicaQuota;
@@ -51,6 +52,7 @@
 import kafka.utils.Pool;
 import kafka.utils.TestUtils;
 import kafka.zk.KafkaZkClient;
+import org.apache.kafka.clients.FetchSessionHandler;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.TopicIdPartition;
 import org.apache.kafka.common.Uuid;
@@ -67,8 +69,10 @@
 import org.apache.kafka.common.requests.FetchRequest;
 import org.apache.kafka.common.requests.FetchResponse;
 import org.apache.kafka.common.requests.UpdateMetadataRequest;
+import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.mockito.Mockito;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -117,6 +121,7 @@ public class ReplicaFetcherThreadBenchmark {
     private Pool<TopicPartition, Partition> pool = new Pool<TopicPartition, Partition>(Option.empty());
     private Metrics metrics = new Metrics();
     private ReplicaManager replicaManager;
+    private ReplicaQuota replicaQuota;
     private Option<Uuid> topicId = Option.apply(Uuid.randomUuid());
 
     @Setup(Level.Trial)
@@ -145,7 +150,7 @@ public void setup() throws IOException {
             setFlushStartOffsetCheckpointMs(10000L).
             setRetentionCheckMs(1000L).
             setMaxPidExpirationMs(60000).
-            setInterBrokerProtocolVersion(ApiVersion.latestVersion()).
+            setInterBrokerProtocolVersion(MetadataVersion.latest()).
             setScheduler(scheduler).
             setBrokerTopicStats(brokerTopicStats).
             setLogDirFailureChannel(logDirFailureChannel).
@@ -166,16 +171,16 @@ public void setup() throws IOException {
                     .setLeader(0)
                     .setLeaderEpoch(0)
                     .setIsr(replicas)
-                    .setZkVersion(1)
+                    .setPartitionEpoch(1)
                     .setReplicas(replicas)
                     .setIsNew(true);
 
-            IsrChangeListener isrChangeListener = Mockito.mock(IsrChangeListener.class);
+            AlterPartitionListener alterPartitionListener = Mockito.mock(AlterPartitionListener.class);
             OffsetCheckpoints offsetCheckpoints = Mockito.mock(OffsetCheckpoints.class);
             Mockito.when(offsetCheckpoints.fetch(logDir.getAbsolutePath(), tp)).thenReturn(Option.apply(0L));
-            AlterIsrManager isrChannelManager = Mockito.mock(AlterIsrManager.class);
-            Partition partition = new Partition(tp, 100, ApiVersion$.MODULE$.latestVersion(),
-                    0, Time.SYSTEM, isrChangeListener, new DelayedOperationsMock(tp),
+            AlterPartitionManager isrChannelManager = Mockito.mock(AlterPartitionManager.class);
+            Partition partition = new Partition(tp, 100, MetadataVersion.latest(),
+                    0, Time.SYSTEM, alterPartitionListener, new DelayedOperationsMock(tp),
                     Mockito.mock(MetadataCache.class), logManager, isrChannelManager);
 
             partition.makeFollower(partitionState, offsetCheckpoints, topicId);
@@ -213,7 +218,7 @@ public RecordsSend<? extends BaseRecords> toSend() {
                 0, 0, 0, updatePartitionState, Collections.emptyList(), topicIds).build();
 
         // TODO: fix to support raft
-        ZkMetadataCache metadataCache = new ZkMetadataCache(0);
+        ZkMetadataCache metadataCache = new ZkMetadataCache(0, config.interBrokerProtocolVersion(), BrokerFeatures.createEmpty());
         metadataCache.updateMetadata(0, updateMetadataRequest);
 
         replicaManager = new ReplicaManagerBuilder().
@@ -227,15 +232,30 @@ public RecordsSend<? extends BaseRecords> toSend() {
             setBrokerTopicStats(brokerTopicStats).
             setMetadataCache(metadataCache).
             setLogDirFailureChannel(new LogDirFailureChannel(logDirs.size())).
-            setAlterIsrManager(TestUtils.createAlterIsrManager()).
+            setAlterPartitionManager(TestUtils.createAlterIsrManager()).
             build();
-        fetcher = new ReplicaFetcherBenchThread(config, replicaManager, pool);
+        replicaQuota = new ReplicaQuota() {
+            @Override
+            public boolean isQuotaExceeded() {
+                return false;
+            }
+
+            @Override
+            public void record(long value) {
+            }
+
+            @Override
+            public boolean isThrottled(TopicPartition topicPartition) {
+                return false;
+            }
+        };
+        fetcher = new ReplicaFetcherBenchThread(config, replicaManager, replicaQuota, pool);
         fetcher.addPartitions(initialFetchStates);
         // force a pass to move partitions to fetching state. We do this in the setup phase
         // so that we do not measure this time as part of the steady state work
         fetcher.doWork();
         // handle response to engage the incremental fetch session handler
-        fetcher.fetchSessionHandler().handleResponse(FetchResponse.of(Errors.NONE, 0, 999, initialFetched), ApiKeys.FETCH.latestVersion());
+        ((RemoteLeaderEndPoint) fetcher.leader()).fetchSessionHandler().handleResponse(FetchResponse.of(Errors.NONE, 0, 999, initialFetched), ApiKeys.FETCH.latestVersion());
     }
 
     @TearDown(Level.Trial)
@@ -286,33 +306,61 @@ static class ReplicaFetcherBenchThread extends ReplicaFetcherThread {
 
         ReplicaFetcherBenchThread(KafkaConfig config,
                                   ReplicaManager replicaManager,
+                                  ReplicaQuota replicaQuota,
                                   Pool<TopicPartition,
                                   Partition> partitions) {
             super("name",
-                    3,
-                    new BrokerEndPoint(3, "host", 3000),
-                    config,
-                    new FailedPartitions(),
-                    replicaManager,
-                    new Metrics(),
-                    Time.SYSTEM,
-                    new ReplicaQuota() {
+                    new RemoteLeaderEndPoint(
+                            String.format("[ReplicaFetcher replicaId=%d, leaderId=%d, fetcherId=%d", config.brokerId(), 3, 3),
+                            new BrokerBlockingSender(
+                                    new BrokerEndPoint(3, "host", 3000),
+                                    config,
+                                    new Metrics(),
+                                    Time.SYSTEM,
+                                    3,
+                                    String.format("broker-%d-fetcher-%d", 3, 3),
+                                    new LogContext(String.format("[ReplicaFetcher replicaId=%d, leaderId=%d, fetcherId=%d", config.brokerId(), 3, 3))
+                            ),
+                            new FetchSessionHandler(
+                                    new LogContext(String.format("[ReplicaFetcher replicaId=%d, leaderId=%d, fetcherId=%d", config.brokerId(), 3, 3)), 3),
+                            config,
+                            replicaManager,
+                            replicaQuota,
+                            config::interBrokerProtocolVersion
+                    ) {
                         @Override
-                        public boolean isQuotaExceeded() {
-                            return false;
+                        public long fetchEarliestOffset(TopicPartition topicPartition, int currentLeaderEpoch) {
+                            return 0;
                         }
 
                         @Override
-                        public void record(long value) {
+                        public Map<TopicPartition, EpochEndOffset> fetchEpochEndOffsets(Map<TopicPartition, OffsetForLeaderPartition> partitions) {
+                            scala.collection.mutable.Map<TopicPartition, EpochEndOffset> endOffsets = new scala.collection.mutable.HashMap<>();
+                            Iterator<TopicPartition> iterator = partitions.keys().iterator();
+                            while (iterator.hasNext()) {
+                                TopicPartition tp = iterator.next();
+                                endOffsets.put(tp, new EpochEndOffset()
+                                        .setPartition(tp.partition())
+                                        .setErrorCode(Errors.NONE.code())
+                                        .setLeaderEpoch(0)
+                                        .setEndOffset(100));
+                            }
+                            return endOffsets;
                         }
 
                         @Override
-                        public boolean isThrottled(TopicPartition topicPartition) {
-                            return false;
+                        public Map<TopicPartition, FetchResponseData.PartitionData> fetch(FetchRequest.Builder fetchRequest) {
+                            return new scala.collection.mutable.HashMap<>();
                         }
                     },
-                    Option.empty());
-            
+                    config,
+                    new FailedPartitions(),
+                    replicaManager,
+                    replicaQuota,
+                    String.format("[ReplicaFetcher replicaId=%d, leaderId=%d, fetcherId=%d", config.brokerId(), 3, 3),
+                    config::interBrokerProtocolVersion
+            );
+
             pool = partitions;
         }
 
@@ -346,30 +394,5 @@ public Option<LogAppendInfo> processPartitionData(TopicPartition topicPartition,
                                                           FetchResponseData.PartitionData partitionData) {
             return Option.empty();
         }
-
-        @Override
-        public long fetchEarliestOffsetFromLeader(TopicPartition topicPartition, int currentLeaderEpoch) {
-            return 0;
-        }
-
-        @Override
-        public Map<TopicPartition, EpochEndOffset> fetchEpochEndOffsets(Map<TopicPartition, OffsetForLeaderPartition> partitions) {
-            scala.collection.mutable.Map<TopicPartition, EpochEndOffset> endOffsets = new scala.collection.mutable.HashMap<>();
-            Iterator<TopicPartition> iterator = partitions.keys().iterator();
-            while (iterator.hasNext()) {
-                TopicPartition tp = iterator.next();
-                endOffsets.put(tp, new EpochEndOffset()
-                    .setPartition(tp.partition())
-                    .setErrorCode(Errors.NONE.code())
-                    .setLeaderEpoch(0)
-                    .setEndOffset(100));
-            }
-            return endOffsets;
-        }
-
-        @Override
-        public Map<TopicPartition, FetchResponseData.PartitionData> fetchFromLeader(FetchRequest.Builder fetchRequest) {
-            return new scala.collection.mutable.HashMap<>();
-        }
     }
 }
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/metadata/MetadataRequestBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/metadata/MetadataRequestBenchmark.java
index 83dd7eb7905d4..80376948b89b1 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/metadata/MetadataRequestBenchmark.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/metadata/MetadataRequestBenchmark.java
@@ -23,6 +23,7 @@
 import kafka.network.RequestChannel;
 import kafka.network.RequestConvertToJson;
 import kafka.server.AutoTopicCreationManager;
+import kafka.server.BrokerFeatures;
 import kafka.server.BrokerTopicStats;
 import kafka.server.ClientQuotaManager;
 import kafka.server.ClientRequestQuotaManager;
@@ -58,6 +59,7 @@
 import org.apache.kafka.common.security.auth.KafkaPrincipal;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.mockito.Mockito;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -108,7 +110,7 @@ public class MetadataRequestBenchmark {
     private KafkaZkClient kafkaZkClient = Mockito.mock(KafkaZkClient.class);
     private Metrics metrics = new Metrics();
     private int brokerId = 1;
-    private ZkMetadataCache metadataCache = MetadataCache.zkMetadataCache(brokerId);
+    private ZkMetadataCache metadataCache = MetadataCache.zkMetadataCache(brokerId, MetadataVersion.latest(), BrokerFeatures.createEmpty());
     private ClientQuotaManager clientQuotaManager = Mockito.mock(ClientQuotaManager.class);
     private ClientRequestQuotaManager clientRequestQuotaManager = Mockito.mock(ClientRequestQuotaManager.class);
     private ControllerMutationQuotaManager controllerMutationQuotaManager = Mockito.mock(ControllerMutationQuotaManager.class);
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/PartitionMakeFollowerBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/PartitionMakeFollowerBenchmark.java
index 61a94c3bc8290..4daddd29bc51f 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/PartitionMakeFollowerBenchmark.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/PartitionMakeFollowerBenchmark.java
@@ -17,16 +17,14 @@
 
 package org.apache.kafka.jmh.partition;
 
-import kafka.api.ApiVersion;
-import kafka.api.ApiVersion$;
 import kafka.cluster.DelayedOperations;
-import kafka.cluster.IsrChangeListener;
+import kafka.cluster.AlterPartitionListener;
 import kafka.cluster.Partition;
 import kafka.log.CleanerConfig;
 import kafka.log.Defaults;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
 import kafka.server.BrokerTopicStats;
 import kafka.server.LogDirFailureChannel;
 import kafka.server.MetadataCache;
@@ -42,6 +40,7 @@
 import org.apache.kafka.common.record.SimpleRecord;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.mockito.Mockito;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -111,7 +110,7 @@ public void setup() throws IOException {
             setFlushStartOffsetCheckpointMs(10000L).
             setRetentionCheckMs(1000L).
             setMaxPidExpirationMs(60000).
-            setInterBrokerProtocolVersion(ApiVersion.latestVersion()).
+            setInterBrokerProtocolVersion(MetadataVersion.latest()).
             setScheduler(scheduler).
             setBrokerTopicStats(brokerTopicStats).
             setLogDirFailureChannel(logDirFailureChannel).
@@ -122,12 +121,12 @@ public void setup() throws IOException {
         topicId = OptionConverters.toScala(Optional.of(Uuid.randomUuid()));
 
         Mockito.when(offsetCheckpoints.fetch(logDir.getAbsolutePath(), tp)).thenReturn(Option.apply(0L));
-        IsrChangeListener isrChangeListener = Mockito.mock(IsrChangeListener.class);
-        AlterIsrManager alterIsrManager = Mockito.mock(AlterIsrManager.class);
+        AlterPartitionListener alterPartitionListener = Mockito.mock(AlterPartitionListener.class);
+        AlterPartitionManager alterPartitionManager = Mockito.mock(AlterPartitionManager.class);
         partition = new Partition(tp, 100,
-            ApiVersion$.MODULE$.latestVersion(), 0, Time.SYSTEM,
-            isrChangeListener, delayedOperations,
-            Mockito.mock(MetadataCache.class), logManager, alterIsrManager);
+            MetadataVersion.latest(), 0, Time.SYSTEM,
+            alterPartitionListener, delayedOperations,
+            Mockito.mock(MetadataCache.class), logManager, alterPartitionManager);
         partition.createLogIfNotExists(true, false, offsetCheckpoints, topicId);
         executorService.submit((Runnable) () -> {
             SimpleRecord[] simpleRecords = new SimpleRecord[] {
@@ -158,7 +157,7 @@ public boolean testMakeFollower() {
             .setLeader(0)
             .setLeaderEpoch(0)
             .setIsr(replicas)
-            .setZkVersion(1)
+            .setPartitionEpoch(1)
             .setReplicas(replicas)
             .setIsNew(true);
         return partition.makeFollower(partitionState, offsetCheckpoints, topicId);
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/UpdateFollowerFetchStateBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/UpdateFollowerFetchStateBenchmark.java
index f41675500d34f..b2cf1ac5569bb 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/UpdateFollowerFetchStateBenchmark.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/UpdateFollowerFetchStateBenchmark.java
@@ -17,16 +17,15 @@
 
 package org.apache.kafka.jmh.partition;
 
-import kafka.api.ApiVersion;
-import kafka.api.ApiVersion$;
 import kafka.cluster.DelayedOperations;
-import kafka.cluster.IsrChangeListener;
+import kafka.cluster.AlterPartitionListener;
 import kafka.cluster.Partition;
+import kafka.cluster.Replica;
 import kafka.log.CleanerConfig;
 import kafka.log.Defaults;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
 import kafka.server.BrokerTopicStats;
 import kafka.server.LogDirFailureChannel;
 import kafka.server.LogOffsetMetadata;
@@ -39,6 +38,7 @@
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.mockito.Mockito;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -80,6 +80,8 @@ public class UpdateFollowerFetchStateBenchmark {
     private long nextOffset = 0;
     private LogManager logManager;
     private Partition partition;
+    private Replica replica1;
+    private Replica replica2;
 
     @Setup(Level.Trial)
     public void setUp() {
@@ -97,7 +99,7 @@ public void setUp() {
             setFlushStartOffsetCheckpointMs(10000L).
             setRetentionCheckMs(1000L).
             setMaxPidExpirationMs(60000).
-            setInterBrokerProtocolVersion(ApiVersion.latestVersion()).
+            setInterBrokerProtocolVersion(MetadataVersion.latest()).
             setScheduler(scheduler).
             setBrokerTopicStats(brokerTopicStats).
             setLogDirFailureChannel(logDirFailureChannel).
@@ -118,16 +120,18 @@ public void setUp() {
             .setLeader(0)
             .setLeaderEpoch(0)
             .setIsr(replicas)
-            .setZkVersion(1)
+            .setPartitionEpoch(1)
             .setReplicas(replicas)
             .setIsNew(true);
-        IsrChangeListener isrChangeListener = Mockito.mock(IsrChangeListener.class);
-        AlterIsrManager alterIsrManager = Mockito.mock(AlterIsrManager.class);
+        AlterPartitionListener alterPartitionListener = Mockito.mock(AlterPartitionListener.class);
+        AlterPartitionManager alterPartitionManager = Mockito.mock(AlterPartitionManager.class);
         partition = new Partition(topicPartition, 100,
-                ApiVersion$.MODULE$.latestVersion(), 0, Time.SYSTEM,
-                isrChangeListener, delayedOperations,
-                Mockito.mock(MetadataCache.class), logManager, alterIsrManager);
+                MetadataVersion.latest(), 0, Time.SYSTEM,
+                alterPartitionListener, delayedOperations,
+                Mockito.mock(MetadataCache.class), logManager, alterPartitionManager);
         partition.makeLeader(partitionState, offsetCheckpoints, topicId);
+        replica1 = partition.getReplica(1).get();
+        replica2 = partition.getReplica(2).get();
     }
 
     // avoid mocked DelayedOperations to avoid mocked class affecting benchmark results
@@ -167,9 +171,9 @@ private LogConfig createLogConfig() {
     @OutputTimeUnit(TimeUnit.NANOSECONDS)
     public void updateFollowerFetchStateBench() {
         // measure the impact of two follower fetches on the leader
-        partition.updateFollowerFetchState(1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
+        partition.updateFollowerFetchState(replica1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
                 0, 1, nextOffset);
-        partition.updateFollowerFetchState(2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
+        partition.updateFollowerFetchState(replica2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
                 0, 1, nextOffset);
         nextOffset++;
     }
@@ -179,9 +183,9 @@ public void updateFollowerFetchStateBench() {
     public void updateFollowerFetchStateBenchNoChange() {
         // measure the impact of two follower fetches on the leader when the follower didn't
         // end up fetching anything
-        partition.updateFollowerFetchState(1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
+        partition.updateFollowerFetchState(replica1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
                 0, 1, 100);
-        partition.updateFollowerFetchState(2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
+        partition.updateFollowerFetchState(replica2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
                 0, 1, 100);
     }
 }
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/record/CompressedRecordBatchValidationBenchmark.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/record/CompressedRecordBatchValidationBenchmark.java
index 24ac53e7866d3..cfbc66b66777d 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/record/CompressedRecordBatchValidationBenchmark.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/record/CompressedRecordBatchValidationBenchmark.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.jmh.record;
 
-import kafka.api.ApiVersion;
 import kafka.common.LongRef;
 import kafka.log.AppendOrigin;
 import kafka.log.LogValidator;
@@ -26,6 +25,7 @@
 import org.apache.kafka.common.record.MemoryRecords;
 import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.Fork;
 import org.openjdk.jmh.annotations.Measurement;
@@ -58,7 +58,7 @@ public void measureValidateMessagesAndAssignOffsetsCompressed(Blackhole bh) {
                 CompressionCodec.getCompressionCodec(compressionType.id),
                 false,  messageVersion, TimestampType.CREATE_TIME, Long.MAX_VALUE, 0,
                 new AppendOrigin.Client$(),
-                ApiVersion.latestVersion(),
+                MetadataVersion.latest(),
                 brokerTopicStats,
                 requestLocal);
     }
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/CheckpointBench.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/CheckpointBench.java
index 21a8086e4dfd8..99fb8143274eb 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/CheckpointBench.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/CheckpointBench.java
@@ -16,12 +16,12 @@
  */
 package org.apache.kafka.jmh.server;
 
-import kafka.api.ApiVersion;
 import kafka.cluster.Partition;
 import kafka.log.CleanerConfig;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
+import kafka.server.BrokerFeatures;
 import kafka.server.BrokerTopicStats;
 import kafka.server.KafkaConfig;
 import kafka.server.LogDirFailureChannel;
@@ -39,6 +39,7 @@
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.Fork;
 import org.openjdk.jmh.annotations.Level;
@@ -88,7 +89,7 @@ public class CheckpointBench {
     private QuotaFactory.QuotaManagers quotaManagers;
     private LogDirFailureChannel failureChannel;
     private LogManager logManager;
-    private AlterIsrManager alterIsrManager;
+    private AlterPartitionManager alterPartitionManager;
 
 
     @SuppressWarnings("deprecation")
@@ -107,17 +108,17 @@ public void setup() {
         this.logManager = TestUtils.createLogManager(JavaConverters.asScalaBuffer(files),
                 LogConfig.apply(), new MockConfigRepository(), CleanerConfig.apply(1, 4 * 1024 * 1024L, 0.9d,
                         1024 * 1024, 32 * 1024 * 1024,
-                        Double.MAX_VALUE, 15 * 1000, true, "MD5"), time, ApiVersion.latestVersion());
+                        Double.MAX_VALUE, 15 * 1000, true, "MD5"), time, MetadataVersion.latest(), 4);
         scheduler.startup();
         final BrokerTopicStats brokerTopicStats = new BrokerTopicStats();
         final MetadataCache metadataCache =
-                MetadataCache.zkMetadataCache(this.brokerProperties.brokerId());
+                MetadataCache.zkMetadataCache(this.brokerProperties.brokerId(), this.brokerProperties.interBrokerProtocolVersion(), BrokerFeatures.createEmpty());
         this.quotaManagers =
                 QuotaFactory.instantiate(this.brokerProperties,
                         this.metrics,
                         this.time, "");
 
-        this.alterIsrManager = TestUtils.createAlterIsrManager();
+        this.alterPartitionManager = TestUtils.createAlterIsrManager();
         this.replicaManager = new ReplicaManagerBuilder().
             setConfig(brokerProperties).
             setMetrics(metrics).
@@ -128,7 +129,7 @@ public void setup() {
             setBrokerTopicStats(brokerTopicStats).
             setMetadataCache(metadataCache).
             setLogDirFailureChannel(failureChannel).
-            setAlterIsrManager(alterIsrManager).
+            setAlterPartitionManager(alterPartitionManager).
             build();
         replicaManager.startup();
 
diff --git a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/PartitionCreationBench.java b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/PartitionCreationBench.java
index 937ac86bbcd57..c80608fa61ffb 100644
--- a/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/PartitionCreationBench.java
+++ b/jmh-benchmarks/src/main/java/org/apache/kafka/jmh/server/PartitionCreationBench.java
@@ -16,13 +16,13 @@
  */
 package org.apache.kafka.jmh.server;
 
-import kafka.api.ApiVersion;
 import kafka.cluster.Partition;
 import kafka.log.CleanerConfig;
 import kafka.log.Defaults;
 import kafka.log.LogConfig;
 import kafka.log.LogManager;
-import kafka.server.AlterIsrManager;
+import kafka.server.AlterPartitionManager;
+import kafka.server.BrokerFeatures;
 import kafka.server.BrokerTopicStats;
 import kafka.server.KafkaConfig;
 import kafka.server.LogDirFailureChannel;
@@ -44,6 +44,7 @@
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
@@ -94,7 +95,7 @@ public class PartitionCreationBench {
     private KafkaZkClient zkClient;
     private LogDirFailureChannel failureChannel;
     private LogManager logManager;
-    private AlterIsrManager alterIsrManager;
+    private AlterPartitionManager alterPartitionManager;
     private List<TopicPartition> topicPartitions;
 
     @SuppressWarnings("deprecation")
@@ -134,7 +135,7 @@ public void setup() {
             setFlushStartOffsetCheckpointMs(10000L).
             setRetentionCheckMs(1000L).
             setMaxPidExpirationMs(60000).
-            setInterBrokerProtocolVersion(ApiVersion.latestVersion()).
+            setInterBrokerProtocolVersion(MetadataVersion.latest()).
             setScheduler(scheduler).
             setBrokerTopicStats(brokerTopicStats).
             setLogDirFailureChannel(failureChannel).
@@ -149,7 +150,7 @@ public Properties getEntityConfigs(String rootEntityType, String sanitizedEntity
                 return new Properties();
             }
         };
-        this.alterIsrManager = TestUtils.createAlterIsrManager();
+        this.alterPartitionManager = TestUtils.createAlterIsrManager();
         this.replicaManager = new ReplicaManagerBuilder().
             setConfig(brokerProperties).
             setMetrics(metrics).
@@ -159,9 +160,9 @@ public Properties getEntityConfigs(String rootEntityType, String sanitizedEntity
             setLogManager(logManager).
             setQuotaManagers(quotaManagers).
             setBrokerTopicStats(brokerTopicStats).
-            setMetadataCache(new ZkMetadataCache(this.brokerProperties.brokerId())).
+            setMetadataCache(new ZkMetadataCache(this.brokerProperties.brokerId(), this.brokerProperties.interBrokerProtocolVersion(), BrokerFeatures.createEmpty())).
             setLogDirFailureChannel(failureChannel).
-            setAlterIsrManager(alterIsrManager).
+            setAlterPartitionManager(alterPartitionManager).
             build();
         replicaManager.startup();
         replicaManager.checkpointHighWatermarks();
@@ -222,7 +223,7 @@ public void makeFollower() {
                     .setLeader(0)
                     .setLeaderEpoch(0)
                     .setIsr(inSync)
-                    .setZkVersion(1)
+                    .setPartitionEpoch(1)
                     .setReplicas(replicas)
                     .setIsNew(true);
 
diff --git a/kafka-merge-pr.py b/kafka-merge-pr.py
index 89b756e04abae..1e0b3c25b9562 100755
--- a/kafka-merge-pr.py
+++ b/kafka-merge-pr.py
@@ -70,7 +70,7 @@
 
 DEV_BRANCH_NAME = "trunk"
 
-DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "3.2.0")
+DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "3.4.0")
 
 ORIGINAL_HEAD = ""
 
diff --git a/log4j-appender/src/main/java/org/apache/kafka/log4jappender/KafkaLog4jAppender.java b/log4j-appender/src/main/java/org/apache/kafka/log4jappender/KafkaLog4jAppender.java
index 23272a2cb5d3f..c561fc23608bf 100644
--- a/log4j-appender/src/main/java/org/apache/kafka/log4jappender/KafkaLog4jAppender.java
+++ b/log4j-appender/src/main/java/org/apache/kafka/log4jappender/KafkaLog4jAppender.java
@@ -43,6 +43,7 @@
 import static org.apache.kafka.clients.producer.ProducerConfig.MAX_BLOCK_MS_CONFIG;
 import static org.apache.kafka.clients.producer.ProducerConfig.RETRIES_CONFIG;
 import static org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG;
+import static org.apache.kafka.clients.producer.ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG;
 import static org.apache.kafka.common.config.SaslConfigs.SASL_JAAS_CONFIG;
 import static org.apache.kafka.common.config.SaslConfigs.SASL_KERBEROS_SERVICE_NAME;
 import static org.apache.kafka.common.config.SaslConfigs.SASL_MECHANISM;
@@ -290,6 +291,9 @@ public void activateOptions() {
         props.put(DELIVERY_TIMEOUT_MS_CONFIG, deliveryTimeoutMs);
         props.put(LINGER_MS_CONFIG, lingerMs);
         props.put(BATCH_SIZE_CONFIG, batchSize);
+        // Disable idempotence to avoid deadlock when the producer network thread writes a log line while interacting
+        // with the TransactionManager, see KAFKA-13761 for more information.
+        props.put(ENABLE_IDEMPOTENCE_CONFIG, false);
 
         if (securityProtocol != null) {
             props.put(SECURITY_PROTOCOL_CONFIG, securityProtocol);
diff --git a/metadata/src/main/java/org/apache/kafka/controller/AclControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/AclControlManager.java
index b0fbfb58a385d..d3fc0fe76edd8 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/AclControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/AclControlManager.java
@@ -41,11 +41,14 @@
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 
 /**
@@ -142,7 +145,7 @@ static void validateNewAcl(AclBinding binding) {
 
     ControllerResult<List<AclDeleteResult>> deleteAcls(List<AclBindingFilter> filters) {
         List<AclDeleteResult> results = new ArrayList<>();
-        List<ApiMessageAndVersion> records = new ArrayList<>();
+        Set<ApiMessageAndVersion> records = new HashSet<>();
         for (AclBindingFilter filter : filters) {
             try {
                 validateFilter(filter);
@@ -152,11 +155,11 @@ ControllerResult<List<AclDeleteResult>> deleteAcls(List<AclBindingFilter> filter
                 results.add(new AclDeleteResult(ApiError.fromThrowable(e).exception()));
             }
         }
-        return ControllerResult.atomicOf(records, results);
+        return ControllerResult.atomicOf(records.stream().collect(Collectors.toList()), results);
     }
 
     AclDeleteResult deleteAclsForFilter(AclBindingFilter filter,
-                                        List<ApiMessageAndVersion> records) {
+                                        Set<ApiMessageAndVersion> records) {
         List<AclBindingDeleteResult> deleted = new ArrayList<>();
         for (Entry<Uuid, StandardAcl> entry : idToAcl.entrySet()) {
             Uuid id = entry.getKey();
diff --git a/metadata/src/main/java/org/apache/kafka/controller/BootstrapMetadata.java b/metadata/src/main/java/org/apache/kafka/controller/BootstrapMetadata.java
new file mode 100644
index 0000000000000..d9d0651a193ba
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/controller/BootstrapMetadata.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import org.apache.kafka.common.metadata.FeatureLevelRecord;
+import org.apache.kafka.common.metadata.MetadataRecordType;
+import org.apache.kafka.metadata.util.SnapshotFileReader;
+import org.apache.kafka.metadata.util.SnapshotFileWriter;
+import org.apache.kafka.raft.Batch;
+import org.apache.kafka.raft.BatchReader;
+import org.apache.kafka.raft.RaftClient;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.apache.kafka.snapshot.SnapshotReader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Supplier;
+import java.util.stream.Stream;
+
+
+/**
+ * A read-only class that holds the controller bootstrap metadata. A file named "bootstrap.snapshot" is used and the
+ * format is the same as a KRaft snapshot.
+ */
+public class BootstrapMetadata {
+    private static final Logger log = LoggerFactory.getLogger(BootstrapMetadata.class);
+
+    public static final String BOOTSTRAP_FILE = "bootstrap.checkpoint";
+
+    private final MetadataVersion metadataVersion;
+
+    private final List<ApiMessageAndVersion> records;
+
+    BootstrapMetadata(MetadataVersion metadataVersion, List<ApiMessageAndVersion> records) {
+        this.metadataVersion = metadataVersion;
+        this.records = Collections.unmodifiableList(records);
+    }
+
+    public MetadataVersion metadataVersion() {
+        return this.metadataVersion;
+    }
+
+    public List<ApiMessageAndVersion> records() {
+        return records;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        BootstrapMetadata metadata = (BootstrapMetadata) o;
+        return metadataVersion == metadata.metadataVersion;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(metadataVersion);
+    }
+
+    @Override
+    public String toString() {
+        return "BootstrapMetadata{" +
+            "metadataVersion=" + metadataVersion +
+            '}';
+    }
+
+    /**
+     * A raft client listener that simply collects all of the commits and snapshots into a mapping of
+     * metadata record type to list of records.
+     */
+    private static class BootstrapListener implements RaftClient.Listener<ApiMessageAndVersion> {
+        private final List<ApiMessageAndVersion> records = new ArrayList<>();
+
+        @Override
+        public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
+            try {
+                while (reader.hasNext()) {
+                    Batch<ApiMessageAndVersion> batch = reader.next();
+                    records.addAll(batch.records());
+                }
+            } finally {
+                reader.close();
+            }
+        }
+
+        @Override
+        public void handleSnapshot(SnapshotReader<ApiMessageAndVersion> reader) {
+            try {
+                while (reader.hasNext()) {
+                    Batch<ApiMessageAndVersion> batch = reader.next();
+                    for (ApiMessageAndVersion messageAndVersion : batch) {
+                        records.add(messageAndVersion);
+                    }
+                }
+            } finally {
+                reader.close();
+            }
+        }
+    }
+
+    public static BootstrapMetadata create(MetadataVersion metadataVersion) {
+        return create(metadataVersion, new ArrayList<>());
+    }
+
+    public static BootstrapMetadata create(MetadataVersion metadataVersion, List<ApiMessageAndVersion> records) {
+        if (!metadataVersion.isKRaftSupported()) {
+            throw new IllegalArgumentException(String.format(
+                "Cannot create BootstrapMetadata with a non-KRaft metadata version %s. Minimum version is %s",
+                metadataVersion, MetadataVersion.MINIMUM_KRAFT_VERSION));
+        }
+        records.add(new ApiMessageAndVersion(
+            new FeatureLevelRecord()
+                .setName(MetadataVersion.FEATURE_NAME)
+                .setFeatureLevel(metadataVersion.featureLevel()),
+            FeatureLevelRecord.LOWEST_SUPPORTED_VERSION));
+
+        return new BootstrapMetadata(metadataVersion, records);
+    }
+
+    /**
+     * Load a bootstrap snapshot into a read-only bootstrap metadata object and return it.
+     *
+     * @param bootstrapDir              The directory from which to read the snapshot file.
+     * @param fallbackVersionSupplier   A function that returns the metadata.version to use when upgrading from an older KRaft
+     * @return                          The read-only bootstrap metadata
+     * @throws Exception
+     */
+    public static BootstrapMetadata load(Path bootstrapDir, Supplier<MetadataVersion> fallbackVersionSupplier) throws Exception {
+        final Path bootstrapPath = bootstrapDir.resolve(BOOTSTRAP_FILE);
+
+        if (!Files.exists(bootstrapPath)) {
+            // Upgrade scenario from KRaft prior to 3.3 (i.e., no bootstrap metadata present)
+            MetadataVersion fallbackVersion = fallbackVersionSupplier.get();
+            if (fallbackVersion.isKRaftSupported()) {
+                log.debug("Missing bootstrap file, this appears to be a KRaft cluster older than 3.3. Setting metadata.version to {}.",
+                    fallbackVersion.featureLevel());
+                return BootstrapMetadata.create(fallbackVersion);
+            } else {
+                throw new Exception(String.format("Could not set fallback bootstrap metadata with non-KRaft metadata version of %s", fallbackVersion));
+            }
+        }
+
+        BootstrapListener listener = new BootstrapListener();
+        try (SnapshotFileReader reader = new SnapshotFileReader(bootstrapPath.toString(), listener)) {
+            reader.startup();
+            reader.caughtUpFuture().get();
+        } catch (ExecutionException e) {
+            throw new Exception("Failed to load snapshot", e.getCause());
+        }
+
+        Optional<FeatureLevelRecord> metadataVersionRecord = listener.records.stream()
+            .flatMap(message -> {
+                MetadataRecordType type = MetadataRecordType.fromId(message.message().apiKey());
+                if (!type.equals(MetadataRecordType.FEATURE_LEVEL_RECORD)) {
+                    return Stream.empty();
+                }
+                FeatureLevelRecord record = (FeatureLevelRecord) message.message();
+                if (record.name().equals(MetadataVersion.FEATURE_NAME)) {
+                    return Stream.of(record);
+                } else {
+                    return Stream.empty();
+                }
+            })
+            .findFirst();
+
+        if (metadataVersionRecord.isPresent()) {
+            return new BootstrapMetadata(MetadataVersion.fromFeatureLevel(metadataVersionRecord.get().featureLevel()), listener.records);
+        } else {
+            throw new Exception("Expected a metadata.version to exist in the snapshot " + bootstrapPath + ", but none was found");
+        }
+    }
+
+    /**
+     * Write a set of bootstrap metadata to the bootstrap snapshot in a given directory
+     *
+     * @param metadata      The metadata to persist
+     * @param bootstrapDir  The directory in which to create the bootstrap snapshot
+     * @throws IOException
+     */
+    public static void write(BootstrapMetadata metadata, Path bootstrapDir) throws IOException {
+        final Path bootstrapPath = bootstrapDir.resolve(BootstrapMetadata.BOOTSTRAP_FILE);
+        if (Files.exists(bootstrapPath)) {
+            throw new IOException("Cannot write metadata bootstrap file " + bootstrapPath +
+                ". File already already exists.");
+        }
+        try (SnapshotFileWriter bootstrapWriter = SnapshotFileWriter.open(bootstrapPath)) {
+            bootstrapWriter.append(metadata.records());
+        }
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/controller/BrokerHeartbeatManager.java b/metadata/src/main/java/org/apache/kafka/controller/BrokerHeartbeatManager.java
index b95f0d327f434..428f1c5833ea4 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/BrokerHeartbeatManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/BrokerHeartbeatManager.java
@@ -17,18 +17,16 @@
 
 package org.apache.kafka.controller;
 
-import org.apache.kafka.common.errors.InvalidReplicationFactorException;
 import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Time;
-import org.apache.kafka.metadata.UsableBroker;
+import org.apache.kafka.metadata.placement.UsableBroker;
 import org.slf4j.Logger;
 
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.Optional;
 import java.util.TreeSet;
@@ -344,6 +342,22 @@ private boolean hasValidSession(BrokerHeartbeatState broker) {
         }
     }
 
+    /**
+     * Register this broker if we haven't already, and make sure its fencing state is
+     * correct.
+     *
+     * @param brokerId          The broker ID.
+     * @param fenced            True only if the broker is currently fenced.
+     */
+    void register(int brokerId, boolean fenced) {
+        BrokerHeartbeatState broker = brokers.get(brokerId);
+        if (broker == null) {
+            touch(brokerId, fenced, -1);
+        } else if (broker.fenced() != fenced) {
+            touch(brokerId, fenced, broker.metadataOffset);
+        }
+    }
+
     /**
      * Update broker state, including lastContactNs.
      *
@@ -437,27 +451,11 @@ Optional<Integer> findOneStaleBroker() {
         return Optional.empty();
     }
 
-    /**
-     * Place replicas on unfenced brokers.
-     *
-     * @param startPartition    The partition ID to start with.
-     * @param numPartitions     The number of partitions to place.
-     * @param numReplicas       The number of replicas for each partition.
-     * @param idToRack          A function mapping broker id to broker rack.
-     * @param placer            The replica placer to use.
-     *
-     * @return                  A list of replica lists.
-     *
-     * @throws InvalidReplicationFactorException    If too many replicas were requested.
-     */
-    List<List<Integer>> placeReplicas(int startPartition,
-                                      int numPartitions,
-                                      short numReplicas,
-                                      Function<Integer, Optional<String>> idToRack,
-                                      ReplicaPlacer placer) {
-        Iterator<UsableBroker> iterator = new UsableBrokerIterator(
-            brokers.values().iterator(), idToRack);
-        return placer.place(startPartition, numPartitions, numReplicas, iterator);
+    Iterator<UsableBroker> usableBrokers(
+        Function<Integer, Optional<String>> idToRack
+    ) {
+        return new UsableBrokerIterator(brokers.values().iterator(),
+            idToRack);
     }
 
     static class UsableBrokerIterator implements Iterator<UsableBroker> {
@@ -513,17 +511,17 @@ BrokerControlState currentBrokerState(BrokerHeartbeatState broker) {
     /**
      * Calculate the next broker state for a broker that just sent a heartbeat request.
      *
-     * @param brokerId              The broker id.
-     * @param request               The incoming heartbeat request.
-     * @param lastCommittedOffset   The last committed offset of the quorum controller.
-     * @param hasLeaderships        A callback which evaluates to true if the broker leads
-     *                              at least one partition.
+     * @param brokerId                     The broker id.
+     * @param request                      The incoming heartbeat request.
+     * @param registerBrokerRecordOffset   The offset of the broker's {@link org.apache.kafka.common.metadata.RegisterBrokerRecord}.
+     * @param hasLeaderships               A callback which evaluates to true if the broker leads
+     *                                     at least one partition.
      *
-     * @return                      The current and next broker states.
+     * @return                             The current and next broker states.
      */
     BrokerControlStates calculateNextBrokerState(int brokerId,
                                                  BrokerHeartbeatRequestData request,
-                                                 long lastCommittedOffset,
+                                                 long registerBrokerRecordOffset,
                                                  Supplier<Boolean> hasLeaderships) {
         BrokerHeartbeatState broker = brokers.getOrDefault(brokerId,
             new BrokerHeartbeatState(brokerId));
@@ -535,17 +533,17 @@ BrokerControlStates calculateNextBrokerState(int brokerId,
                         "shutdown.", brokerId);
                     return new BrokerControlStates(currentState, SHUTDOWN_NOW);
                 } else if (!request.wantFence()) {
-                    if (request.currentMetadataOffset() >= lastCommittedOffset) {
+                    if (request.currentMetadataOffset() >= registerBrokerRecordOffset) {
                         log.info("The request from broker {} to unfence has been granted " +
-                                "because it has caught up with the last committed metadata " +
-                                "offset {}.", brokerId, lastCommittedOffset);
+                                "because it has caught up with the offset of it's register " +
+                                "broker record {}.", brokerId, registerBrokerRecordOffset);
                         return new BrokerControlStates(currentState, UNFENCED);
                     } else {
                         if (log.isDebugEnabled()) {
                             log.debug("The request from broker {} to unfence cannot yet " +
-                                "be granted because it has not caught up with the last " +
-                                "committed metadata offset {}. It is still at offset {}.",
-                                brokerId, lastCommittedOffset, request.currentMetadataOffset());
+                                "be granted because it has not caught up with the offset of " +
+                                "it's register broker record {}. It is still at offset {}.",
+                                brokerId, registerBrokerRecordOffset, request.currentMetadataOffset());
                         }
                         return new BrokerControlStates(currentState, FENCED);
                     }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/BrokersToIsrs.java b/metadata/src/main/java/org/apache/kafka/controller/BrokersToIsrs.java
index aceb6ddae3a35..d12e6634061a7 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/BrokersToIsrs.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/BrokersToIsrs.java
@@ -29,7 +29,6 @@
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NoSuchElementException;
-import java.util.Objects;
 
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER;
 import static org.apache.kafka.metadata.Replicas.NONE;
@@ -55,41 +54,6 @@ public class BrokersToIsrs {
 
     private final static int REPLICA_MASK = 0x7fff_ffff;
 
-    static class TopicIdPartition {
-        private final Uuid topicId;
-        private final int partitionId;
-
-        TopicIdPartition(Uuid topicId, int partitionId) {
-            this.topicId = topicId;
-            this.partitionId = partitionId;
-        }
-
-        public Uuid topicId() {
-            return topicId;
-        }
-
-        public int partitionId() {
-            return partitionId;
-        }
-
-        @Override
-        public boolean equals(Object o) {
-            if (!(o instanceof TopicIdPartition)) return false;
-            TopicIdPartition other = (TopicIdPartition) o;
-            return other.topicId.equals(topicId) && other.partitionId == partitionId;
-        }
-
-        @Override
-        public int hashCode() {
-            return Objects.hash(topicId, partitionId);
-        }
-
-        @Override
-        public String toString() {
-            return topicId + ":" + partitionId;
-        }
-    }
-
     static class PartitionsOnReplicaIterator implements Iterator<TopicIdPartition> {
         private final Iterator<Entry<Uuid, int[]>> iterator;
         private final boolean leaderOnly;
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ClusterControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/ClusterControlManager.java
index fb3844f23bd51..d30f43242179f 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ClusterControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ClusterControlManager.java
@@ -18,11 +18,13 @@
 package org.apache.kafka.controller;
 
 import org.apache.kafka.common.Endpoint;
+import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.errors.DuplicateBrokerRegistrationException;
 import org.apache.kafka.common.errors.InconsistentClusterIdException;
 import org.apache.kafka.common.errors.StaleBrokerEpochException;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.message.BrokerRegistrationRequestData;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
 import org.apache.kafka.common.metadata.FenceBrokerRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerEndpoint;
@@ -31,14 +33,21 @@
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerFeatureCollection;
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnregisterBrokerRecord;
+import org.apache.kafka.common.protocol.ApiMessage;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.metadata.BrokerRegistration;
+import org.apache.kafka.metadata.BrokerRegistrationFencingChange;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
 import org.apache.kafka.metadata.BrokerRegistrationReply;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.metadata.VersionRange;
+import org.apache.kafka.metadata.placement.ReplicaPlacer;
+import org.apache.kafka.metadata.placement.StripedReplicaPlacer;
+import org.apache.kafka.metadata.placement.UsableBroker;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.apache.kafka.timeline.TimelineHashMap;
 import org.slf4j.Logger;
@@ -51,11 +60,15 @@
 import java.util.Map.Entry;
 import java.util.NoSuchElementException;
 import java.util.Optional;
+import java.util.OptionalLong;
+import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
-import static org.apache.kafka.common.metadata.MetadataRecordType.REGISTER_BROKER_RECORD;
+import static java.util.Collections.singletonList;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
 
 
 /**
@@ -64,6 +77,89 @@
  * brokers being fenced or unfenced, and broker feature versions.
  */
 public class ClusterControlManager {
+    final static long DEFAULT_SESSION_TIMEOUT_NS = NANOSECONDS.convert(18, TimeUnit.SECONDS);
+
+    static class Builder {
+        private LogContext logContext = null;
+        private String clusterId = null;
+        private Time time = Time.SYSTEM;
+        private SnapshotRegistry snapshotRegistry = null;
+        private long sessionTimeoutNs = DEFAULT_SESSION_TIMEOUT_NS;
+        private ReplicaPlacer replicaPlacer = null;
+        private ControllerMetrics controllerMetrics = null;
+        private FeatureControlManager featureControl = null;
+
+        Builder setLogContext(LogContext logContext) {
+            this.logContext = logContext;
+            return this;
+        }
+
+        Builder setClusterId(String clusterId) {
+            this.clusterId = clusterId;
+            return this;
+        }
+
+        Builder setTime(Time time) {
+            this.time = time;
+            return this;
+        }
+
+        Builder setSnapshotRegistry(SnapshotRegistry snapshotRegistry) {
+            this.snapshotRegistry = snapshotRegistry;
+            return this;
+        }
+
+        Builder setSessionTimeoutNs(long sessionTimeoutNs) {
+            this.sessionTimeoutNs = sessionTimeoutNs;
+            return this;
+        }
+
+        Builder setReplicaPlacer(ReplicaPlacer replicaPlacer) {
+            this.replicaPlacer = replicaPlacer;
+            return this;
+        }
+
+        Builder setControllerMetrics(ControllerMetrics controllerMetrics) {
+            this.controllerMetrics = controllerMetrics;
+            return this;
+        }
+
+        Builder setFeatureControlManager(FeatureControlManager featureControl) {
+            this.featureControl = featureControl;
+            return this;
+        }
+
+        ClusterControlManager build() {
+            if (logContext == null) {
+                logContext = new LogContext();
+            }
+            if (clusterId == null) {
+                clusterId = Uuid.randomUuid().toString();
+            }
+            if (snapshotRegistry == null) {
+                snapshotRegistry = new SnapshotRegistry(logContext);
+            }
+            if (replicaPlacer == null) {
+                replicaPlacer = new StripedReplicaPlacer(new Random());
+            }
+            if (controllerMetrics == null) {
+                throw new RuntimeException("You must specify ControllerMetrics");
+            }
+            if (featureControl == null) {
+                throw new RuntimeException("You must specify FeatureControlManager");
+            }
+            return new ClusterControlManager(logContext,
+                clusterId,
+                time,
+                snapshotRegistry,
+                sessionTimeoutNs,
+                replicaPlacer,
+                controllerMetrics,
+                featureControl
+            );
+        }
+    }
+
     class ReadyBrokersFuture {
         private final CompletableFuture<Void> future;
         private final int minBrokers;
@@ -122,6 +218,14 @@ boolean check() {
      */
     private final TimelineHashMap<Integer, BrokerRegistration> brokerRegistrations;
 
+    /**
+     * Save the offset of each broker registration record, we will only unfence a
+     * broker when its high watermark has reached its broker registration record,
+     * this is not necessarily the exact offset of each broker registration record
+     * but should not be smaller than it.
+     */
+    private final TimelineHashMap<Integer, Long> registerBrokerRecordOffsets;
+
     /**
      * A reference to the controller's metrics registry.
      */
@@ -138,13 +242,21 @@ boolean check() {
      */
     private Optional<ReadyBrokersFuture> readyBrokersFuture;
 
-    ClusterControlManager(LogContext logContext,
-                          String clusterId,
-                          Time time,
-                          SnapshotRegistry snapshotRegistry,
-                          long sessionTimeoutNs,
-                          ReplicaPlacer replicaPlacer,
-                          ControllerMetrics metrics) {
+    /**
+     * The feature control manager.
+     */
+    private final FeatureControlManager featureControl;
+
+    private ClusterControlManager(
+        LogContext logContext,
+        String clusterId,
+        Time time,
+        SnapshotRegistry snapshotRegistry,
+        long sessionTimeoutNs,
+        ReplicaPlacer replicaPlacer,
+        ControllerMetrics metrics,
+        FeatureControlManager featureControl
+    ) {
         this.logContext = logContext;
         this.clusterId = clusterId;
         this.log = logContext.logger(ClusterControlManager.class);
@@ -152,9 +264,15 @@ boolean check() {
         this.sessionTimeoutNs = sessionTimeoutNs;
         this.replicaPlacer = replicaPlacer;
         this.brokerRegistrations = new TimelineHashMap<>(snapshotRegistry, 0);
+        this.registerBrokerRecordOffsets = new TimelineHashMap<>(snapshotRegistry, 0);
         this.heartbeatManager = null;
         this.readyBrokersFuture = Optional.empty();
         this.controllerMetrics = metrics;
+        this.featureControl = featureControl;
+    }
+
+    ReplicaPlacer replicaPlacer() {
+        return replicaPlacer;
     }
 
     /**
@@ -178,6 +296,13 @@ Map<Integer, BrokerRegistration> brokerRegistrations() {
         return brokerRegistrations;
     }
 
+    Map<Integer, Map<String, VersionRange>> brokerSupportedVersions() {
+        return brokerRegistrations()
+            .entrySet()
+            .stream()
+            .collect(Collectors.toMap(Entry::getKey, entry -> entry.getValue().supportedFeatures()));
+    }
+
     Set<Integer> fencedBrokerIds() {
         return brokerRegistrations.values()
             .stream()
@@ -192,7 +317,7 @@ Set<Integer> fencedBrokerIds() {
     public ControllerResult<BrokerRegistrationReply> registerBroker(
             BrokerRegistrationRequestData request,
             long brokerEpoch,
-            FeatureMapAndEpoch finalizedFeatures) {
+            FinalizedControllerFeatures finalizedFeatures) {
         if (heartbeatManager == null) {
             throw new RuntimeException("ClusterControlManager is not active.");
         }
@@ -212,7 +337,6 @@ public ControllerResult<BrokerRegistrationReply> registerBroker(
                 if (!existing.incarnationId().equals(request.incarnationId())) {
                     // Remove any existing session for the old broker incarnation.
                     heartbeatManager.remove(brokerId);
-                    existing = null;
                 }
             }
         }
@@ -229,13 +353,14 @@ public ControllerResult<BrokerRegistrationReply> registerBroker(
                 setSecurityProtocol(listener.securityProtocol()));
         }
         for (BrokerRegistrationRequestData.Feature feature : request.features()) {
-            Optional<VersionRange> finalized = finalizedFeatures.map().get(feature.name());
+            Optional<Short> finalized = finalizedFeatures.get(feature.name());
             if (finalized.isPresent()) {
-                if (!finalized.get().contains(new VersionRange(feature.minSupportedVersion(),
-                        feature.maxSupportedVersion()))) {
+                if (!VersionRange.of(feature.minSupportedVersion(), feature.maxSupportedVersion()).contains(finalized.get())) {
                     throw new UnsupportedVersionException("Unable to register because " +
-                        "the broker has an unsupported version of " + feature.name());
+                            "the broker has an unsupported version of " + feature.name());
                 }
+            } else {
+                log.warn("Broker registered with feature {} that is unknown to the controller", feature.name());
             }
             record.features().add(new BrokerFeature().
                 setName(feature.name()).
@@ -243,19 +368,23 @@ public ControllerResult<BrokerRegistrationReply> registerBroker(
                 setMaxSupportedVersion(feature.maxSupportedVersion()));
         }
 
-        if (existing == null) {
-            heartbeatManager.touch(brokerId, true, -1);
-        } else {
-            heartbeatManager.touch(brokerId, existing.fenced(), -1);
-        }
+        heartbeatManager.register(brokerId, record.fenced());
 
         List<ApiMessageAndVersion> records = new ArrayList<>();
-        records.add(new ApiMessageAndVersion(record,
-            REGISTER_BROKER_RECORD.highestSupportedVersion()));
+        records.add(new ApiMessageAndVersion(record, featureControl.metadataVersion().
+            registerBrokerRecordVersion()));
         return ControllerResult.atomicOf(records, new BrokerRegistrationReply(brokerEpoch));
     }
 
-    public void replay(RegisterBrokerRecord record) {
+    public OptionalLong registerBrokerRecordOffset(int brokerId) {
+        if (registerBrokerRecordOffsets.containsKey(brokerId)) {
+            return OptionalLong.of(registerBrokerRecordOffsets.get(brokerId));
+        }
+        return OptionalLong.empty();
+    }
+
+    public void replay(RegisterBrokerRecord record, long offset) {
+        registerBrokerRecordOffsets.put(record.brokerId(), offset);
         int brokerId = record.brokerId();
         List<Endpoint> listeners = new ArrayList<>();
         for (BrokerEndpoint endpoint : record.endPoints()) {
@@ -265,16 +394,21 @@ public void replay(RegisterBrokerRecord record) {
         }
         Map<String, VersionRange> features = new HashMap<>();
         for (BrokerFeature feature : record.features()) {
-            features.put(feature.name(), new VersionRange(
+            features.put(feature.name(), VersionRange.of(
                 feature.minSupportedVersion(), feature.maxSupportedVersion()));
         }
-       
+
         // Update broker registrations.
         BrokerRegistration prevRegistration = brokerRegistrations.put(brokerId,
                 new BrokerRegistration(brokerId, record.brokerEpoch(),
                     record.incarnationId(), listeners, features,
-                    Optional.ofNullable(record.rack()), record.fenced()));
+                    Optional.ofNullable(record.rack()), record.fenced(),
+                    record.inControlledShutdown()));
         updateMetrics(prevRegistration, brokerRegistrations.get(brokerId));
+        if (heartbeatManager != null) {
+            if (prevRegistration != null) heartbeatManager.remove(brokerId);
+            heartbeatManager.register(brokerId, record.fenced());
+        }
         if (prevRegistration == null) {
             log.info("Registered new broker: {}", record);
         } else if (prevRegistration.incarnationId().equals(record.incarnationId())) {
@@ -285,15 +419,17 @@ public void replay(RegisterBrokerRecord record) {
     }
 
     public void replay(UnregisterBrokerRecord record) {
+        registerBrokerRecordOffsets.remove(record.brokerId());
         int brokerId = record.brokerId();
         BrokerRegistration registration = brokerRegistrations.get(brokerId);
         if (registration == null) {
             throw new RuntimeException(String.format("Unable to replay %s: no broker " +
-                "registration found for that id", record.toString()));
-        } else if (registration.epoch() !=  record.brokerEpoch()) {
+                "registration found for that id", record));
+        } else if (registration.epoch() != record.brokerEpoch()) {
             throw new RuntimeException(String.format("Unable to replay %s: no broker " +
-                "registration with that epoch found", record.toString()));
+                "registration with that epoch found", record));
         } else {
+            if (heartbeatManager != null) heartbeatManager.remove(brokerId);
             brokerRegistrations.remove(brokerId);
             updateMetrics(registration, brokerRegistrations.get(brokerId));
             log.info("Unregistered broker: {}", record);
@@ -301,39 +437,74 @@ public void replay(UnregisterBrokerRecord record) {
     }
 
     public void replay(FenceBrokerRecord record) {
-        int brokerId = record.id();
-        BrokerRegistration registration = brokerRegistrations.get(brokerId);
-        if (registration == null) {
-            throw new RuntimeException(String.format("Unable to replay %s: no broker " +
-                "registration found for that id", record.toString()));
-        } else if (registration.epoch() !=  record.epoch()) {
-            throw new RuntimeException(String.format("Unable to replay %s: no broker " +
-                "registration with that epoch found", record.toString()));
-        } else {
-            brokerRegistrations.put(brokerId, registration.cloneWithFencing(true));
-            updateMetrics(registration, brokerRegistrations.get(brokerId));
-            log.info("Fenced broker: {}", record);
-        }
+        replayRegistrationChange(
+            record,
+            record.id(),
+            record.epoch(),
+            BrokerRegistrationFencingChange.FENCE.asBoolean(),
+            BrokerRegistrationInControlledShutdownChange.NONE.asBoolean()
+        );
     }
 
     public void replay(UnfenceBrokerRecord record) {
-        int brokerId = record.id();
-        BrokerRegistration registration = brokerRegistrations.get(brokerId);
-        if (registration == null) {
+        replayRegistrationChange(
+            record,
+            record.id(),
+            record.epoch(),
+            BrokerRegistrationFencingChange.UNFENCE.asBoolean(),
+            BrokerRegistrationInControlledShutdownChange.NONE.asBoolean()
+        );
+    }
+
+    public void replay(BrokerRegistrationChangeRecord record) {
+        BrokerRegistrationFencingChange fencingChange =
+            BrokerRegistrationFencingChange.fromValue(record.fenced()).orElseThrow(
+                () -> new IllegalStateException(String.format("Unable to replay %s: unknown " +
+                    "value for fenced field: %d", record, record.fenced())));
+        BrokerRegistrationInControlledShutdownChange inControlledShutdownChange =
+            BrokerRegistrationInControlledShutdownChange.fromValue(record.inControlledShutdown()).orElseThrow(
+                () -> new IllegalStateException(String.format("Unable to replay %s: unknown " +
+                    "value for inControlledShutdown field: %d", record, record.inControlledShutdown())));
+        replayRegistrationChange(
+            record,
+            record.brokerId(),
+            record.brokerEpoch(),
+            fencingChange.asBoolean(),
+            inControlledShutdownChange.asBoolean()
+        );
+    }
+
+    private void replayRegistrationChange(
+        ApiMessage record,
+        int brokerId,
+        long brokerEpoch,
+        Optional<Boolean> fencingChange,
+        Optional<Boolean> inControlledShutdownChange
+    ) {
+        BrokerRegistration curRegistration = brokerRegistrations.get(brokerId);
+        if (curRegistration == null) {
             throw new RuntimeException(String.format("Unable to replay %s: no broker " +
                 "registration found for that id", record.toString()));
-        } else if (registration.epoch() !=  record.epoch()) {
+        } else if (curRegistration.epoch() != brokerEpoch) {
             throw new RuntimeException(String.format("Unable to replay %s: no broker " +
                 "registration with that epoch found", record.toString()));
         } else {
-            brokerRegistrations.put(brokerId, registration.cloneWithFencing(false));
-            updateMetrics(registration, brokerRegistrations.get(brokerId));
-            log.info("Unfenced broker: {}", record);
-        }
-        if (readyBrokersFuture.isPresent()) {
-            if (readyBrokersFuture.get().check()) {
-                readyBrokersFuture.get().future.complete(null);
-                readyBrokersFuture = Optional.empty();
+            BrokerRegistration nextRegistration = curRegistration.cloneWith(
+                fencingChange,
+                inControlledShutdownChange
+            );
+            if (!curRegistration.equals(nextRegistration)) {
+                brokerRegistrations.put(brokerId, nextRegistration);
+                updateMetrics(curRegistration, nextRegistration);
+            } else {
+                log.info("Ignoring no-op registration change for {}", curRegistration);
+            }
+            if (heartbeatManager != null) heartbeatManager.register(brokerId, nextRegistration.fenced());
+            if (readyBrokersFuture.isPresent()) {
+                if (readyBrokersFuture.get().check()) {
+                    readyBrokersFuture.get().future.complete(null);
+                    readyBrokersFuture = Optional.empty();
+                }
             }
         }
     }
@@ -345,40 +516,76 @@ private void updateMetrics(BrokerRegistration prevRegistration, BrokerRegistrati
             } else {
                 controllerMetrics.setActiveBrokerCount(controllerMetrics.activeBrokerCount() - 1);
             }
+            log.info("Removed broker: {}", prevRegistration.id());
         } else if (prevRegistration == null) {
             if (registration.fenced()) {
                 controllerMetrics.setFencedBrokerCount(controllerMetrics.fencedBrokerCount() + 1);
+                log.info("Added new fenced broker: {}", registration.id());
             } else {
                 controllerMetrics.setActiveBrokerCount(controllerMetrics.activeBrokerCount() + 1);
+                log.info("Added new unfenced broker: {}", registration.id());
             }
         } else {
             if (prevRegistration.fenced() && !registration.fenced()) {
                 controllerMetrics.setFencedBrokerCount(controllerMetrics.fencedBrokerCount() - 1);
                 controllerMetrics.setActiveBrokerCount(controllerMetrics.activeBrokerCount() + 1);
+                log.info("Unfenced broker: {}", registration.id());
             } else if (!prevRegistration.fenced() && registration.fenced()) {
                 controllerMetrics.setFencedBrokerCount(controllerMetrics.fencedBrokerCount() + 1);
                 controllerMetrics.setActiveBrokerCount(controllerMetrics.activeBrokerCount() - 1);
+                log.info("Fenced broker: {}", registration.id());
             }
         }
     }
 
-
-    public List<List<Integer>> placeReplicas(int startPartition,
-                                             int numPartitions,
-                                             short numReplicas) {
+    Iterator<UsableBroker> usableBrokers() {
         if (heartbeatManager == null) {
             throw new RuntimeException("ClusterControlManager is not active.");
         }
-        return heartbeatManager.placeReplicas(startPartition, numPartitions, numReplicas,
-            id -> brokerRegistrations.get(id).rack(), replicaPlacer);
+        return heartbeatManager.usableBrokers(
+            id -> brokerRegistrations.get(id).rack());
     }
 
+    /**
+     * Returns true if the broker is unfenced; Returns false if it is
+     * not or if it does not exist.
+     */
     public boolean unfenced(int brokerId) {
         BrokerRegistration registration = brokerRegistrations.get(brokerId);
         if (registration == null) return false;
         return !registration.fenced();
     }
 
+    /**
+     * Get a broker registration if it exists.
+     *
+     * @param brokerId The brokerId to get the registration for
+     * @return The current registration or null if the broker is not registered
+     */
+    public BrokerRegistration registration(int brokerId) {
+        return brokerRegistrations.get(brokerId);
+    }
+
+    /**
+     * Returns true if the broker is in controlled shutdown state; Returns false
+     * if it is not or if it does not exist.
+     */
+    public boolean inControlledShutdown(int brokerId) {
+        BrokerRegistration registration = brokerRegistrations.get(brokerId);
+        if (registration == null) return false;
+        return registration.inControlledShutdown();
+    }
+
+    /**
+     * Returns true if the broker is active. Active means not fenced nor in controlled
+     * shutdown; Returns false if it is not active or if it does not exist.
+     */
+    public boolean active(int brokerId) {
+        BrokerRegistration registration = brokerRegistrations.get(brokerId);
+        if (registration == null) return false;
+        return !registration.inControlledShutdown() && !registration.fenced();
+    }
+
     BrokerHeartbeatManager heartbeatManager() {
         if (heartbeatManager == null) {
             throw new RuntimeException("ClusterControlManager is not active.");
@@ -408,9 +615,11 @@ public void addReadyBrokersFuture(CompletableFuture<Void> future, int minBrokers
 
     class ClusterControlIterator implements Iterator<List<ApiMessageAndVersion>> {
         private final Iterator<Entry<Integer, BrokerRegistration>> iterator;
+        private final MetadataVersion metadataVersion;
 
         ClusterControlIterator(long epoch) {
             this.iterator = brokerRegistrations.entrySet(epoch).iterator();
+            this.metadataVersion = featureControl.metadataVersion();
         }
 
         @Override
@@ -437,17 +646,19 @@ public List<ApiMessageAndVersion> next() {
                     setMaxSupportedVersion(featureEntry.getValue().max()).
                     setMinSupportedVersion(featureEntry.getValue().min()));
             }
-            List<ApiMessageAndVersion> batch = new ArrayList<>();
-            batch.add(new ApiMessageAndVersion(new RegisterBrokerRecord().
+            RegisterBrokerRecord record = new RegisterBrokerRecord().
                 setBrokerId(brokerId).
                 setIncarnationId(registration.incarnationId()).
                 setBrokerEpoch(registration.epoch()).
                 setEndPoints(endpoints).
                 setFeatures(features).
                 setRack(registration.rack().orElse(null)).
-                setFenced(registration.fenced()),
-                    REGISTER_BROKER_RECORD.highestSupportedVersion()));
-            return batch;
+                setFenced(registration.fenced());
+            if (metadataVersion.isInControlledShutdownStateSupported()) {
+                record.setInControlledShutdown(registration.inControlledShutdown());
+            }
+            return singletonList(new ApiMessageAndVersion(record,
+                metadataVersion.registerBrokerRecordVersion()));
         }
     }
 
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ConfigurationControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/ConfigurationControlManager.java
index f9caf2bb04cb7..4d6736b878d5b 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ConfigurationControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ConfigurationControlManager.java
@@ -18,9 +18,11 @@
 package org.apache.kafka.controller;
 
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType;
+import org.apache.kafka.clients.admin.ConfigEntry;
 import org.apache.kafka.common.config.ConfigException;
 import org.apache.kafka.common.config.ConfigResource.Type;
 import org.apache.kafka.common.config.ConfigResource;
+import org.apache.kafka.common.config.types.Password;
 import org.apache.kafka.common.metadata.ConfigRecord;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.common.utils.LogContext;
@@ -51,26 +53,104 @@
 
 
 public class ConfigurationControlManager {
-    final static Consumer<ConfigResource> NO_OP_EXISTENCE_CHECKER = __ -> { };
+    public static final ConfigResource DEFAULT_NODE = new ConfigResource(Type.BROKER, "");
 
     private final Logger log;
     private final SnapshotRegistry snapshotRegistry;
     private final KafkaConfigSchema configSchema;
+    private final Consumer<ConfigResource> existenceChecker;
     private final Optional<AlterConfigPolicy> alterConfigPolicy;
     private final ConfigurationValidator validator;
     private final TimelineHashMap<ConfigResource, TimelineHashMap<String, String>> configData;
+    private final Map<String, Object> staticConfig;
+    private final ConfigResource currentController;
 
-    ConfigurationControlManager(LogContext logContext,
-                                SnapshotRegistry snapshotRegistry,
-                                KafkaConfigSchema configSchema,
-                                Optional<AlterConfigPolicy> alterConfigPolicy,
-                                ConfigurationValidator validator) {
+    static class Builder {
+        private LogContext logContext = null;
+        private SnapshotRegistry snapshotRegistry = null;
+        private KafkaConfigSchema configSchema = KafkaConfigSchema.EMPTY;
+        private Consumer<ConfigResource> existenceChecker = __ -> { };
+        private Optional<AlterConfigPolicy> alterConfigPolicy = Optional.empty();
+        private ConfigurationValidator validator = ConfigurationValidator.NO_OP;
+        private Map<String, Object> staticConfig = Collections.emptyMap();
+        private int nodeId = 0;
+
+        Builder setLogContext(LogContext logContext) {
+            this.logContext = logContext;
+            return this;
+        }
+
+        Builder setSnapshotRegistry(SnapshotRegistry snapshotRegistry) {
+            this.snapshotRegistry = snapshotRegistry;
+            return this;
+        }
+
+        Builder setKafkaConfigSchema(KafkaConfigSchema configSchema) {
+            this.configSchema = configSchema;
+            return this;
+        }
+
+        Builder setExistenceChecker(Consumer<ConfigResource> existenceChecker) {
+            this.existenceChecker = existenceChecker;
+            return this;
+        }
+
+        Builder setAlterConfigPolicy(Optional<AlterConfigPolicy> alterConfigPolicy) {
+            this.alterConfigPolicy = alterConfigPolicy;
+            return this;
+        }
+
+        Builder setValidator(ConfigurationValidator validator) {
+            this.validator = validator;
+            return this;
+        }
+
+        Builder setStaticConfig(Map<String, Object> staticConfig) {
+            this.staticConfig = staticConfig;
+            return this;
+        }
+
+        Builder setNodeId(int nodeId) {
+            this.nodeId = nodeId;
+            return this;
+        }
+
+        ConfigurationControlManager build() {
+            if (logContext == null) logContext = new LogContext();
+            if (snapshotRegistry == null) snapshotRegistry = new SnapshotRegistry(logContext);
+            return new ConfigurationControlManager(
+                logContext,
+                snapshotRegistry,
+                configSchema,
+                existenceChecker,
+                alterConfigPolicy,
+                validator,
+                staticConfig,
+                nodeId);
+        }
+    }
+
+    private ConfigurationControlManager(LogContext logContext,
+            SnapshotRegistry snapshotRegistry,
+            KafkaConfigSchema configSchema,
+            Consumer<ConfigResource> existenceChecker,
+            Optional<AlterConfigPolicy> alterConfigPolicy,
+            ConfigurationValidator validator,
+            Map<String, Object> staticConfig,
+            int nodeId) {
         this.log = logContext.logger(ConfigurationControlManager.class);
         this.snapshotRegistry = snapshotRegistry;
         this.configSchema = configSchema;
+        this.existenceChecker = existenceChecker;
         this.alterConfigPolicy = alterConfigPolicy;
         this.validator = validator;
         this.configData = new TimelineHashMap<>(snapshotRegistry, 0);
+        this.staticConfig = Collections.unmodifiableMap(new HashMap<>(staticConfig));
+        this.currentController = new ConfigResource(Type.BROKER, Integer.toString(nodeId));
+    }
+
+    SnapshotRegistry snapshotRegistry() {
+        return snapshotRegistry;
     }
 
     /**
@@ -88,14 +168,14 @@ public class ConfigurationControlManager {
      */
     ControllerResult<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
             Map<ConfigResource, Map<String, Entry<OpType, String>>> configChanges,
-            Consumer<ConfigResource> existenceChecker) {
+            boolean newlyCreatedResource) {
         List<ApiMessageAndVersion> outputRecords = new ArrayList<>();
         Map<ConfigResource, ApiError> outputResults = new HashMap<>();
         for (Entry<ConfigResource, Map<String, Entry<OpType, String>>> resourceEntry :
                 configChanges.entrySet()) {
             incrementalAlterConfigResource(resourceEntry.getKey(),
                 resourceEntry.getValue(),
-                existenceChecker,
+                newlyCreatedResource,
                 outputRecords,
                 outputResults);
         }
@@ -104,7 +184,7 @@ ControllerResult<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
 
     private void incrementalAlterConfigResource(ConfigResource configResource,
                                                 Map<String, Entry<OpType, String>> keysToOps,
-                                                Consumer<ConfigResource> existenceChecker,
+                                                boolean newlyCreatedResource,
                                                 List<ApiMessageAndVersion> outputRecords,
                                                 Map<ConfigResource, ApiError> outputResults) {
         List<ApiMessageAndVersion> newRecords = new ArrayList<>();
@@ -134,18 +214,23 @@ private void incrementalAlterConfigResource(ConfigResource configResource,
                             "key " + key + " because its type is not LIST."));
                         return;
                     }
-                    List<String> newValueParts = getParts(newValue, key, configResource);
+                    List<String> oldValueList = getParts(newValue, key, configResource);
                     if (opType == APPEND) {
-                        if (!newValueParts.contains(opValue)) {
-                            newValueParts.add(opValue);
+                        for (String value : opValue.split(",")) {
+                            if (!oldValueList.contains(value)) {
+                                oldValueList.add(value);
+                            }
+                        }
+                    } else {
+                        for (String value : opValue.split(",")) {
+                            oldValueList.remove(value);
                         }
-                        newValue = String.join(",", newValueParts);
-                    } else if (newValueParts.remove(opValue)) {
-                        newValue = String.join(",", newValueParts);
                     }
+                    newValue = String.join(",", oldValueList);
                     break;
             }
-            if (!Objects.equals(currentValue, newValue)) {
+            if (!Objects.equals(currentValue, newValue) || configResource.type().equals(Type.BROKER)) {
+                // KAFKA-14136 We need to generate records even if the value is unchanged to trigger reloads on the brokers
                 newRecords.add(new ApiMessageAndVersion(new ConfigRecord().
                     setResourceType(configResource.type().id()).
                     setResourceName(configResource.name()).
@@ -153,7 +238,7 @@ private void incrementalAlterConfigResource(ConfigResource configResource,
                     setValue(newValue), CONFIG_RECORD.highestSupportedVersion()));
             }
         }
-        ApiError error = validateAlterConfig(configResource, newRecords, existenceChecker);
+        ApiError error = validateAlterConfig(configResource, newRecords, newlyCreatedResource);
         if (error.isFailure()) {
             outputResults.put(configResource, error);
             return;
@@ -164,23 +249,27 @@ private void incrementalAlterConfigResource(ConfigResource configResource,
 
     private ApiError validateAlterConfig(ConfigResource configResource,
                                          List<ApiMessageAndVersion> newRecords,
-                                         Consumer<ConfigResource> existenceChecker) {
-        Map<String, String> newConfigs = new HashMap<>();
+                                         boolean newlyCreatedResource) {
+        Map<String, String> allConfigs = new HashMap<>();
+        Map<String, String> alteredConfigs = new HashMap<>();
         TimelineHashMap<String, String> existingConfigs = configData.get(configResource);
-        if (existingConfigs != null) newConfigs.putAll(existingConfigs);
+        if (existingConfigs != null) allConfigs.putAll(existingConfigs);
         for (ApiMessageAndVersion newRecord : newRecords) {
             ConfigRecord configRecord = (ConfigRecord) newRecord.message();
             if (configRecord.value() == null) {
-                newConfigs.remove(configRecord.name());
+                allConfigs.remove(configRecord.name());
             } else {
-                newConfigs.put(configRecord.name(), configRecord.value());
+                allConfigs.put(configRecord.name(), configRecord.value());
             }
+            alteredConfigs.put(configRecord.name(), configRecord.value());
         }
         try {
-            validator.validate(configResource, newConfigs);
-            existenceChecker.accept(configResource);
+            validator.validate(configResource, allConfigs);
+            if (!newlyCreatedResource) {
+                existenceChecker.accept(configResource);
+            }
             if (alterConfigPolicy.isPresent()) {
-                alterConfigPolicy.get().validate(new RequestMetadata(configResource, newConfigs));
+                alterConfigPolicy.get().validate(new RequestMetadata(configResource, alteredConfigs));
             }
         } catch (ConfigException e) {
             return new ApiError(INVALID_CONFIG, e.getMessage());
@@ -201,7 +290,7 @@ private ApiError validateAlterConfig(ConfigResource configResource,
      */
     ControllerResult<Map<ConfigResource, ApiError>> legacyAlterConfigs(
         Map<ConfigResource, Map<String, String>> newConfigs,
-        Consumer<ConfigResource> existenceChecker
+        boolean newlyCreatedResource
     ) {
         List<ApiMessageAndVersion> outputRecords = new ArrayList<>();
         Map<ConfigResource, ApiError> outputResults = new HashMap<>();
@@ -209,7 +298,7 @@ ControllerResult<Map<ConfigResource, ApiError>> legacyAlterConfigs(
             newConfigs.entrySet()) {
             legacyAlterConfigResource(resourceEntry.getKey(),
                 resourceEntry.getValue(),
-                existenceChecker,
+                newlyCreatedResource,
                 outputRecords,
                 outputResults);
         }
@@ -218,7 +307,7 @@ ControllerResult<Map<ConfigResource, ApiError>> legacyAlterConfigs(
 
     private void legacyAlterConfigResource(ConfigResource configResource,
                                            Map<String, String> newConfigs,
-                                           Consumer<ConfigResource> existenceChecker,
+                                           boolean newlyCreatedResource,
                                            List<ApiMessageAndVersion> outputRecords,
                                            Map<ConfigResource, ApiError> outputResults) {
         List<ApiMessageAndVersion> newRecords = new ArrayList<>();
@@ -230,7 +319,8 @@ private void legacyAlterConfigResource(ConfigResource configResource,
             String key = entry.getKey();
             String newValue = entry.getValue();
             String currentValue = currentConfigs.get(key);
-            if (!Objects.equals(newValue, currentValue)) {
+            if (!Objects.equals(currentValue, newValue) || configResource.type().equals(Type.BROKER)) {
+                // KAFKA-14136 We need to generate records even if the value is unchanged to trigger reloads on the brokers
                 newRecords.add(new ApiMessageAndVersion(new ConfigRecord().
                     setResourceType(configResource.type().id()).
                     setResourceName(configResource.name()).
@@ -247,7 +337,7 @@ private void legacyAlterConfigResource(ConfigResource configResource,
                     setValue(null), CONFIG_RECORD.highestSupportedVersion()));
             }
         }
-        ApiError error = validateAlterConfig(configResource, newRecords, existenceChecker);
+        ApiError error = validateAlterConfig(configResource, newRecords, newlyCreatedResource);
         if (error.isFailure()) {
             outputResults.put(configResource, error);
             return;
@@ -294,7 +384,11 @@ public void replay(ConfigRecord record) {
         if (configs.isEmpty()) {
             configData.remove(configResource);
         }
-        log.info("{}: set configuration {} to {}", configResource, record.name(), record.value());
+        if (configSchema.isSensitive(record)) {
+            log.info("{}: set configuration {} to {}", configResource, record.name(), Password.HIDDEN);
+        } else {
+            log.info("{}: set configuration {} to {}", configResource, record.name(), record.value());
+        }
     }
 
     // VisibleForTesting
@@ -313,7 +407,7 @@ public Map<ConfigResource, ResultOrError<Map<String, String>>> describeConfigs(
         for (Entry<ConfigResource, Collection<String>> resourceEntry : resources.entrySet()) {
             ConfigResource resource = resourceEntry.getKey();
             try {
-                validator.validate(resource, Collections.emptyMap());
+                validator.validate(resource);
             } catch (Throwable e) {
                 results.put(resource, new ResultOrError<>(ApiError.fromThrowable(e)));
                 continue;
@@ -352,6 +446,21 @@ boolean uncleanLeaderElectionEnabledForTopic(String name) {
         return false; // TODO: support configuring unclean leader election.
     }
 
+    Map<String, ConfigEntry> computeEffectiveTopicConfigs(Map<String, String> creationConfigs) {
+        return configSchema.resolveEffectiveTopicConfigs(staticConfig, clusterConfig(),
+            currentControllerConfig(), creationConfigs);
+    }
+
+    Map<String, String> clusterConfig() {
+        Map<String, String> result = configData.get(DEFAULT_NODE);
+        return (result == null) ? Collections.emptyMap() : result;
+    }
+
+    Map<String, String> currentControllerConfig() {
+        Map<String, String> result = configData.get(currentController);
+        return (result == null) ? Collections.emptyMap() : result;
+    }
+
     class ConfigurationControlIterator implements Iterator<List<ApiMessageAndVersion>> {
         private final long epoch;
         private final Iterator<Entry<ConfigResource, TimelineHashMap<String, String>>> iterator;
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ConfigurationValidator.java b/metadata/src/main/java/org/apache/kafka/controller/ConfigurationValidator.java
index b14580a4b7620..7e8f505f40b6e 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ConfigurationValidator.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ConfigurationValidator.java
@@ -23,7 +23,20 @@
 
 
 public interface ConfigurationValidator {
-    ConfigurationValidator NO_OP = (__, ___) -> { };
+    ConfigurationValidator NO_OP = new ConfigurationValidator() {
+        @Override
+        public void validate(ConfigResource resource) { }
+
+        @Override
+        public void validate(ConfigResource resource, Map<String, String> config) { }
+    };
+
+    /**
+     * Throws an ApiException if the given resource is invalid to describe.
+     *
+     * @param resource      The configuration resource.
+     */
+    void validate(ConfigResource resource);
 
     /**
      * Throws an ApiException if a configuration is invalid for the given resource.
diff --git a/metadata/src/main/java/org/apache/kafka/controller/Controller.java b/metadata/src/main/java/org/apache/kafka/controller/Controller.java
index eafea6cdcc343..3622fe225dc8f 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/Controller.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/Controller.java
@@ -22,8 +22,8 @@
 import org.apache.kafka.common.config.ConfigResource;
 import org.apache.kafka.common.message.AllocateProducerIdsRequestData;
 import org.apache.kafka.common.message.AllocateProducerIdsResponseData;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
@@ -36,184 +36,234 @@
 import org.apache.kafka.common.message.ElectLeadersResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
+import org.apache.kafka.common.message.UpdateFeaturesRequestData;
+import org.apache.kafka.common.message.UpdateFeaturesResponseData;
 import org.apache.kafka.common.quota.ClientQuotaAlteration;
 import org.apache.kafka.common.quota.ClientQuotaEntity;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistrationReply;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.metadata.authorizer.AclMutator;
 
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 
 
 public interface Controller extends AclMutator, AutoCloseable {
     /**
-     * Change partition ISRs.
+     * Change partition information.
      *
-     * @param request       The AlterIsrRequest data.
+     * @param context       The controller request context.
+     * @param request       The AlterPartitionRequest data.
      *
      * @return              A future yielding the response.
      */
-    CompletableFuture<AlterIsrResponseData> alterIsr(AlterIsrRequestData request);
+    CompletableFuture<AlterPartitionResponseData> alterPartition(
+        ControllerRequestContext context,
+        AlterPartitionRequestData request
+    );
 
     /**
      * Create a batch of topics.
      *
+     * @param context       The controller request context.
      * @param request       The CreateTopicsRequest data.
+     * @param describable   The topics which we have DESCRIBE permission on.
      *
      * @return              A future yielding the response.
      */
-    CompletableFuture<CreateTopicsResponseData>
-        createTopics(CreateTopicsRequestData request);
+    CompletableFuture<CreateTopicsResponseData> createTopics(
+        ControllerRequestContext context,
+        CreateTopicsRequestData request,
+        Set<String> describable
+    );
 
     /**
      * Unregister a broker.
      *
+     * @param context       The controller request context.
      * @param brokerId      The broker id to unregister.
      *
      * @return              A future that is completed successfully when the broker is
      *                      unregistered.
      */
-    CompletableFuture<Void> unregisterBroker(int brokerId);
+    CompletableFuture<Void> unregisterBroker(
+        ControllerRequestContext context,
+        int brokerId
+    );
 
     /**
      * Find the ids for topic names.
      *
-     * @param deadlineNs    The time by which this operation needs to be complete, before
-     *                      we will complete this operation with a timeout.
+     * @param context       The controller request context.
      * @param topicNames    The topic names to resolve.
      * @return              A future yielding a map from topic name to id.
      */
-    CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIds(long deadlineNs,
-                                                                     Collection<String> topicNames);
+    CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIds(
+        ControllerRequestContext context,
+        Collection<String> topicNames
+    );
 
     /**
      * Find the ids for all topic names. Note that this function should only be used for
      * integration tests.
      *
-     * @param deadlineNs    The time by which this operation needs to be complete, before
-     *                      we will complete this operation with a timeout.
+     * @param context       The controller request context.
      * @return              A future yielding a map from topic name to id.
      */
-    CompletableFuture<Map<String, Uuid>> findAllTopicIds(long deadlineNs);
+    CompletableFuture<Map<String, Uuid>> findAllTopicIds(
+        ControllerRequestContext context
+    );
 
     /**
      * Find the names for topic ids.
      *
-     * @param deadlineNs    The time by which this operation needs to be complete, before
-     *                      we will complete this operation with a timeout.
+     * @param context       The controller request context.
      * @param topicIds      The topic ids to resolve.
      * @return              A future yielding a map from topic id to name.
      */
-    CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNames(long deadlineNs,
-                                                                       Collection<Uuid> topicIds);
+    CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNames(
+        ControllerRequestContext context,
+        Collection<Uuid> topicIds
+    );
 
     /**
      * Delete a batch of topics.
      *
-     * @param deadlineNs    The time by which this operation needs to be complete, before
-     *                      we will complete this operation with a timeout.
+     * @param context       The controller request context.
      * @param topicIds      The IDs of the topics to delete.
      *
      * @return              A future yielding the response.
      */
-    CompletableFuture<Map<Uuid, ApiError>> deleteTopics(long deadlineNs,
-                                                        Collection<Uuid> topicIds);
+    CompletableFuture<Map<Uuid, ApiError>> deleteTopics(
+        ControllerRequestContext context,
+        Collection<Uuid> topicIds
+    );
 
     /**
      * Describe the current configuration of various resources.
      *
+     * @param context       The controller request context.
      * @param resources     A map from resources to the collection of config keys that we
      *                      want to describe for each.  If the collection is empty, then
      *                      all configuration keys will be described.
      *
-     * @return
+     * @return              A future yielding a map from config resources to results.
      */
-    CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>>
-        describeConfigs(Map<ConfigResource, Collection<String>> resources);
+    CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>> describeConfigs(
+        ControllerRequestContext context,
+        Map<ConfigResource, Collection<String>> resources
+    );
 
     /**
      * Elect new partition leaders.
      *
+     * @param context       The controller request context.
      * @param request       The request.
      *
      * @return              A future yielding the elect leaders response.
      */
-    CompletableFuture<ElectLeadersResponseData> electLeaders(ElectLeadersRequestData request);
+    CompletableFuture<ElectLeadersResponseData> electLeaders(
+        ControllerRequestContext context,
+        ElectLeadersRequestData request
+    );
 
     /**
      * Get the current finalized feature ranges for each feature.
      *
+     * @param context       The controller request context.
+     *
      * @return              A future yielding the feature ranges.
      */
-    CompletableFuture<FeatureMapAndEpoch> finalizedFeatures();
+    CompletableFuture<FinalizedControllerFeatures> finalizedFeatures(
+        ControllerRequestContext context
+    );
 
     /**
      * Perform some incremental configuration changes.
      *
+     * @param context       The controller request context.
      * @param configChanges The changes.
      * @param validateOnly  True if we should validate the changes but not apply them.
      *
      * @return              A future yielding a map from config resources to error results.
      */
     CompletableFuture<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
+        ControllerRequestContext context,
         Map<ConfigResource, Map<String, Map.Entry<AlterConfigOp.OpType, String>>> configChanges,
-        boolean validateOnly);
+        boolean validateOnly
+    );
 
     /**
      * Start or stop some partition reassignments.
      *
+     * @param context       The controller request context.
      * @param request       The alter partition reassignments request.
      *
      * @return              A future yielding the results.
      */
-    CompletableFuture<AlterPartitionReassignmentsResponseData>
-        alterPartitionReassignments(AlterPartitionReassignmentsRequestData request);
+    CompletableFuture<AlterPartitionReassignmentsResponseData> alterPartitionReassignments(
+        ControllerRequestContext context,
+        AlterPartitionReassignmentsRequestData request
+    );
 
     /**
      * List ongoing partition reassignments.
      *
+     * @param context       The controller request context.
      * @param request       The list partition reassignments request.
      *
      * @return              A future yielding the results.
      */
-    CompletableFuture<ListPartitionReassignmentsResponseData>
-        listPartitionReassignments(ListPartitionReassignmentsRequestData request);
+    CompletableFuture<ListPartitionReassignmentsResponseData> listPartitionReassignments(
+        ControllerRequestContext context,
+        ListPartitionReassignmentsRequestData request
+    );
 
     /**
      * Perform some configuration changes using the legacy API.
      *
+     * @param context       The controller request context.
      * @param newConfigs    The new configuration maps to apply.
      * @param validateOnly  True if we should validate the changes but not apply them.
      *
      * @return              A future yielding a map from config resources to error results.
      */
     CompletableFuture<Map<ConfigResource, ApiError>> legacyAlterConfigs(
-        Map<ConfigResource, Map<String, String>> newConfigs, boolean validateOnly);
+        ControllerRequestContext context,
+        Map<ConfigResource, Map<String, String>> newConfigs,
+        boolean validateOnly
+    );
 
     /**
      * Process a heartbeat from a broker.
      *
+     * @param context       The controller request context.
      * @param request      The broker heartbeat request.
      *
      * @return             A future yielding the broker heartbeat reply.
      */
     CompletableFuture<BrokerHeartbeatReply> processBrokerHeartbeat(
-        BrokerHeartbeatRequestData request);
+        ControllerRequestContext context,
+        BrokerHeartbeatRequestData request
+    );
 
     /**
      * Attempt to register the given broker.
      *
+     * @param context       The controller request context.
      * @param request      The registration request.
      *
      * @return             A future yielding the broker registration reply.
      */
     CompletableFuture<BrokerRegistrationReply> registerBroker(
-        BrokerRegistrationRequestData request);
+        ControllerRequestContext context,
+        BrokerRegistrationRequestData request
+    );
 
     /**
      * Wait for the given number of brokers to be registered and unfenced.
@@ -228,23 +278,44 @@ CompletableFuture<BrokerRegistrationReply> registerBroker(
     /**
      * Perform some client quota changes
      *
-     * @param quotaAlterations The list of quotas to alter
-     * @param validateOnly     True if we should validate the changes but not apply them.
-     * @return                 A future yielding a map of quota entities to error results.
+     * @param context           The controller request context.
+     * @param quotaAlterations  The list of quotas to alter
+     * @param validateOnly      True if we should validate the changes but not apply them.
+     *
+     * @return                  A future yielding a map of quota entities to error results.
      */
     CompletableFuture<Map<ClientQuotaEntity, ApiError>> alterClientQuotas(
-        Collection<ClientQuotaAlteration> quotaAlterations, boolean validateOnly
+        ControllerRequestContext context,
+        Collection<ClientQuotaAlteration> quotaAlterations,
+        boolean validateOnly
     );
 
     /**
      * Allocate a block of producer IDs for transactional and idempotent producers
+     *
+     * @param context   The controller request context.
      * @param request   The allocate producer IDs request
+     *
      * @return          A future which yields a new producer ID block as a response
      */
     CompletableFuture<AllocateProducerIdsResponseData> allocateProducerIds(
+        ControllerRequestContext context,
         AllocateProducerIdsRequestData request
     );
 
+    /**
+     * Update a set of feature flags
+     *
+     * @param context   The controller request context.
+     * @param request   The update features request
+     *
+     * @return          A future which yields the result of the action
+     */
+    CompletableFuture<UpdateFeaturesResponseData> updateFeatures(
+        ControllerRequestContext context,
+        UpdateFeaturesRequestData request
+    );
+
     /**
      * Begin writing a controller snapshot.  If there was already an ongoing snapshot, it
      * simply returns information about that snapshot rather than starting a new one.
@@ -256,13 +327,16 @@ CompletableFuture<AllocateProducerIdsResponseData> allocateProducerIds(
     /**
      * Create partitions on certain topics.
      *
-     * @param deadlineNs    The time by which this operation needs to be complete, before
-     *                      we will complete this operation with a timeout.
      * @param topics        The list of topics to create partitions for.
+     * @param validateOnly  If true, the request is validated, but no partitions will be created.
+     *
      * @return              A future yielding per-topic results.
      */
-    CompletableFuture<List<CreatePartitionsTopicResult>>
-            createPartitions(long deadlineNs, List<CreatePartitionsTopic> topics);
+    CompletableFuture<List<CreatePartitionsTopicResult>> createPartitions(
+        ControllerRequestContext context,
+        List<CreatePartitionsTopic> topics,
+        boolean validateOnly
+    );
 
     /**
      * Begin shutting down, but don't block.  You must still call close to clean up all
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ControllerMetrics.java b/metadata/src/main/java/org/apache/kafka/controller/ControllerMetrics.java
index fa03e058de430..ff243aebfcb69 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ControllerMetrics.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ControllerMetrics.java
@@ -51,5 +51,21 @@ public interface ControllerMetrics extends AutoCloseable {
 
     int preferredReplicaImbalanceCount();
 
+    void incrementMetadataErrorCount();
+
+    int metadataErrorCount();
+
+    void setLastAppliedRecordOffset(long offset);
+
+    long lastAppliedRecordOffset();
+
+    void setLastCommittedRecordOffset(long offset);
+
+    long lastCommittedRecordOffset();
+
+    void setLastAppliedRecordTimestamp(long timestamp);
+
+    long lastAppliedRecordTimestamp();
+
     void close();
 }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ControllerRequestContext.java b/metadata/src/main/java/org/apache/kafka/controller/ControllerRequestContext.java
new file mode 100644
index 0000000000000..e4bc2f3eb4a82
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/controller/ControllerRequestContext.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+
+import org.apache.kafka.common.message.RequestHeaderData;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.authorizer.AuthorizableRequestContext;
+
+import java.util.OptionalLong;
+
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
+
+
+public class ControllerRequestContext {
+
+    public static OptionalLong requestTimeoutMsToDeadlineNs(
+        Time time,
+        int millisecondsOffset
+    ) {
+        return OptionalLong.of(time.nanoseconds() + NANOSECONDS.convert(millisecondsOffset, MILLISECONDS));
+    }
+
+    private final KafkaPrincipal principal;
+    private final OptionalLong deadlineNs;
+    private final RequestHeaderData requestHeader;
+
+    public ControllerRequestContext(
+        RequestHeaderData requestHeader,
+        KafkaPrincipal principal,
+        OptionalLong deadlineNs
+    ) {
+        this.requestHeader = requestHeader;
+        this.principal = principal;
+        this.deadlineNs = deadlineNs;
+    }
+
+    public ControllerRequestContext(
+        AuthorizableRequestContext requestContext,
+        OptionalLong deadlineNs
+    ) {
+        this(
+            new RequestHeaderData()
+                .setRequestApiKey((short) requestContext.requestType())
+                .setRequestApiVersion((short) requestContext.requestVersion())
+                .setCorrelationId(requestContext.correlationId())
+                .setClientId(requestContext.clientId()),
+            requestContext.principal(),
+            deadlineNs
+        );
+    }
+
+    public RequestHeaderData requestHeader() {
+        return requestHeader;
+    }
+
+    public KafkaPrincipal principal() {
+        return principal;
+    }
+
+    public OptionalLong deadlineNs() {
+        return deadlineNs;
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/controller/FeatureControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/FeatureControlManager.java
index ed7c98cbb6d24..c092abcdccaa8 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/FeatureControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/FeatureControlManager.java
@@ -25,128 +25,308 @@
 import java.util.Map.Entry;
 import java.util.Map;
 import java.util.NoSuchElementException;
-import java.util.Set;
+import java.util.Optional;
 import java.util.TreeMap;
+import java.util.function.Consumer;
 
+import org.apache.kafka.clients.ApiVersions;
+import org.apache.kafka.clients.admin.FeatureUpdate;
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
-import org.apache.kafka.server.common.ApiMessageAndVersion;
-import org.apache.kafka.metadata.FeatureMap;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.metadata.VersionRange;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.apache.kafka.timeline.TimelineHashMap;
+import org.apache.kafka.timeline.TimelineObject;
+import org.slf4j.Logger;
 
 import static org.apache.kafka.common.metadata.MetadataRecordType.FEATURE_LEVEL_RECORD;
 
 
 public class FeatureControlManager {
+    public static class Builder {
+        private LogContext logContext = null;
+        private SnapshotRegistry snapshotRegistry = null;
+        private QuorumFeatures quorumFeatures = null;
+        private MetadataVersion metadataVersion = MetadataVersion.MINIMUM_KRAFT_VERSION;
+
+        Builder setLogContext(LogContext logContext) {
+            this.logContext = logContext;
+            return this;
+        }
+
+        Builder setSnapshotRegistry(SnapshotRegistry snapshotRegistry) {
+            this.snapshotRegistry = snapshotRegistry;
+            return this;
+        }
+
+        Builder setQuorumFeatures(QuorumFeatures quorumFeatures) {
+            this.quorumFeatures = quorumFeatures;
+            return this;
+        }
+
+        Builder setMetadataVersion(MetadataVersion metadataVersion) {
+            this.metadataVersion = metadataVersion;
+            return this;
+        }
+
+        public FeatureControlManager build() {
+            if (logContext == null) logContext = new LogContext();
+            if (snapshotRegistry == null) snapshotRegistry = new SnapshotRegistry(logContext);
+            if (quorumFeatures == null) {
+                quorumFeatures = new QuorumFeatures(0, new ApiVersions(), QuorumFeatures.defaultFeatureMap(),
+                        Collections.emptyList());
+            }
+            return new FeatureControlManager(logContext,
+                quorumFeatures,
+                snapshotRegistry,
+                metadataVersion);
+        }
+    }
+
+    private final Logger log;
+
     /**
      * An immutable map containing the features supported by this controller's software.
      */
-    private final Map<String, VersionRange> supportedFeatures;
+    private final QuorumFeatures quorumFeatures;
 
     /**
      * Maps feature names to finalized version ranges.
      */
-    private final TimelineHashMap<String, VersionRange> finalizedVersions;
+    private final TimelineHashMap<String, Short> finalizedVersions;
+
+    /**
+     * The current metadata version
+     */
+    private final TimelineObject<MetadataVersion> metadataVersion;
+
+    /**
+     * A boolean to see if we have encountered a metadata.version or not.
+     */
+    private final TimelineObject<Boolean> sawMetadataVersion;
 
-    FeatureControlManager(Map<String, VersionRange> supportedFeatures,
-                          SnapshotRegistry snapshotRegistry) {
-        this.supportedFeatures = supportedFeatures;
+    private FeatureControlManager(
+        LogContext logContext,
+        QuorumFeatures quorumFeatures,
+        SnapshotRegistry snapshotRegistry,
+        MetadataVersion metadataVersion
+    ) {
+        this.log = logContext.logger(FeatureControlManager.class);
+        this.quorumFeatures = quorumFeatures;
         this.finalizedVersions = new TimelineHashMap<>(snapshotRegistry, 0);
+        this.metadataVersion = new TimelineObject<>(snapshotRegistry, metadataVersion);
+        this.sawMetadataVersion = new TimelineObject<>(snapshotRegistry, false);
     }
 
     ControllerResult<Map<String, ApiError>> updateFeatures(
-            Map<String, VersionRange> updates, Set<String> downgradeables,
-            Map<Integer, Map<String, VersionRange>> brokerFeatures) {
+        Map<String, Short> updates,
+        Map<String, FeatureUpdate.UpgradeType> upgradeTypes,
+        Map<Integer, Map<String, VersionRange>> brokerFeatures,
+        boolean validateOnly
+    ) {
         TreeMap<String, ApiError> results = new TreeMap<>();
         List<ApiMessageAndVersion> records = new ArrayList<>();
-        for (Entry<String, VersionRange> entry : updates.entrySet()) {
+        for (Entry<String, Short> entry : updates.entrySet()) {
             results.put(entry.getKey(), updateFeature(entry.getKey(), entry.getValue(),
-                downgradeables.contains(entry.getKey()), brokerFeatures, records));
+                upgradeTypes.getOrDefault(entry.getKey(), FeatureUpdate.UpgradeType.UPGRADE), brokerFeatures, records));
         }
 
-        return ControllerResult.atomicOf(records, results);
+        if (validateOnly) {
+            return ControllerResult.of(Collections.emptyList(), results);
+        } else {
+            return ControllerResult.atomicOf(records, results);
+        }
+    }
+
+    MetadataVersion metadataVersion() {
+        return metadataVersion.get();
     }
 
-    private ApiError updateFeature(String featureName,
-                                   VersionRange newRange,
-                                   boolean downgradeable,
-                                   Map<Integer, Map<String, VersionRange>> brokerFeatures,
-                                   List<ApiMessageAndVersion> records) {
-        if (newRange.min() <= 0) {
-            return new ApiError(Errors.INVALID_UPDATE_VERSION,
-                "The lower value for the new range cannot be less than 1.");
-        }
-        if (newRange.max() <= 0) {
-            return new ApiError(Errors.INVALID_UPDATE_VERSION,
-                "The upper value for the new range cannot be less than 1.");
-        }
-        VersionRange localRange = supportedFeatures.get(featureName);
-        if (localRange == null || !localRange.contains(newRange)) {
-            return new ApiError(Errors.INVALID_UPDATE_VERSION,
-                "The controller does not support the given feature range.");
-        }
-        for (Entry<Integer, Map<String, VersionRange>> brokerEntry :
-            brokerFeatures.entrySet()) {
+    private ApiError updateFeature(
+        String featureName,
+        short newVersion,
+        FeatureUpdate.UpgradeType upgradeType,
+        Map<Integer, Map<String, VersionRange>> brokersAndFeatures,
+        List<ApiMessageAndVersion> records
+    ) {
+        if (upgradeType.equals(FeatureUpdate.UpgradeType.UNKNOWN)) {
+            return invalidUpdateVersion(featureName, newVersion,
+                "The controller does not support the given upgrade type.");
+        }
+
+        final Short currentVersion;
+        if (featureName.equals(MetadataVersion.FEATURE_NAME)) {
+            currentVersion = metadataVersion.get().featureLevel();
+        } else {
+            currentVersion = finalizedVersions.get(featureName);
+        }
+
+        if (newVersion < 0) {
+            return invalidUpdateVersion(featureName, newVersion,
+                "A feature version cannot be less than 0.");
+        }
+
+        Optional<String> reasonNotSupported = quorumFeatures.reasonNotSupported(featureName, newVersion);
+        if (reasonNotSupported.isPresent()) {
+            return invalidUpdateVersion(featureName, newVersion, reasonNotSupported.get());
+        }
+
+        for (Entry<Integer, Map<String, VersionRange>> brokerEntry : brokersAndFeatures.entrySet()) {
             VersionRange brokerRange = brokerEntry.getValue().get(featureName);
-            if (brokerRange == null || !brokerRange.contains(newRange)) {
-                return new ApiError(Errors.INVALID_UPDATE_VERSION,
+            if (brokerRange == null) {
+                return invalidUpdateVersion(featureName, newVersion,
+                    "Broker " + brokerEntry.getKey() + " does not support this feature.");
+            } else if (!brokerRange.contains(newVersion)) {
+                return invalidUpdateVersion(featureName, newVersion,
                     "Broker " + brokerEntry.getKey() + " does not support the given " +
-                        "feature range.");
+                    "version. It supports " + brokerRange.min() + " to " + brokerRange.max() + ".");
+            }
+        }
+
+        if (currentVersion != null && newVersion < currentVersion) {
+            if (upgradeType.equals(FeatureUpdate.UpgradeType.UPGRADE)) {
+                return invalidUpdateVersion(featureName, newVersion,
+                    "Can't downgrade the version of this feature without setting the " +
+                    "upgrade type to either safe or unsafe downgrade.");
             }
         }
-        VersionRange currentRange = finalizedVersions.get(featureName);
-        if (currentRange != null && currentRange.max() > newRange.max()) {
-            if (!downgradeable) {
-                return new ApiError(Errors.INVALID_UPDATE_VERSION,
-                    "Can't downgrade the maximum version of this feature without " +
-                    "setting downgradable to true.");
+
+        if (featureName.equals(MetadataVersion.FEATURE_NAME)) {
+            // Perform additional checks if we're updating metadata.version
+            return updateMetadataVersion(newVersion, upgradeType.equals(FeatureUpdate.UpgradeType.UNSAFE_DOWNGRADE), records::add);
+        } else {
+            records.add(new ApiMessageAndVersion(
+                new FeatureLevelRecord()
+                    .setName(featureName)
+                    .setFeatureLevel(newVersion),
+                FEATURE_LEVEL_RECORD.highestSupportedVersion()));
+            return ApiError.NONE;
+        }
+    }
+
+    private ApiError invalidUpdateVersion(String feature, short version, String message) {
+        String errorMessage = String.format("Invalid update version %d for feature %s. %s", version, feature, message);
+        log.debug(errorMessage);
+        return new ApiError(Errors.INVALID_UPDATE_VERSION, errorMessage);
+    }
+
+    /**
+     * Perform some additional validation for metadata.version updates.
+     */
+    private ApiError updateMetadataVersion(
+        short newVersionLevel,
+        boolean allowUnsafeDowngrade,
+        Consumer<ApiMessageAndVersion> recordConsumer
+    ) {
+        MetadataVersion currentVersion = metadataVersion();
+        final MetadataVersion newVersion;
+        try {
+            newVersion = MetadataVersion.fromFeatureLevel(newVersionLevel);
+        } catch (IllegalArgumentException e) {
+            return invalidMetadataVersion(newVersionLevel, "Unknown metadata.version.");
+        }
+
+        if (newVersion.isLessThan(currentVersion)) {
+            // This is a downgrade
+            boolean metadataChanged = MetadataVersion.checkIfMetadataChanged(currentVersion, newVersion);
+            if (!metadataChanged) {
+                log.info("Downgrading metadata.version from {} to {}.", currentVersion, newVersion);
+            } else if (allowUnsafeDowngrade) {
+                log.info("Downgrading metadata.version unsafely from {} to {}.", currentVersion, newVersion);
+            } else {
+                return invalidMetadataVersion(newVersionLevel, "Refusing to perform the requested " +
+                        "downgrade because it might delete metadata information. Retry using " +
+                        "UNSAFE_DOWNGRADE if you want to force the downgrade to proceed.");
             }
+        } else {
+            log.info("Upgrading metadata.version from {} to {}.", currentVersion, newVersion);
         }
-        records.add(new ApiMessageAndVersion(
-            new FeatureLevelRecord().setName(featureName).
-                setMinFeatureLevel(newRange.min()).setMaxFeatureLevel(newRange.max()),
-            FEATURE_LEVEL_RECORD.highestSupportedVersion()));
+
+        recordConsumer.accept(new ApiMessageAndVersion(
+            new FeatureLevelRecord()
+                .setName(MetadataVersion.FEATURE_NAME)
+                .setFeatureLevel(newVersionLevel), FEATURE_LEVEL_RECORD.lowestSupportedVersion()));
         return ApiError.NONE;
     }
 
-    FeatureMapAndEpoch finalizedFeatures(long lastCommittedOffset) {
-        Map<String, VersionRange> features = new HashMap<>();
-        for (Entry<String, VersionRange> entry : finalizedVersions.entrySet(lastCommittedOffset)) {
+    private ApiError invalidMetadataVersion(short version, String message) {
+        String errorMessage = String.format("Invalid metadata.version %d. %s", version, message);
+        log.error(errorMessage);
+        return new ApiError(Errors.INVALID_UPDATE_VERSION, errorMessage);
+    }
+
+    FinalizedControllerFeatures finalizedFeatures(long epoch) {
+        Map<String, Short> features = new HashMap<>();
+        features.put(MetadataVersion.FEATURE_NAME, metadataVersion.get(epoch).featureLevel());
+        for (Entry<String, Short> entry : finalizedVersions.entrySet(epoch)) {
             features.put(entry.getKey(), entry.getValue());
         }
-        return new FeatureMapAndEpoch(new FeatureMap(features), lastCommittedOffset);
+        return new FinalizedControllerFeatures(features, epoch);
+    }
+
+    /**
+     * @return true if a FeatureLevelRecord for "metadata.version" has been replayed. False otherwise
+     */
+    boolean sawMetadataVersion() {
+        return this.sawMetadataVersion.get();
     }
 
     public void replay(FeatureLevelRecord record) {
-        finalizedVersions.put(record.name(),
-            new VersionRange(record.minFeatureLevel(), record.maxFeatureLevel()));
+        VersionRange range = quorumFeatures.localSupportedFeature(record.name());
+        if (!range.contains(record.featureLevel())) {
+            throw new RuntimeException("Tried to apply FeatureLevelRecord " + record + ", but this controller only " +
+                "supports versions " + range);
+        }
+        if (record.name().equals(MetadataVersion.FEATURE_NAME)) {
+            log.info("Setting metadata.version to {}", record.featureLevel());
+            metadataVersion.set(MetadataVersion.fromFeatureLevel(record.featureLevel()));
+            sawMetadataVersion.set(true);
+        } else {
+            if (record.featureLevel() == 0) {
+                log.info("Removing feature {}", record.name());
+                finalizedVersions.remove(record.name());
+            } else {
+                log.info("Setting feature {} to {}", record.name(), record.featureLevel());
+                finalizedVersions.put(record.name(), record.featureLevel());
+            }
+        }
     }
 
     class FeatureControlIterator implements Iterator<List<ApiMessageAndVersion>> {
-        private final Iterator<Entry<String, VersionRange>> iterator;
+        private final Iterator<Entry<String, Short>> iterator;
+        private final MetadataVersion metadataVersion;
+        private boolean wroteVersion = false;
 
         FeatureControlIterator(long epoch) {
             this.iterator = finalizedVersions.entrySet(epoch).iterator();
+            this.metadataVersion = FeatureControlManager.this.metadataVersion.get(epoch);
         }
 
         @Override
         public boolean hasNext() {
-            return iterator.hasNext();
+            return !wroteVersion || iterator.hasNext();
         }
 
         @Override
         public List<ApiMessageAndVersion> next() {
+            // Write the metadata.version first
+            if (!wroteVersion) {
+                wroteVersion = true;
+                return Collections.singletonList(new ApiMessageAndVersion(new FeatureLevelRecord()
+                    .setName(MetadataVersion.FEATURE_NAME)
+                    .setFeatureLevel(metadataVersion.featureLevel()), FEATURE_LEVEL_RECORD.lowestSupportedVersion()));
+            }
+            // Then write the rest of the features
             if (!hasNext()) throw new NoSuchElementException();
-            Entry<String, VersionRange> entry = iterator.next();
-            VersionRange versions = entry.getValue();
-            return Collections.singletonList(new ApiMessageAndVersion(new FeatureLevelRecord().
-                setName(entry.getKey()).
-                setMinFeatureLevel(versions.min()).
-                setMaxFeatureLevel(versions.max()), FEATURE_LEVEL_RECORD.highestSupportedVersion()));
+            Entry<String, Short> entry = iterator.next();
+            return Collections.singletonList(new ApiMessageAndVersion(new FeatureLevelRecord()
+                .setName(entry.getKey())
+                .setFeatureLevel(entry.getValue()), FEATURE_LEVEL_RECORD.highestSupportedVersion()));
         }
     }
 
diff --git a/metadata/src/main/java/org/apache/kafka/controller/PartitionChangeBuilder.java b/metadata/src/main/java/org/apache/kafka/controller/PartitionChangeBuilder.java
index cf0f6bfd609af..cdd6d4416f1bc 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/PartitionChangeBuilder.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/PartitionChangeBuilder.java
@@ -19,6 +19,7 @@
 
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.metadata.PartitionChangeRecord;
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.Replicas;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
@@ -30,7 +31,6 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.function.Function;
-import java.util.function.Supplier;
 
 import static org.apache.kafka.common.metadata.MetadataRecordType.PARTITION_CHANGE_RECORD;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER;
@@ -48,35 +48,55 @@ public static boolean changeRecordIsNoOp(PartitionChangeRecord record) {
         if (record.replicas() != null) return false;
         if (record.removingReplicas() != null) return false;
         if (record.addingReplicas() != null) return false;
+        if (record.leaderRecoveryState() != LeaderRecoveryState.NO_CHANGE) return false;
         return true;
     }
 
+    /**
+     * Election types.
+     */
+    public enum Election {
+        /**
+         * Perform leader election to keep the partition online. Elect the preferred replica if it is in the ISR.
+         */
+        PREFERRED,
+        /**
+         * Perform leader election from the ISR to keep the partition online.
+         */
+        ONLINE,
+        /**
+         * Prefer replicas in the ISR but keep the partition online even if it requires picking a leader that is not in the ISR.
+         */
+        UNCLEAN
+    }
+
     private final PartitionRegistration partition;
     private final Uuid topicId;
     private final int partitionId;
     private final Function<Integer, Boolean> isAcceptableLeader;
-    private final Supplier<Boolean> uncleanElectionOk;
+    private final boolean isLeaderRecoverySupported;
     private List<Integer> targetIsr;
     private List<Integer> targetReplicas;
     private List<Integer> targetRemoving;
     private List<Integer> targetAdding;
-    private boolean alwaysElectPreferredIfPossible;
+    private Election election = Election.ONLINE;
+    private LeaderRecoveryState targetLeaderRecoveryState;
 
     public PartitionChangeBuilder(PartitionRegistration partition,
                                   Uuid topicId,
                                   int partitionId,
                                   Function<Integer, Boolean> isAcceptableLeader,
-                                  Supplier<Boolean> uncleanElectionOk) {
+                                  boolean isLeaderRecoverySupported) {
         this.partition = partition;
         this.topicId = topicId;
         this.partitionId = partitionId;
         this.isAcceptableLeader = isAcceptableLeader;
-        this.uncleanElectionOk = uncleanElectionOk;
+        this.isLeaderRecoverySupported = isLeaderRecoverySupported;
         this.targetIsr = Replicas.toList(partition.isr);
         this.targetReplicas = Replicas.toList(partition.replicas);
         this.targetRemoving = Replicas.toList(partition.removingReplicas);
         this.targetAdding = Replicas.toList(partition.addingReplicas);
-        this.alwaysElectPreferredIfPossible = false;
+        this.targetLeaderRecoveryState = partition.leaderRecoveryState;
     }
 
     public PartitionChangeBuilder setTargetIsr(List<Integer> targetIsr) {
@@ -89,8 +109,8 @@ public PartitionChangeBuilder setTargetReplicas(List<Integer> targetReplicas) {
         return this;
     }
 
-    public PartitionChangeBuilder setAlwaysElectPreferredIfPossible(boolean alwaysElectPreferredIfPossible) {
-        this.alwaysElectPreferredIfPossible = alwaysElectPreferredIfPossible;
+    public PartitionChangeBuilder setElection(Election election) {
+        this.election = election;
         return this;
     }
 
@@ -104,53 +124,114 @@ public PartitionChangeBuilder setTargetAdding(List<Integer> targetAdding) {
         return this;
     }
 
-    boolean shouldTryElection() {
-        // If the new isr doesn't have the current leader, we need to try to elect a new
-        // one. Note: this also handles the case where the current leader is NO_LEADER,
-        // since that value cannot appear in targetIsr.
-        if (!targetIsr.contains(partition.leader)) return true;
-
-        // Check if we want to try to get away from a non-preferred leader.
-        if (alwaysElectPreferredIfPossible && !partition.hasPreferredLeader()) return true;
-
-        return false;
+    public PartitionChangeBuilder setTargetLeaderRecoveryState(LeaderRecoveryState targetLeaderRecoveryState) {
+        this.targetLeaderRecoveryState = targetLeaderRecoveryState;
+        return this;
     }
 
-    class BestLeader {
+    // VisibleForTesting
+    static class ElectionResult {
         final int node;
         final boolean unclean;
 
-        BestLeader() {
-            for (int replica : targetReplicas) {
-                if (targetIsr.contains(replica) && isAcceptableLeader.apply(replica)) {
-                    this.node = replica;
-                    this.unclean = false;
-                    return;
-                }
-            }
-            if (uncleanElectionOk.get()) {
-                for (int replica : targetReplicas) {
-                    if (isAcceptableLeader.apply(replica)) {
-                        this.node = replica;
-                        this.unclean = true;
-                        return;
-                    }
-                }
+        private ElectionResult(int node, boolean unclean) {
+            this.node = node;
+            this.unclean = unclean;
+        }
+    }
+
+    // VisibleForTesting
+    /**
+     * Perform leader election based on the partition state and leader election type.
+     *
+     * See documentation for the Election type to see more details on the election types supported.
+     */
+    ElectionResult electLeader() {
+        if (election == Election.PREFERRED) {
+            return electPreferredLeader();
+        }
+
+        return electAnyLeader();
+    }
+
+    /**
+     * Assumes that the election type is Election.PREFERRED
+     */
+    private ElectionResult electPreferredLeader() {
+        int preferredReplica = targetReplicas.get(0);
+        if (isValidNewLeader(preferredReplica)) {
+            return new ElectionResult(preferredReplica, false);
+        }
+
+        if (isValidNewLeader(partition.leader)) {
+            // Don't consider a new leader since the current leader meets all the constraints
+            return new ElectionResult(partition.leader, false);
+        }
+
+        Optional<Integer> onlineLeader = targetReplicas.stream()
+            .skip(1)
+            .filter(this::isValidNewLeader)
+            .findFirst();
+        if (onlineLeader.isPresent()) {
+            return new ElectionResult(onlineLeader.get(), false);
+        }
+
+        return new ElectionResult(NO_LEADER, false);
+    }
+
+    /**
+     * Assumes that the election type is either Election.ONLINE or Election.UNCLEAN
+     */
+    private ElectionResult electAnyLeader() {
+        if (isValidNewLeader(partition.leader)) {
+            // Don't consider a new leader since the current leader meets all the constraints
+            return new ElectionResult(partition.leader, false);
+        }
+
+        Optional<Integer> onlineLeader = targetReplicas.stream()
+            .filter(this::isValidNewLeader)
+            .findFirst();
+        if (onlineLeader.isPresent()) {
+            return new ElectionResult(onlineLeader.get(), false);
+        }
+
+        if (election == Election.UNCLEAN) {
+            // Attempt unclean leader election
+            Optional<Integer> uncleanLeader = targetReplicas.stream()
+                .filter(replica -> isAcceptableLeader.apply(replica))
+                .findFirst();
+            if (uncleanLeader.isPresent()) {
+                return new ElectionResult(uncleanLeader.get(), true);
             }
-            this.node = NO_LEADER;
-            this.unclean = false;
         }
+
+        return new ElectionResult(NO_LEADER, false);
+    }
+
+    private boolean isValidNewLeader(int replica) {
+        return targetIsr.contains(replica) && isAcceptableLeader.apply(replica);
     }
 
     private void tryElection(PartitionChangeRecord record) {
-        BestLeader bestLeader = new BestLeader();
-        if (bestLeader.node != partition.leader) {
-            log.debug("Setting new leader for topicId {}, partition {} to {}", topicId, partitionId, bestLeader.node);
-            record.setLeader(bestLeader.node);
-            if (bestLeader.unclean) {
+        ElectionResult electionResult = electLeader();
+        if (electionResult.node != partition.leader) {
+            log.debug(
+                "Setting new leader for topicId {}, partition {} to {} using {} election",
+                topicId,
+                partitionId,
+                electionResult.node,
+                electionResult.unclean ? "an unclean" : "a clean"
+            );
+            record.setLeader(electionResult.node);
+            if (electionResult.unclean) {
                 // If the election was unclean, we have to forcibly set the ISR to just the
                 // new leader. This can result in data loss!
-                record.setIsr(Collections.singletonList(bestLeader.node));
+                record.setIsr(Collections.singletonList(electionResult.node));
+                if (partition.leaderRecoveryState != LeaderRecoveryState.RECOVERING &&
+                    isLeaderRecoverySupported) {
+                    // And mark the leader recovery state as RECOVERING
+                    record.setLeaderRecoveryState(LeaderRecoveryState.RECOVERING.value());
+                }
             }
         } else {
             log.debug("Failed to find a new leader with current state: {}", this);
@@ -163,7 +244,7 @@ private void tryElection(PartitionChangeRecord record) {
      * We need to bump the leader epoch if:
      * 1. The leader changed, or
      * 2. The new ISR does not contain all the nodes that the old ISR did, or
-     * 3. The new replia list does not contain all the nodes that the old replia list did.
+     * 3. The new replica list does not contain all the nodes that the old replica list did.
      *
      * Changes that do NOT fall in any of these categories will increase the partition epoch, but
      * not the leader epoch. Note that if the leader epoch increases, the partition epoch will
@@ -222,13 +303,12 @@ public Optional<ApiMessageAndVersion> build() {
 
         completeReassignmentIfNeeded();
 
-        if (shouldTryElection()) {
-            tryElection(record);
-        }
+        tryElection(record);
 
         triggerLeaderEpochBumpIfNeeded(record);
 
-        if (!targetIsr.isEmpty() && !targetIsr.equals(Replicas.toList(partition.isr))) {
+        if (record.isr() == null && !targetIsr.isEmpty() && !targetIsr.equals(Replicas.toList(partition.isr))) {
+            // Set the new ISR if it is different from the current ISR and unclean leader election didn't already set it.
             record.setIsr(targetIsr);
         }
         if (!targetReplicas.isEmpty() && !targetReplicas.equals(Replicas.toList(partition.replicas))) {
@@ -240,6 +320,10 @@ public Optional<ApiMessageAndVersion> build() {
         if (!targetAdding.equals(Replicas.toList(partition.addingReplicas))) {
             record.setAddingReplicas(targetAdding);
         }
+        if (targetLeaderRecoveryState != partition.leaderRecoveryState) {
+            record.setLeaderRecoveryState(targetLeaderRecoveryState.value());
+        }
+
         if (changeRecordIsNoOp(record)) {
             return Optional.empty();
         } else {
@@ -255,12 +339,12 @@ public String toString() {
             ", topicId=" + topicId +
             ", partitionId=" + partitionId +
             ", isAcceptableLeader=" + isAcceptableLeader +
-            ", uncleanElectionOk=" + uncleanElectionOk +
             ", targetIsr=" + targetIsr +
             ", targetReplicas=" + targetReplicas +
             ", targetRemoving=" + targetRemoving +
             ", targetAdding=" + targetAdding +
-            ", alwaysElectPreferredIfPossible=" + alwaysElectPreferredIfPossible +
+            ", election=" + election +
+            ", targetLeaderRecoveryState=" + targetLeaderRecoveryState +
             ')';
     }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ProducerIdControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/ProducerIdControlManager.java
index d6491e2c1405b..178ef46bdb55e 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ProducerIdControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ProducerIdControlManager.java
@@ -23,6 +23,7 @@
 import org.apache.kafka.server.common.ProducerIdsBlock;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.apache.kafka.timeline.TimelineLong;
+import org.apache.kafka.timeline.TimelineObject;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -33,17 +34,19 @@
 public class ProducerIdControlManager {
 
     private final ClusterControlManager clusterControlManager;
-    private final TimelineLong nextProducerId; // Initializes to 0
+    private final TimelineObject<ProducerIdsBlock> nextProducerBlock;
+    private final TimelineLong brokerEpoch;
 
     ProducerIdControlManager(ClusterControlManager clusterControlManager, SnapshotRegistry snapshotRegistry) {
         this.clusterControlManager = clusterControlManager;
-        this.nextProducerId = new TimelineLong(snapshotRegistry);
+        this.nextProducerBlock = new TimelineObject<>(snapshotRegistry, ProducerIdsBlock.EMPTY);
+        this.brokerEpoch = new TimelineLong(snapshotRegistry);
     }
 
     ControllerResult<ProducerIdsBlock> generateNextProducerId(int brokerId, long brokerEpoch) {
         clusterControlManager.checkBrokerEpoch(brokerId, brokerEpoch);
 
-        long firstProducerIdInBlock = nextProducerId.get();
+        long firstProducerIdInBlock = nextProducerBlock.get().firstProducerId();
         if (firstProducerIdInBlock > Long.MAX_VALUE - ProducerIdsBlock.PRODUCER_ID_BLOCK_SIZE) {
             throw new UnknownServerException("Exhausted all producerIds as the next block's end producerId " +
                 "has exceeded the int64 type limit");
@@ -60,25 +63,26 @@ ControllerResult<ProducerIdsBlock> generateNextProducerId(int brokerId, long bro
     }
 
     void replay(ProducerIdsRecord record) {
-        long currentNextProducerId = nextProducerId.get();
+        long currentNextProducerId = nextProducerBlock.get().firstProducerId();
         if (record.nextProducerId() <= currentNextProducerId) {
             throw new RuntimeException("Next Producer ID from replayed record (" + record.nextProducerId() + ")" +
                 " is not greater than current next Producer ID (" + currentNextProducerId + ")");
         } else {
-            nextProducerId.set(record.nextProducerId());
+            nextProducerBlock.set(new ProducerIdsBlock(record.brokerId(), record.nextProducerId(), ProducerIdsBlock.PRODUCER_ID_BLOCK_SIZE));
+            brokerEpoch.set(record.brokerEpoch());
         }
     }
 
     Iterator<List<ApiMessageAndVersion>> iterator(long epoch) {
         List<ApiMessageAndVersion> records = new ArrayList<>(1);
 
-        long producerId = nextProducerId.get(epoch);
-        if (producerId > 0) {
+        ProducerIdsBlock producerIdBlock = nextProducerBlock.get(epoch);
+        if (producerIdBlock.firstProducerId() > 0) {
             records.add(new ApiMessageAndVersion(
                 new ProducerIdsRecord()
-                    .setNextProducerId(producerId)
-                    .setBrokerId(0)
-                    .setBrokerEpoch(0L),
+                    .setNextProducerId(producerIdBlock.firstProducerId())
+                    .setBrokerId(producerIdBlock.assignedBrokerId())
+                    .setBrokerEpoch(brokerEpoch.get(epoch)),
                 (short) 0));
         }
         return Collections.singleton(records).iterator();
diff --git a/metadata/src/main/java/org/apache/kafka/controller/QuorumController.java b/metadata/src/main/java/org/apache/kafka/controller/QuorumController.java
index 11594ed77d2c9..3fee25841ba74 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/QuorumController.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/QuorumController.java
@@ -18,6 +18,7 @@
 package org.apache.kafka.controller;
 
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType;
+import org.apache.kafka.clients.admin.FeatureUpdate;
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.acl.AclBinding;
 import org.apache.kafka.common.acl.AclBindingFilter;
@@ -26,12 +27,13 @@
 import org.apache.kafka.common.errors.BrokerIdNotRegisteredException;
 import org.apache.kafka.common.errors.InvalidRequestException;
 import org.apache.kafka.common.errors.NotControllerException;
+import org.apache.kafka.common.errors.StaleBrokerEpochException;
 import org.apache.kafka.common.errors.UnknownServerException;
 import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
 import org.apache.kafka.common.message.AllocateProducerIdsRequestData;
 import org.apache.kafka.common.message.AllocateProducerIdsResponseData;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
@@ -44,12 +46,16 @@
 import org.apache.kafka.common.message.ElectLeadersResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
+import org.apache.kafka.common.message.UpdateFeaturesRequestData;
+import org.apache.kafka.common.message.UpdateFeaturesResponseData;
 import org.apache.kafka.common.metadata.AccessControlEntryRecord;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
 import org.apache.kafka.common.metadata.ConfigRecord;
 import org.apache.kafka.common.metadata.ClientQuotaRecord;
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
 import org.apache.kafka.common.metadata.FenceBrokerRecord;
 import org.apache.kafka.common.metadata.MetadataRecordType;
+import org.apache.kafka.common.metadata.NoOpRecord;
 import org.apache.kafka.common.metadata.PartitionChangeRecord;
 import org.apache.kafka.common.metadata.PartitionRecord;
 import org.apache.kafka.common.metadata.ProducerIdsRecord;
@@ -67,23 +73,26 @@
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.controller.SnapshotGenerator.Section;
-import org.apache.kafka.metadata.KafkaConfigSchema;
-import org.apache.kafka.metadata.authorizer.ClusterMetadataAuthorizer;
-import org.apache.kafka.server.authorizer.AclCreateResult;
-import org.apache.kafka.server.authorizer.AclDeleteResult;
-import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistrationReply;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
-import org.apache.kafka.metadata.VersionRange;
-import org.apache.kafka.queue.EventQueue;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
+import org.apache.kafka.metadata.KafkaConfigSchema;
+import org.apache.kafka.metadata.authorizer.ClusterMetadataAuthorizer;
+import org.apache.kafka.metadata.placement.ReplicaPlacer;
+import org.apache.kafka.metadata.placement.StripedReplicaPlacer;
 import org.apache.kafka.queue.EventQueue.EarliestDeadlineFunction;
+import org.apache.kafka.queue.EventQueue;
 import org.apache.kafka.queue.KafkaEventQueue;
 import org.apache.kafka.raft.Batch;
 import org.apache.kafka.raft.BatchReader;
 import org.apache.kafka.raft.LeaderAndEpoch;
 import org.apache.kafka.raft.OffsetAndEpoch;
 import org.apache.kafka.raft.RaftClient;
+import org.apache.kafka.server.authorizer.AclCreateResult;
+import org.apache.kafka.server.authorizer.AclDeleteResult;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.apache.kafka.server.fault.FaultHandler;
 import org.apache.kafka.server.policy.AlterConfigPolicy;
 import org.apache.kafka.server.policy.CreateTopicPolicy;
 import org.apache.kafka.snapshot.SnapshotReader;
@@ -94,6 +103,7 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Map;
@@ -101,6 +111,7 @@
 import java.util.OptionalInt;
 import java.util.OptionalLong;
 import java.util.Random;
+import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.RejectedExecutionException;
@@ -110,7 +121,6 @@
 import java.util.stream.Collectors;
 
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
-import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.concurrent.TimeUnit.NANOSECONDS;
 
 
@@ -129,6 +139,10 @@
  * the fact that the controller may have several operations in progress at any given
  * point.  The future associated with each operation will not be completed until the
  * results of the operation have been made durable to the metadata log.
+ *
+ * The QuorumController uses the "metadata.version" feature flag as a mechanism to control
+ * the usage of new log record schemas. Starting with 3.3, this version must be set before
+ * the controller can fully initialize.
  */
 public final class QuorumController implements Controller {
     /**
@@ -137,28 +151,48 @@ public final class QuorumController implements Controller {
     static public class Builder {
         private final int nodeId;
         private final String clusterId;
+        private FaultHandler fatalFaultHandler = null;
+        private FaultHandler metadataFaultHandler = null;
         private Time time = Time.SYSTEM;
         private String threadNamePrefix = null;
         private LogContext logContext = null;
-        private KafkaConfigSchema configSchema = new KafkaConfigSchema(Collections.emptyMap());
+        private KafkaConfigSchema configSchema = KafkaConfigSchema.EMPTY;
         private RaftClient<ApiMessageAndVersion> raftClient = null;
-        private Map<String, VersionRange> supportedFeatures = Collections.emptyMap();
+        private QuorumFeatures quorumFeatures = null;
         private short defaultReplicationFactor = 3;
         private int defaultNumPartitions = 1;
         private ReplicaPlacer replicaPlacer = new StripedReplicaPlacer(new Random());
         private long snapshotMaxNewRecordBytes = Long.MAX_VALUE;
-        private long sessionTimeoutNs = NANOSECONDS.convert(18, TimeUnit.SECONDS);
+        private OptionalLong leaderImbalanceCheckIntervalNs = OptionalLong.empty();
+        private OptionalLong maxIdleIntervalNs = OptionalLong.empty();
+        private long sessionTimeoutNs = ClusterControlManager.DEFAULT_SESSION_TIMEOUT_NS;
         private ControllerMetrics controllerMetrics = null;
         private Optional<CreateTopicPolicy> createTopicPolicy = Optional.empty();
         private Optional<AlterConfigPolicy> alterConfigPolicy = Optional.empty();
         private ConfigurationValidator configurationValidator = ConfigurationValidator.NO_OP;
         private Optional<ClusterMetadataAuthorizer> authorizer = Optional.empty();
+        private Map<String, Object> staticConfig = Collections.emptyMap();
+        private BootstrapMetadata bootstrapMetadata = null;
 
         public Builder(int nodeId, String clusterId) {
             this.nodeId = nodeId;
             this.clusterId = clusterId;
         }
 
+        public Builder setFatalFaultHandler(FaultHandler fatalFaultHandler) {
+            this.fatalFaultHandler = fatalFaultHandler;
+            return this;
+        }
+
+        public Builder setMetadataFaultHandler(FaultHandler metadataFaultHandler) {
+            this.metadataFaultHandler = metadataFaultHandler;
+            return this;
+        }
+
+        public int nodeId() {
+            return nodeId;
+        }
+
         public Builder setTime(Time time) {
             this.time = time;
             return this;
@@ -184,8 +218,8 @@ public Builder setRaftClient(RaftClient<ApiMessageAndVersion> logManager) {
             return this;
         }
 
-        public Builder setSupportedFeatures(Map<String, VersionRange> supportedFeatures) {
-            this.supportedFeatures = supportedFeatures;
+        public Builder setQuorumFeatures(QuorumFeatures quorumFeatures) {
+            this.quorumFeatures = quorumFeatures;
             return this;
         }
 
@@ -209,6 +243,16 @@ public Builder setSnapshotMaxNewRecordBytes(long value) {
             return this;
         }
 
+        public Builder setLeaderImbalanceCheckIntervalNs(OptionalLong value) {
+            this.leaderImbalanceCheckIntervalNs = value;
+            return this;
+        }
+
+        public Builder setMaxIdleIntervalNs(OptionalLong value) {
+            this.maxIdleIntervalNs = value;
+            return this;
+        }
+
         public Builder setSessionTimeoutNs(long sessionTimeoutNs) {
             this.sessionTimeoutNs = sessionTimeoutNs;
             return this;
@@ -219,6 +263,11 @@ public Builder setMetrics(ControllerMetrics controllerMetrics) {
             return this;
         }
 
+        public Builder setBootstrapMetadata(BootstrapMetadata bootstrapMetadata) {
+            this.bootstrapMetadata = bootstrapMetadata;
+            return this;
+        }
+
         public Builder setCreateTopicPolicy(Optional<CreateTopicPolicy> createTopicPolicy) {
             this.createTopicPolicy = createTopicPolicy;
             return this;
@@ -239,11 +288,25 @@ public Builder setAuthorizer(ClusterMetadataAuthorizer authorizer) {
             return this;
         }
 
+        public Builder setStaticConfig(Map<String, Object> staticConfig) {
+            this.staticConfig = staticConfig;
+            return this;
+        }
+
         @SuppressWarnings("unchecked")
         public QuorumController build() throws Exception {
             if (raftClient == null) {
-                throw new RuntimeException("You must set a raft client.");
+                throw new IllegalStateException("You must set a raft client.");
+            } else if (bootstrapMetadata == null) {
+                throw new IllegalStateException("You must specify an initial metadata.version using the kafka-storage tool.");
+            } else if (quorumFeatures == null) {
+                throw new IllegalStateException("You must specify the quorum features");
+            } else if (fatalFaultHandler == null) {
+                throw new IllegalStateException("You must specify a fatal fault handler.");
+            } else if (metadataFaultHandler == null) {
+                throw new IllegalStateException("You must specify a metadata fault handler.");
             }
+
             if (threadNamePrefix == null) {
                 threadNamePrefix = String.format("Node%d_", nodeId);
             }
@@ -254,14 +317,36 @@ public QuorumController build() throws Exception {
                 controllerMetrics = (ControllerMetrics) Class.forName(
                     "org.apache.kafka.controller.MockControllerMetrics").getConstructor().newInstance();
             }
+
             KafkaEventQueue queue = null;
             try {
                 queue = new KafkaEventQueue(time, logContext, threadNamePrefix + "QuorumController");
-                return new QuorumController(logContext, nodeId, clusterId, queue, time,
-                    configSchema, raftClient, supportedFeatures, defaultReplicationFactor,
-                    defaultNumPartitions, replicaPlacer, snapshotMaxNewRecordBytes,
-                    sessionTimeoutNs, controllerMetrics, createTopicPolicy,
-                    alterConfigPolicy, configurationValidator, authorizer);
+                return new QuorumController(
+                    fatalFaultHandler,
+                    metadataFaultHandler,
+                    logContext,
+                    nodeId,
+                    clusterId,
+                    queue,
+                    time,
+                    configSchema,
+                    raftClient,
+                    quorumFeatures,
+                    defaultReplicationFactor,
+                    defaultNumPartitions,
+                    replicaPlacer,
+                    snapshotMaxNewRecordBytes,
+                    leaderImbalanceCheckIntervalNs,
+                    maxIdleIntervalNs,
+                    sessionTimeoutNs,
+                    controllerMetrics,
+                    createTopicPolicy,
+                    alterConfigPolicy,
+                    configurationValidator,
+                    authorizer,
+                    staticConfig,
+                    bootstrapMetadata
+                );
             } catch (Exception e) {
                 Utils.closeQuietly(queue, "event queue");
                 throw e;
@@ -271,7 +356,7 @@ public QuorumController build() throws Exception {
 
     /**
      * Checks that a configuration resource exists.
-     *
+     * <p>
      * This object must be used only from the controller event thread.
      */
     class ConfigResourceExistenceChecker implements Consumer<ConfigResource> {
@@ -345,7 +430,7 @@ private Throwable handleEventException(String name,
                                            OptionalLong startProcessingTimeNs,
                                            Throwable exception) {
         if (!startProcessingTimeNs.isPresent()) {
-            log.info("unable to start processing {} because of {}.", name,
+            log.error("{}: unable to start processing because of {}.", name,
                 exception.getClass().getSimpleName());
             if (exception instanceof ApiException) {
                 return exception;
@@ -361,12 +446,18 @@ private Throwable handleEventException(String name,
                 exception.getClass().getSimpleName(), deltaUs);
             return exception;
         }
-        log.warn("{}: failed with unknown server exception {} at epoch {} in {} us.  " +
-            "Reverting to last committed offset {}.",
-            this, exception.getClass().getSimpleName(), curClaimEpoch, deltaUs,
-            lastCommittedOffset, exception);
-        raftClient.resign(curClaimEpoch);
-        renounce();
+        if (isActiveController()) {
+            log.warn("{}: failed with unknown server exception {} at epoch {} in {} us.  " +
+                    "Renouncing leadership and reverting to the last committed offset {}.",
+                    name, exception.getClass().getSimpleName(), curClaimEpoch, deltaUs,
+                    lastCommittedOffset, exception);
+            renounce();
+        } else {
+            log.warn("{}: failed with unknown server exception {} in {} us.  " +
+                    "The controller is already in standby mode.",
+                    name, exception.getClass().getSimpleName(), deltaUs,
+                    exception);
+        }
         return new UnknownServerException(exception);
     }
 
@@ -424,7 +515,7 @@ void createSnapshotGenerator(long committedOffset, int committedEpoch, long comm
             if (!snapshotRegistry.hasSnapshot(committedOffset)) {
                 throw new RuntimeException(
                     String.format(
-                        "Cannot generate a snapshot at committed offset %s because it does not exists in the snapshot registry.",
+                        "Cannot generate a snapshot at committed offset %d because it does not exists in the snapshot registry.",
                         committedOffset
                     )
                 );
@@ -557,21 +648,27 @@ public String toString() {
         }
     }
 
-    // VisibleForTesting
+    // Visible for testing
     ReplicationControlManager replicationControl() {
         return replicationControl;
     }
 
-    // VisibleForTesting
-    <T> CompletableFuture<T> appendReadEvent(String name, Supplier<T> handler) {
-        ControllerReadEvent<T> event = new ControllerReadEvent<T>(name, handler);
-        queue.append(event);
-        return event.future();
+    // Visible for testing
+    ClusterControlManager clusterControl() {
+        return clusterControl;
     }
 
-    <T> CompletableFuture<T> appendReadEvent(String name, long deadlineNs, Supplier<T> handler) {
+    <T> CompletableFuture<T> appendReadEvent(
+        String name,
+        OptionalLong deadlineNs,
+        Supplier<T> handler
+    ) {
         ControllerReadEvent<T> event = new ControllerReadEvent<T>(name, handler);
-        queue.appendWithDeadline(deadlineNs, event);
+        if (deadlineNs.isPresent()) {
+            queue.appendWithDeadline(deadlineNs.getAsLong(), event);
+        } else {
+            queue.append(event);
+        }
         return event.future();
     }
 
@@ -581,17 +678,17 @@ interface ControllerWriteOperation<T> {
          * operation.  In general, this operation should not modify the "hard state" of
          * the controller.  That modification will happen later on, when we replay the
          * records generated by this function.
-         *
+         * <p>
          * There are cases where this function modifies the "soft state" of the
          * controller.  Mainly, this happens when we process cluster heartbeats.
-         *
+         * <p>
          * This function also generates an RPC result.  In general, if the RPC resulted in
          * an error, the RPC result will be an error, and the generated record list will
          * be empty.  This would happen if we tried to create a topic with incorrect
          * parameters, for example.  Of course, partial errors are possible for batch
          * operations.
          *
-         * @return              A result containing a list of records, and the RPC result.
+         * @return A result containing a list of records, and the RPC result.
          */
         ControllerResult<T> generateRecordsAndResult() throws Exception;
 
@@ -600,7 +697,8 @@ interface ControllerWriteOperation<T> {
          * with the end offset at which those records were placed.  If there were no
          * records to write, we'll just pass the last write offset.
          */
-        default void processBatchEndOffset(long offset) {}
+        default void processBatchEndOffset(long offset) {
+        }
     }
 
     /**
@@ -630,7 +728,7 @@ public void run() throws Exception {
             long now = time.nanoseconds();
             controllerMetrics.updateEventQueueTime(NANOSECONDS.toMillis(now - eventCreatedTimeNs));
             int controllerEpoch = curClaimEpoch;
-            if (controllerEpoch == -1) {
+            if (!isActiveController()) {
                 throw newNotControllerException();
             }
             startProcessingTimeNs = OptionalLong.of(now);
@@ -643,22 +741,39 @@ public void run() throws Exception {
                 OptionalLong maybeOffset = purgatory.highestPendingOffset();
                 if (!maybeOffset.isPresent()) {
                     // If the purgatory is empty, there are no pending operations and no
-                    // uncommitted state.  We can return immediately.
+                    // uncommitted state.  We can complete immediately.
                     resultAndOffset = ControllerResultAndOffset.of(-1, result);
                     log.debug("Completing read-only operation {} immediately because " +
                         "the purgatory is empty.", this);
                     complete(null);
-                    return;
+                } else {
+                    // If there are operations in the purgatory, we want to wait for the latest
+                    // one to complete before returning our result to the user.
+                    resultAndOffset = ControllerResultAndOffset.of(maybeOffset.getAsLong(), result);
+                    log.debug("Read-only operation {} will be completed when the log " +
+                        "reaches offset {}", this, resultAndOffset.offset());
                 }
-                // If there are operations in the purgatory, we want to wait for the latest
-                // one to complete before returning our result to the user.
-                resultAndOffset = ControllerResultAndOffset.of(maybeOffset.getAsLong(), result);
-                log.debug("Read-only operation {} will be completed when the log " +
-                    "reaches offset {}", this, resultAndOffset.offset());
             } else {
-                // If the operation returned a batch of records, those records need to be
-                // written before we can return our result to the user.  Here, we hand off
-                // the batch of records to the raft client.  They will be written out
+                // Start by trying to apply the record to our in-memory state. This should always
+                // succeed; if it does not, that's a fatal error. It is important to do this before
+                // scheduling the record for Raft replication.
+                int i = 1;
+                for (ApiMessageAndVersion message : result.records()) {
+                    try {
+                        replay(message.message(), Optional.empty(), writeOffset + result.records().size());
+                    } catch (Throwable e) {
+                        String failureMessage = String.format("Unable to apply %s record, which was " +
+                            "%d of %d record(s) in the batch following last writeOffset %d.",
+                            message.message().getClass().getSimpleName(), i, result.records().size(),
+                            writeOffset);
+                        throw fatalFaultHandler.handleFault(failureMessage, e);
+                    }
+                    i++;
+                }
+
+                // If the operation returned a batch of records, and those records could be applied,
+                // they need to be written before we can return our result to the user.  Here, we
+                // hand off the batch of records to the raft client.  They will be written out
                 // asynchronously.
                 final long offset;
                 if (result.isAtomic()) {
@@ -667,16 +782,22 @@ public void run() throws Exception {
                     offset = raftClient.scheduleAppend(controllerEpoch, result.records());
                 }
                 op.processBatchEndOffset(offset);
-                writeOffset = offset;
+                updateWriteOffset(offset);
                 resultAndOffset = ControllerResultAndOffset.of(offset, result);
-                for (ApiMessageAndVersion message : result.records()) {
-                    replay(message.message(), Optional.empty(), offset);
-                }
                 snapshotRegistry.getOrCreateSnapshot(offset);
+
                 log.debug("Read-write operation {} will be completed when the log " +
                     "reaches offset {}.", this, resultAndOffset.offset());
             }
-            purgatory.add(resultAndOffset.offset(), this);
+
+            // After every controller write event, schedule a leader rebalance if there are any topic partition
+            // with leader that is not the preferred leader.
+            maybeScheduleNextBalancePartitionLeaders();
+
+            // Remember the latest offset and future if it is not already completed
+            if (!future.isDone()) {
+                purgatory.add(resultAndOffset.offset(), this);
+            }
         }
 
         @Override
@@ -701,28 +822,31 @@ public String toString() {
         }
     }
 
-    private <T> CompletableFuture<T> appendWriteEvent(String name,
-                                                      long deadlineNs,
-                                                      ControllerWriteOperation<T> op) {
+    private <T> CompletableFuture<T> prependWriteEvent(String name,
+                                                       ControllerWriteOperation<T> op) {
         ControllerWriteEvent<T> event = new ControllerWriteEvent<>(name, op);
-        queue.appendWithDeadline(deadlineNs, event);
+        queue.prepend(event);
         return event.future();
     }
 
-    private <T> CompletableFuture<T> appendWriteEvent(String name,
-                                                      ControllerWriteOperation<T> op) {
+    <T> CompletableFuture<T> appendWriteEvent(String name,
+                                              OptionalLong deadlineNs,
+                                              ControllerWriteOperation<T> op) {
         ControllerWriteEvent<T> event = new ControllerWriteEvent<>(name, op);
-        queue.append(event);
+        if (deadlineNs.isPresent()) {
+            queue.appendWithDeadline(deadlineNs.getAsLong(), event);
+        } else {
+            queue.append(event);
+        }
         return event.future();
     }
 
     class QuorumMetaLogListener implements RaftClient.Listener<ApiMessageAndVersion> {
-
         @Override
         public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
             appendRaftEvent("handleCommit[baseOffset=" + reader.baseOffset() + "]", () -> {
                 try {
-                    boolean isActiveController = curClaimEpoch != -1;
+                    maybeCompleteAuthorizerInitialLoad();
                     long processedRecordsSize = 0;
                     while (reader.hasNext()) {
                         Batch<ApiMessageAndVersion> batch = reader.next();
@@ -730,7 +854,7 @@ public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
                         int epoch = batch.epoch();
                         List<ApiMessageAndVersion> messages = batch.records();
 
-                        if (isActiveController) {
+                        if (isActiveController()) {
                             // If the controller is active, the records were already replayed,
                             // so we don't need to do it here.
                             log.debug("Completing purgatory items up to offset {} and epoch {}.", offset, epoch);
@@ -743,7 +867,6 @@ public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
                             // otherwise, we should delete up to the current committed offset.
                             snapshotRegistry.deleteSnapshotsUpTo(
                                 snapshotGeneratorManager.snapshotLastOffsetFromLog().orElse(offset));
-
                         } else {
                             // If the controller is a standby, replay the records that were
                             // created by the active controller.
@@ -758,14 +881,21 @@ public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
                                         "offset {} and epoch {}.", offset, epoch);
                                 }
                             }
-                            for (ApiMessageAndVersion messageAndVersion : messages) {
-                                replay(messageAndVersion.message(), Optional.empty(), offset);
+                            int i = 1;
+                            for (ApiMessageAndVersion message : messages) {
+                                try {
+                                    replay(message.message(), Optional.empty(), offset);
+                                } catch (Throwable e) {
+                                    String failureMessage = String.format("Unable to apply %s record on standby " +
+                                            "controller, which was %d of %d record(s) in the batch with baseOffset %d.",
+                                            message.message().getClass().getSimpleName(), i, messages.size(),
+                                            batch.baseOffset());
+                                    throw metadataFaultHandler.handleFault(failureMessage, e);
+                                }
+                                i++;
                             }
                         }
-
-                        lastCommittedOffset = offset;
-                        lastCommittedEpoch = epoch;
-                        lastCommittedTimestamp = batch.appendTimestamp();
+                        updateLastCommittedState(offset, epoch, batch.appendTimestamp());
                         processedRecordsSize += batch.sizeInBytes();
                     }
 
@@ -780,15 +910,10 @@ public void handleCommit(BatchReader<ApiMessageAndVersion> reader) {
         public void handleSnapshot(SnapshotReader<ApiMessageAndVersion> reader) {
             appendRaftEvent(String.format("handleSnapshot[snapshotId=%s]", reader.snapshotId()), () -> {
                 try {
-                    boolean isActiveController = curClaimEpoch != -1;
-                    if (isActiveController) {
-                        throw new IllegalStateException(
-                            String.format(
-                                "Asked to load snapshot (%s) when it is the active controller (%s)",
-                                reader.snapshotId(),
-                                curClaimEpoch
-                            )
-                        );
+                    if (isActiveController()) {
+                        throw fatalFaultHandler.handleFault(String.format("Asked to load snapshot " +
+                            "(%s) when it is the active controller (%d)", reader.snapshotId(),
+                            curClaimEpoch));
                     }
                     log.info("Starting to replay snapshot ({}), from last commit offset ({}) and epoch ({})",
                         reader.snapshotId(), lastCommittedOffset, lastCommittedEpoch);
@@ -802,32 +927,35 @@ public void handleSnapshot(SnapshotReader<ApiMessageAndVersion> reader) {
 
                         if (log.isDebugEnabled()) {
                             if (log.isTraceEnabled()) {
-                                log.trace(
-                                    "Replaying snapshot ({}) batch with last offset of {}: {}",
-                                    reader.snapshotId(),
-                                    offset,
-                                    messages
-                                      .stream()
-                                      .map(ApiMessageAndVersion::toString)
-                                      .collect(Collectors.joining(", "))
-                                );
+                                log.trace("Replaying snapshot ({}) batch with last offset of {}: {}",
+                                    reader.snapshotId(), offset, messages.stream().map(ApiMessageAndVersion::toString).
+                                        collect(Collectors.joining(", ")));
                             } else {
-                                log.debug(
-                                    "Replaying snapshot ({}) batch with last offset of {}",
-                                    reader.snapshotId(),
-                                    offset
-                                );
+                                log.debug("Replaying snapshot ({}) batch with last offset of {}",
+                                    reader.snapshotId(), offset);
                             }
                         }
 
-                        for (ApiMessageAndVersion messageAndVersion : messages) {
-                            replay(messageAndVersion.message(), Optional.of(reader.snapshotId()), offset);
+                        int i = 1;
+                        for (ApiMessageAndVersion message : messages) {
+                            try {
+                                replay(message.message(), Optional.of(reader.snapshotId()), reader.lastContainedLogOffset());
+                            } catch (Throwable e) {
+                                String failureMessage = String.format("Unable to apply %s record " +
+                                        "from snapshot %s on standby controller, which was %d of " +
+                                        "%d record(s) in the batch with baseOffset %d.",
+                                        message.message().getClass().getSimpleName(), reader.snapshotId(),
+                                        i, messages.size(), batch.baseOffset());
+                                throw metadataFaultHandler.handleFault(failureMessage, e);
+                            }
+                            i++;
                         }
                     }
-
-                    lastCommittedOffset = reader.lastContainedLogOffset();
-                    lastCommittedEpoch = reader.lastContainedLogEpoch();
-                    lastCommittedTimestamp = reader.lastContainedLogTimestamp();
+                    updateLastCommittedState(
+                        reader.lastContainedLogOffset(),
+                        reader.lastContainedLogEpoch(),
+                        reader.lastContainedLogTimestamp()
+                    );
                     snapshotRegistry.getOrCreateSnapshot(lastCommittedOffset);
                     authorizer.ifPresent(a -> a.loadSnapshot(aclControlManager.idToAcl()));
                 } finally {
@@ -847,27 +975,86 @@ public void handleLeaderChange(LeaderAndEpoch newLeader) {
                             newEpoch + ", but we never renounced controller epoch " +
                             curEpoch);
                     }
-                    log.info(
-                        "Becoming the active controller at epoch {}, committed offset {} and committed epoch {}.",
-                        newEpoch, lastCommittedOffset, lastCommittedEpoch
-                    );
 
                     curClaimEpoch = newEpoch;
                     controllerMetrics.setActive(true);
-                    writeOffset = lastCommittedOffset;
+                    updateWriteOffset(lastCommittedOffset);
                     clusterControl.activate();
 
+                    // Check if we need to bootstrap metadata into the log. This must happen before we can
+                    // write any other records to the log since we need the metadata.version to determine the correct
+                    // record version
+                    final MetadataVersion metadataVersion;
+                    if (!featureControl.sawMetadataVersion()) {
+                        final CompletableFuture<Map<String, ApiError>> future;
+                        if (!bootstrapMetadata.metadataVersion().isKRaftSupported()) {
+                            metadataVersion = MetadataVersion.MINIMUM_KRAFT_VERSION;
+                            future = new CompletableFuture<>();
+                            future.completeExceptionally(
+                                new IllegalStateException("Cannot become leader without a KRaft supported version. " +
+                                    "Got " + bootstrapMetadata.metadataVersion()));
+                        } else {
+                            metadataVersion = bootstrapMetadata.metadataVersion();
+
+                            // This call is here instead of inside the appendWriteEvent for testing purposes.
+                            final List<ApiMessageAndVersion> bootstrapRecords = bootstrapMetadata.records();
+
+                            // We prepend the bootstrap event in order to ensure the bootstrap metadata is written before
+                            // any external controller write events are processed.
+                            future = prependWriteEvent("bootstrapMetadata", () -> {
+                                if (metadataVersion.isAtLeast(MetadataVersion.IBP_3_3_IV0)) {
+                                    log.info("Initializing metadata.version to {}", metadataVersion.featureLevel());
+                                } else {
+                                    log.info("Upgrading KRaft cluster and initializing metadata.version to {}",
+                                        metadataVersion.featureLevel());
+                                }
+                                return ControllerResult.atomicOf(bootstrapRecords, null);
+                            });
+                        }
+                        future.whenComplete((result, exception) -> {
+                            if (exception != null) {
+                                log.error("Failed to bootstrap metadata.", exception);
+                                appendRaftEvent("bootstrapMetadata[" + curClaimEpoch + "]", () -> {
+                                    if (isActiveController()) {
+                                        log.warn("Renouncing the leadership at oldEpoch {} since we could not bootstrap " +
+                                                        "metadata. Reverting to last committed offset {}.",
+                                                curClaimEpoch, lastCommittedOffset);
+                                        renounce();
+                                    } else {
+                                        log.warn("Unable to bootstrap metadata on standby controller.");
+                                    }
+                                });
+                            }
+                        });
+                    } else {
+                        metadataVersion = featureControl.metadataVersion();
+                    }
+
+                    log.info(
+                        "Becoming the active controller at epoch {}, committed offset {}, committed epoch {}, and metadata.version {}",
+                        newEpoch, lastCommittedOffset, lastCommittedEpoch, metadataVersion.featureLevel()
+                    );
+
                     // Before switching to active, create an in-memory snapshot at the last committed offset. This is
                     // required because the active controller assumes that there is always an in-memory snapshot at the
                     // last committed offset.
                     snapshotRegistry.getOrCreateSnapshot(lastCommittedOffset);
+
+                    // When becoming the active controller, schedule a leader rebalance if there are any topic partition
+                    // with leader that is not the preferred leader.
+                    maybeScheduleNextBalancePartitionLeaders();
+
+                    // When becoming leader schedule periodic write of the no op record
+                    maybeScheduleNextWriteNoOpRecord();
                 });
-            } else if (curClaimEpoch != -1) {
+            } else if (isActiveController()) {
                 appendRaftEvent("handleRenounce[" + curClaimEpoch + "]", () -> {
-                    log.warn("Renouncing the leadership at oldEpoch {} due to a metadata " +
-                            "log event. Reverting to last committed offset {}.", curClaimEpoch,
-                        lastCommittedOffset);
-                    renounce();
+                    if (isActiveController()) {
+                        log.warn("Renouncing the leadership at oldEpoch {} due to a metadata " +
+                                "log event. Reverting to last committed offset {}.", curClaimEpoch,
+                                lastCommittedOffset);
+                        renounce();
+                    }
                 });
             }
         }
@@ -882,31 +1069,96 @@ private void appendRaftEvent(String name, Runnable runnable) {
                 if (this != metaLogListener) {
                     log.debug("Ignoring {} raft event from an old registration", name);
                 } else {
-                    runnable.run();
+                    try {
+                        runnable.run();
+                    } finally {
+                        maybeCompleteAuthorizerInitialLoad();
+                    }
                 }
             });
         }
     }
 
-    private void renounce() {
-        curClaimEpoch = -1;
-        controllerMetrics.setActive(false);
-        purgatory.failAll(newNotControllerException());
+    private void maybeCompleteAuthorizerInitialLoad() {
+        if (!needToCompleteAuthorizerLoad) return;
+        OptionalLong highWatermark = raftClient.highWatermark();
+        if (highWatermark.isPresent()) {
+            if (lastCommittedOffset + 1 >= highWatermark.getAsLong()) {
+                log.info("maybeCompleteAuthorizerInitialLoad: completing authorizer " +
+                    "initial load at last committed offset {}.", lastCommittedOffset);
+                authorizer.get().completeInitialLoad();
+                needToCompleteAuthorizerLoad = false;
+            } else {
+                log.trace("maybeCompleteAuthorizerInitialLoad: can't proceed because " +
+                    "lastCommittedOffset  = {}, but highWatermark = {}.",
+                    lastCommittedOffset, highWatermark.getAsLong());
+            }
+        } else {
+            log.trace("maybeCompleteAuthorizerInitialLoad: highWatermark not set.");
+        }
+    }
 
-        if (snapshotRegistry.hasSnapshot(lastCommittedOffset)) {
-            newBytesSinceLastSnapshot = 0;
-            snapshotRegistry.revertToSnapshot(lastCommittedOffset);
-            authorizer.ifPresent(a -> a.loadSnapshot(aclControlManager.idToAcl()));
+    private boolean isActiveController() {
+        return curClaimEpoch != -1;
+    }
+
+    private void updateWriteOffset(long offset) {
+        writeOffset = offset;
+        if (isActiveController()) {
+            controllerMetrics.setLastAppliedRecordOffset(writeOffset);
+            // This is not truly the append timestamp. The KRaft client doesn't expose the append time when scheduling a write.
+            // This is good enough because this is called right after the records were given to the KRAft client for appending and
+            // the default append linger for KRaft is 25ms.
+            controllerMetrics.setLastAppliedRecordTimestamp(time.milliseconds());
         } else {
-            resetState();
-            raftClient.unregister(metaLogListener);
-            metaLogListener = new QuorumMetaLogListener();
-            raftClient.register(metaLogListener);
+            // Change the last applied record metrics back to the last committed state. Inactive controllers report the last committed
+            // state while active controllers report the latest state which may include uncommitted data.
+            controllerMetrics.setLastAppliedRecordOffset(lastCommittedOffset);
+            controllerMetrics.setLastAppliedRecordTimestamp(lastCommittedTimestamp);
         }
+    }
+
+    private void updateLastCommittedState(long offset, int epoch, long timestamp) {
+        lastCommittedOffset = offset;
+        lastCommittedEpoch = epoch;
+        lastCommittedTimestamp = timestamp;
+
+        controllerMetrics.setLastCommittedRecordOffset(offset);
+        if (!isActiveController()) {
+            controllerMetrics.setLastAppliedRecordOffset(offset);
+            controllerMetrics.setLastAppliedRecordTimestamp(timestamp);
+        }
+    }
 
-        writeOffset = -1;
-        clusterControl.deactivate();
-        cancelMaybeFenceReplicas();
+    private void renounce() {
+        try {
+            if (curClaimEpoch == -1) {
+                throw new RuntimeException("Cannot renounce leadership because we are not the " +
+                        "current leader.");
+            }
+            raftClient.resign(curClaimEpoch);
+            curClaimEpoch = -1;
+            controllerMetrics.setActive(false);
+            purgatory.failAll(newNotControllerException());
+
+            if (snapshotRegistry.hasSnapshot(lastCommittedOffset)) {
+                newBytesSinceLastSnapshot = 0;
+                snapshotRegistry.revertToSnapshot(lastCommittedOffset);
+                authorizer.ifPresent(a -> a.loadSnapshot(aclControlManager.idToAcl()));
+            } else {
+                resetState();
+                raftClient.unregister(metaLogListener);
+                metaLogListener = new QuorumMetaLogListener();
+                raftClient.register(metaLogListener);
+            }
+            updateWriteOffset(-1);
+            clusterControl.deactivate();
+            cancelMaybeFenceReplicas();
+            cancelMaybeBalancePartitionLeaders();
+            cancelNextWriteNoOpRecord();
+        } catch (Throwable e) {
+            fatalFaultHandler.handleFault("exception while renouncing leadership", e);
+        }
     }
 
     private <T> void scheduleDeferredWriteEvent(String name, long deadlineNs,
@@ -915,7 +1167,7 @@ private <T> void scheduleDeferredWriteEvent(String name, long deadlineNs,
         queue.scheduleDeferred(name, new EarliestDeadlineFunction(deadlineNs), event);
         event.future.exceptionally(e -> {
             if (e instanceof UnknownServerException && e.getCause() != null &&
-                    e.getCause() instanceof RejectedExecutionException) {
+                e.getCause() instanceof RejectedExecutionException) {
                 log.error("Cancelling deferred write event {} because the event queue " +
                     "is now closed.", name);
                 return null;
@@ -953,74 +1205,178 @@ private void cancelMaybeFenceReplicas() {
         queue.cancelDeferred(MAYBE_FENCE_REPLICAS);
     }
 
-    @SuppressWarnings("unchecked")
-    private void replay(ApiMessage message, Optional<OffsetAndEpoch> snapshotId, long offset) {
-        try {
-            MetadataRecordType type = MetadataRecordType.fromId(message.apiKey());
-            switch (type) {
-                case REGISTER_BROKER_RECORD:
-                    clusterControl.replay((RegisterBrokerRecord) message);
-                    break;
-                case UNREGISTER_BROKER_RECORD:
-                    clusterControl.replay((UnregisterBrokerRecord) message);
-                    break;
-                case TOPIC_RECORD:
-                    replicationControl.replay((TopicRecord) message);
-                    break;
-                case PARTITION_RECORD:
-                    replicationControl.replay((PartitionRecord) message);
-                    break;
-                case CONFIG_RECORD:
-                    configurationControl.replay((ConfigRecord) message);
-                    break;
-                case PARTITION_CHANGE_RECORD:
-                    replicationControl.replay((PartitionChangeRecord) message);
-                    break;
-                case FENCE_BROKER_RECORD:
-                    clusterControl.replay((FenceBrokerRecord) message);
-                    break;
-                case UNFENCE_BROKER_RECORD:
-                    clusterControl.replay((UnfenceBrokerRecord) message);
-                    break;
-                case REMOVE_TOPIC_RECORD:
-                    replicationControl.replay((RemoveTopicRecord) message);
-                    break;
-                case FEATURE_LEVEL_RECORD:
-                    featureControl.replay((FeatureLevelRecord) message);
-                    break;
-                case CLIENT_QUOTA_RECORD:
-                    clientQuotaControlManager.replay((ClientQuotaRecord) message);
-                    break;
-                case PRODUCER_IDS_RECORD:
-                    producerIdControlManager.replay((ProducerIdsRecord) message);
-                    break;
-                case ACCESS_CONTROL_ENTRY_RECORD:
-                    aclControlManager.replay((AccessControlEntryRecord) message, snapshotId);
-                    break;
-                case REMOVE_ACCESS_CONTROL_ENTRY_RECORD:
-                    aclControlManager.replay((RemoveAccessControlEntryRecord) message, snapshotId);
-                    break;
-                default:
-                    throw new RuntimeException("Unhandled record type " + type);
+    private static final String MAYBE_BALANCE_PARTITION_LEADERS = "maybeBalancePartitionLeaders";
+
+    private void maybeScheduleNextBalancePartitionLeaders() {
+        if (imbalancedScheduled != ImbalanceSchedule.SCHEDULED &&
+            leaderImbalanceCheckIntervalNs.isPresent() &&
+            replicationControl.arePartitionLeadersImbalanced()) {
+
+            log.debug(
+                "Scheduling write event for {} because scheduled ({}), checkIntervalNs ({}) and isImbalanced ({})",
+                MAYBE_BALANCE_PARTITION_LEADERS,
+                imbalancedScheduled,
+                leaderImbalanceCheckIntervalNs,
+                replicationControl.arePartitionLeadersImbalanced()
+            );
+
+            ControllerWriteEvent<Boolean> event = new ControllerWriteEvent<>(MAYBE_BALANCE_PARTITION_LEADERS, () -> {
+                ControllerResult<Boolean> result = replicationControl.maybeBalancePartitionLeaders();
+
+                // reschedule the operation after the leaderImbalanceCheckIntervalNs interval.
+                // Mark the imbalance event as completed and reschedule if necessary
+                if (result.response()) {
+                    imbalancedScheduled = ImbalanceSchedule.IMMEDIATELY;
+                } else {
+                    imbalancedScheduled = ImbalanceSchedule.DEFERRED;
+                }
+
+                // Note that rescheduling this event here is not required because MAYBE_BALANCE_PARTITION_LEADERS
+                // is a ControllerWriteEvent. ControllerWriteEvent always calls this method after the records
+                // generated by a ControllerWriteEvent have been applied.
+
+                return result;
+            });
+
+            long delayNs = time.nanoseconds();
+            if (imbalancedScheduled == ImbalanceSchedule.DEFERRED) {
+                delayNs += leaderImbalanceCheckIntervalNs.getAsLong();
+            } else {
+                // The current implementation of KafkaEventQueue always picks from the deferred collection of operations
+                // before picking from the non-deferred collection of operations. This can result in some unfairness if
+                // deferred operation are scheduled for immediate execution. This delays them by a small amount of time.
+                delayNs += NANOSECONDS.convert(10, TimeUnit.MILLISECONDS);
             }
-        } catch (Exception e) {
-            if (snapshotId.isPresent()) {
-                log.error("Error replaying record {} from snapshot {} at last offset {}.",
-                    message.toString(), snapshotId.get(), offset, e);
+
+            queue.scheduleDeferred(MAYBE_BALANCE_PARTITION_LEADERS, new EarliestDeadlineFunction(delayNs), event);
+
+            imbalancedScheduled = ImbalanceSchedule.SCHEDULED;
+        }
+    }
+
+    private void cancelMaybeBalancePartitionLeaders() {
+        imbalancedScheduled = ImbalanceSchedule.DEFERRED;
+        queue.cancelDeferred(MAYBE_BALANCE_PARTITION_LEADERS);
+    }
+
+    private static final String WRITE_NO_OP_RECORD = "writeNoOpRecord";
+
+    private void maybeScheduleNextWriteNoOpRecord() {
+        if (!noOpRecordScheduled &&
+            maxIdleIntervalNs.isPresent() &&
+            featureControl.metadataVersion().isNoOpRecordSupported()) {
+
+            log.debug(
+                "Scheduling write event for {} because maxIdleIntervalNs ({}) and metadataVersion ({})",
+                WRITE_NO_OP_RECORD,
+                maxIdleIntervalNs.getAsLong(),
+                featureControl.metadataVersion()
+            );
+
+            ControllerWriteEvent<Void> event = new ControllerWriteEvent<>(WRITE_NO_OP_RECORD, () -> {
+                noOpRecordScheduled = false;
+                maybeScheduleNextWriteNoOpRecord();
+
+                return ControllerResult.of(
+                    Arrays.asList(new ApiMessageAndVersion(new NoOpRecord(), (short) 0)),
+                    null
+                );
+            });
+
+            long delayNs = time.nanoseconds() + maxIdleIntervalNs.getAsLong();
+            queue.scheduleDeferred(WRITE_NO_OP_RECORD, new EarliestDeadlineFunction(delayNs), event);
+            noOpRecordScheduled = true;
+        }
+    }
+
+    private void cancelNextWriteNoOpRecord() {
+        noOpRecordScheduled = false;
+        queue.cancelDeferred(WRITE_NO_OP_RECORD);
+    }
+
+    private void handleFeatureControlChange() {
+        // The feature control maybe have changed. On the active controller cancel or schedule noop
+        // record writes accordingly.
+        if (isActiveController()) {
+            if (featureControl.metadataVersion().isNoOpRecordSupported()) {
+                maybeScheduleNextWriteNoOpRecord();
             } else {
-                log.error("Error replaying record {} at last offset {}.",
-                    message.toString(), offset, e);
+                cancelNextWriteNoOpRecord();
             }
         }
     }
 
+    /**
+     * Apply the metadata record to its corresponding in-memory state(s)
+     *
+     * @param message           The metadata record
+     * @param snapshotId        The snapshotId if this record is from a snapshot
+     * @param batchLastOffset   The offset of the last record in the log batch, or the lastContainedLogOffset
+     *                          if this record is from a snapshot, this is used along with RegisterBrokerRecord
+     */
+    private void replay(ApiMessage message, Optional<OffsetAndEpoch> snapshotId, long batchLastOffset) {
+        MetadataRecordType type = MetadataRecordType.fromId(message.apiKey());
+        switch (type) {
+            case REGISTER_BROKER_RECORD:
+                clusterControl.replay((RegisterBrokerRecord) message, batchLastOffset);
+                break;
+            case UNREGISTER_BROKER_RECORD:
+                clusterControl.replay((UnregisterBrokerRecord) message);
+                break;
+            case TOPIC_RECORD:
+                replicationControl.replay((TopicRecord) message);
+                break;
+            case PARTITION_RECORD:
+                replicationControl.replay((PartitionRecord) message);
+                break;
+            case CONFIG_RECORD:
+                configurationControl.replay((ConfigRecord) message);
+                break;
+            case PARTITION_CHANGE_RECORD:
+                replicationControl.replay((PartitionChangeRecord) message);
+                break;
+            case FENCE_BROKER_RECORD:
+                clusterControl.replay((FenceBrokerRecord) message);
+                break;
+            case UNFENCE_BROKER_RECORD:
+                clusterControl.replay((UnfenceBrokerRecord) message);
+                break;
+            case REMOVE_TOPIC_RECORD:
+                replicationControl.replay((RemoveTopicRecord) message);
+                break;
+            case FEATURE_LEVEL_RECORD:
+                featureControl.replay((FeatureLevelRecord) message);
+                handleFeatureControlChange();
+                break;
+            case CLIENT_QUOTA_RECORD:
+                clientQuotaControlManager.replay((ClientQuotaRecord) message);
+                break;
+            case PRODUCER_IDS_RECORD:
+                producerIdControlManager.replay((ProducerIdsRecord) message);
+                break;
+            case BROKER_REGISTRATION_CHANGE_RECORD:
+                clusterControl.replay((BrokerRegistrationChangeRecord) message);
+                break;
+            case ACCESS_CONTROL_ENTRY_RECORD:
+                aclControlManager.replay((AccessControlEntryRecord) message, snapshotId);
+                break;
+            case REMOVE_ACCESS_CONTROL_ENTRY_RECORD:
+                aclControlManager.replay((RemoveAccessControlEntryRecord) message, snapshotId);
+                break;
+            case NO_OP_RECORD:
+                // NoOpRecord is an empty record and doesn't need to be replayed
+                break;
+            default:
+                throw new RuntimeException("Unhandled record type " + type);
+        }
+    }
+
     private void maybeGenerateSnapshot(long batchSizeInBytes) {
         newBytesSinceLastSnapshot += batchSizeInBytes;
         if (newBytesSinceLastSnapshot >= snapshotMaxNewRecordBytes &&
             snapshotGeneratorManager.generator == null
         ) {
-            boolean isActiveController = curClaimEpoch != -1;
-            if (!isActiveController) {
+            if (!isActiveController()) {
                 // The active controller creates in-memory snapshot every time an uncommitted
                 // batch gets appended. The in-active controller can be more efficient and only
                 // create an in-memory snapshot when needed.
@@ -1043,13 +1399,27 @@ private void resetState() {
         snapshotRegistry.reset();
 
         newBytesSinceLastSnapshot = 0;
-        lastCommittedOffset = -1;
-        lastCommittedEpoch = -1;
-        lastCommittedTimestamp = -1;
+        updateLastCommittedState(-1, -1, -1);
     }
 
+    /**
+     * Handles faults that should normally be fatal to the process.
+     */
+    private final FaultHandler fatalFaultHandler;
+
+    /**
+     * Handles faults in metadata handling that are normally not fatal.
+     */
+    private final FaultHandler metadataFaultHandler;
+
+    /**
+     * The slf4j log context, used to create new loggers.
+     */
     private final LogContext logContext;
 
+    /**
+     * The slf4j logger.
+     */
     private final Logger log;
 
     /**
@@ -1182,6 +1552,12 @@ private void resetState() {
      */
     private long lastCommittedTimestamp = -1;
 
+    /**
+     * True if we need to complete the authorizer initial load.
+     * This must be accessed only by the event queue thread.
+     */
+    private boolean needToCompleteAuthorizerLoad;
+
     /**
      * If we have called scheduleWrite, this is the last offset we got back from it.
      */
@@ -1197,24 +1573,65 @@ private void resetState() {
      */
     private long newBytesSinceLastSnapshot = 0;
 
-    private QuorumController(LogContext logContext,
-                             int nodeId,
-                             String clusterId,
-                             KafkaEventQueue queue,
-                             Time time,
-                             KafkaConfigSchema configSchema,
-                             RaftClient<ApiMessageAndVersion> raftClient,
-                             Map<String, VersionRange> supportedFeatures,
-                             short defaultReplicationFactor,
-                             int defaultNumPartitions,
-                             ReplicaPlacer replicaPlacer,
-                             long snapshotMaxNewRecordBytes,
-                             long sessionTimeoutNs,
-                             ControllerMetrics controllerMetrics,
-                             Optional<CreateTopicPolicy> createTopicPolicy,
-                             Optional<AlterConfigPolicy> alterConfigPolicy,
-                             ConfigurationValidator configurationValidator,
-                             Optional<ClusterMetadataAuthorizer> authorizer) {
+    /**
+     * How long to delay partition leader balancing operations.
+     */
+    private final OptionalLong leaderImbalanceCheckIntervalNs;
+
+    /**
+     * How log to delay between appending NoOpRecord to the log.
+     */
+    private final OptionalLong maxIdleIntervalNs;
+
+    private enum ImbalanceSchedule {
+        // The leader balancing operation has been scheduled
+        SCHEDULED,
+        // If the leader balancing operation should be scheduled, schedule it with a delay
+        DEFERRED,
+        // If the leader balancing operation should be scheduled, schedule it immediately
+        IMMEDIATELY
+    }
+
+    /**
+     * Tracks the scheduling state for partition leader balancing operations.
+     */
+    private ImbalanceSchedule imbalancedScheduled = ImbalanceSchedule.DEFERRED;
+
+    /**
+     * Tracks if the a write of the NoOpRecord has been scheduled.
+     */
+    private boolean noOpRecordScheduled = false;
+
+    private final BootstrapMetadata bootstrapMetadata;
+
+    private QuorumController(
+        FaultHandler fatalFaultHandler,
+        FaultHandler metadataFaultHandler,
+        LogContext logContext,
+        int nodeId,
+        String clusterId,
+        KafkaEventQueue queue,
+        Time time,
+        KafkaConfigSchema configSchema,
+        RaftClient<ApiMessageAndVersion> raftClient,
+        QuorumFeatures quorumFeatures,
+        short defaultReplicationFactor,
+        int defaultNumPartitions,
+        ReplicaPlacer replicaPlacer,
+        long snapshotMaxNewRecordBytes,
+        OptionalLong leaderImbalanceCheckIntervalNs,
+        OptionalLong maxIdleIntervalNs,
+        long sessionTimeoutNs,
+        ControllerMetrics controllerMetrics,
+        Optional<CreateTopicPolicy> createTopicPolicy,
+        Optional<AlterConfigPolicy> alterConfigPolicy,
+        ConfigurationValidator configurationValidator,
+        Optional<ClusterMetadataAuthorizer> authorizer,
+        Map<String, Object> staticConfig,
+        BootstrapMetadata bootstrapMetadata
+    ) {
+        this.fatalFaultHandler = fatalFaultHandler;
+        this.metadataFaultHandler = metadataFaultHandler;
         this.logContext = logContext;
         this.log = logContext.logger(QuorumController.class);
         this.nodeId = nodeId;
@@ -1225,122 +1642,182 @@ private QuorumController(LogContext logContext,
         this.snapshotRegistry = new SnapshotRegistry(logContext);
         this.purgatory = new ControllerPurgatory();
         this.resourceExists = new ConfigResourceExistenceChecker();
-        this.configurationControl = new ConfigurationControlManager(logContext,
-            snapshotRegistry, configSchema, alterConfigPolicy, configurationValidator);
+        this.configurationControl = new ConfigurationControlManager.Builder().
+            setLogContext(logContext).
+            setSnapshotRegistry(snapshotRegistry).
+            setKafkaConfigSchema(configSchema).
+            setExistenceChecker(resourceExists).
+            setAlterConfigPolicy(alterConfigPolicy).
+            setValidator(configurationValidator).
+            setStaticConfig(staticConfig).
+            setNodeId(nodeId).
+            build();
         this.clientQuotaControlManager = new ClientQuotaControlManager(snapshotRegistry);
-        this.clusterControl = new ClusterControlManager(logContext, clusterId, time,
-            snapshotRegistry, sessionTimeoutNs, replicaPlacer, controllerMetrics);
-        this.featureControl = new FeatureControlManager(supportedFeatures, snapshotRegistry);
+        this.featureControl = new FeatureControlManager.Builder().
+            setLogContext(logContext).
+            setQuorumFeatures(quorumFeatures).
+            setSnapshotRegistry(snapshotRegistry).
+            build();
+        this.clusterControl = new ClusterControlManager.Builder().
+            setLogContext(logContext).
+            setClusterId(clusterId).
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(sessionTimeoutNs).
+            setReplicaPlacer(replicaPlacer).
+            setControllerMetrics(controllerMetrics).
+            setFeatureControlManager(featureControl).
+            build();
         this.producerIdControlManager = new ProducerIdControlManager(clusterControl, snapshotRegistry);
         this.snapshotMaxNewRecordBytes = snapshotMaxNewRecordBytes;
-        this.replicationControl = new ReplicationControlManager(snapshotRegistry,
-            logContext, defaultReplicationFactor, defaultNumPartitions,
-            configurationControl, clusterControl, controllerMetrics, createTopicPolicy);
+        this.leaderImbalanceCheckIntervalNs = leaderImbalanceCheckIntervalNs;
+        this.maxIdleIntervalNs = maxIdleIntervalNs;
+        this.replicationControl = new ReplicationControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setLogContext(logContext).
+            setDefaultReplicationFactor(defaultReplicationFactor).
+            setDefaultNumPartitions(defaultNumPartitions).
+            setMaxElectionsPerImbalance(ReplicationControlManager.MAX_ELECTIONS_PER_IMBALANCE).
+            setConfigurationControl(configurationControl).
+            setClusterControl(clusterControl).
+            setControllerMetrics(controllerMetrics).
+            setCreateTopicPolicy(createTopicPolicy).
+            setFeatureControl(featureControl).
+            build();
         this.authorizer = authorizer;
         authorizer.ifPresent(a -> a.setAclMutator(this));
         this.aclControlManager = new AclControlManager(snapshotRegistry, authorizer);
         this.raftClient = raftClient;
+        this.bootstrapMetadata = bootstrapMetadata;
         this.metaLogListener = new QuorumMetaLogListener();
         this.curClaimEpoch = -1;
-        this.writeOffset = -1L;
+        this.needToCompleteAuthorizerLoad = authorizer.isPresent();
+        updateWriteOffset(-1);
 
         resetState();
 
+        log.info("Creating new QuorumController with clusterId {}, authorizer {}.", clusterId, authorizer);
+
         this.raftClient.register(metaLogListener);
     }
 
     @Override
-    public CompletableFuture<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
+    public CompletableFuture<AlterPartitionResponseData> alterPartition(
+        ControllerRequestContext context,
+        AlterPartitionRequestData request
+    ) {
         if (request.topics().isEmpty()) {
-            return CompletableFuture.completedFuture(new AlterIsrResponseData());
+            return CompletableFuture.completedFuture(new AlterPartitionResponseData());
         }
-        return appendWriteEvent("alterIsr", () ->
-            replicationControl.alterIsr(request));
+        return appendWriteEvent("alterPartition", context.deadlineNs(),
+            () -> replicationControl.alterPartition(context, request));
     }
 
     @Override
-    public CompletableFuture<CreateTopicsResponseData>
-            createTopics(CreateTopicsRequestData request) {
+    public CompletableFuture<CreateTopicsResponseData> createTopics(
+        ControllerRequestContext context,
+        CreateTopicsRequestData request, Set<String> describable
+    ) {
         if (request.topics().isEmpty()) {
             return CompletableFuture.completedFuture(new CreateTopicsResponseData());
         }
-        return appendWriteEvent("createTopics",
-            time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs(), MILLISECONDS),
-            () -> replicationControl.createTopics(request));
+        return appendWriteEvent("createTopics", context.deadlineNs(),
+            () -> replicationControl.createTopics(request, describable));
     }
 
     @Override
-    public CompletableFuture<Void> unregisterBroker(int brokerId) {
-        return appendWriteEvent("unregisterBroker",
+    public CompletableFuture<Void> unregisterBroker(
+        ControllerRequestContext context,
+        int brokerId
+    ) {
+        return appendWriteEvent("unregisterBroker", context.deadlineNs(),
             () -> replicationControl.unregisterBroker(brokerId));
     }
 
     @Override
-    public CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIds(long deadlineNs,
-                                                                            Collection<String> names) {
-        if (names.isEmpty()) return CompletableFuture.completedFuture(Collections.emptyMap());
-        return appendReadEvent("findTopicIds", deadlineNs,
+    public CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIds(
+        ControllerRequestContext context,
+        Collection<String> names
+    ) {
+        if (names.isEmpty())
+            return CompletableFuture.completedFuture(Collections.emptyMap());
+        return appendReadEvent("findTopicIds", context.deadlineNs(),
             () -> replicationControl.findTopicIds(lastCommittedOffset, names));
     }
 
     @Override
-    public CompletableFuture<Map<String, Uuid>> findAllTopicIds(long deadlineNs) {
-        return appendReadEvent("findAllTopicIds", deadlineNs,
+    public CompletableFuture<Map<String, Uuid>> findAllTopicIds(
+        ControllerRequestContext context
+    ) {
+        return appendReadEvent("findAllTopicIds", context.deadlineNs(),
             () -> replicationControl.findAllTopicIds(lastCommittedOffset));
     }
 
     @Override
-    public CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNames(long deadlineNs,
-                                                                              Collection<Uuid> ids) {
-        if (ids.isEmpty()) return CompletableFuture.completedFuture(Collections.emptyMap());
-        return appendReadEvent("findTopicNames", deadlineNs,
+    public CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNames(
+        ControllerRequestContext context,
+        Collection<Uuid> ids
+    ) {
+        if (ids.isEmpty())
+            return CompletableFuture.completedFuture(Collections.emptyMap());
+        return appendReadEvent("findTopicNames", context.deadlineNs(),
             () -> replicationControl.findTopicNames(lastCommittedOffset, ids));
     }
 
     @Override
-    public CompletableFuture<Map<Uuid, ApiError>> deleteTopics(long deadlineNs,
-                                                               Collection<Uuid> ids) {
-        if (ids.isEmpty()) return CompletableFuture.completedFuture(Collections.emptyMap());
-        return appendWriteEvent("deleteTopics", deadlineNs,
+    public CompletableFuture<Map<Uuid, ApiError>> deleteTopics(
+        ControllerRequestContext context,
+        Collection<Uuid> ids
+    ) {
+        if (ids.isEmpty())
+            return CompletableFuture.completedFuture(Collections.emptyMap());
+        return appendWriteEvent("deleteTopics", context.deadlineNs(),
             () -> replicationControl.deleteTopics(ids));
     }
 
     @Override
-    public CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>>
-            describeConfigs(Map<ConfigResource, Collection<String>> resources) {
-        return appendReadEvent("describeConfigs", () ->
-            configurationControl.describeConfigs(lastCommittedOffset, resources));
+    public CompletableFuture<Map<ConfigResource, ResultOrError<Map<String, String>>>> describeConfigs(
+        ControllerRequestContext context,
+        Map<ConfigResource, Collection<String>> resources
+    ) {
+        return appendReadEvent("describeConfigs", context.deadlineNs(),
+            () -> configurationControl.describeConfigs(lastCommittedOffset, resources));
     }
 
     @Override
-    public CompletableFuture<ElectLeadersResponseData>
-            electLeaders(ElectLeadersRequestData request) {
+    public CompletableFuture<ElectLeadersResponseData> electLeaders(
+        ControllerRequestContext context,
+        ElectLeadersRequestData request
+    ) {
         // If topicPartitions is null, we will try to trigger a new leader election on
         // all partitions (!).  But if it's empty, there is nothing to do.
         if (request.topicPartitions() != null && request.topicPartitions().isEmpty()) {
             return CompletableFuture.completedFuture(new ElectLeadersResponseData());
         }
-        return appendWriteEvent("electLeaders",
-            time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs(), MILLISECONDS),
+        return appendWriteEvent("electLeaders", context.deadlineNs(),
             () -> replicationControl.electLeaders(request));
     }
 
     @Override
-    public CompletableFuture<FeatureMapAndEpoch> finalizedFeatures() {
-        return appendReadEvent("getFinalizedFeatures",
+    public CompletableFuture<FinalizedControllerFeatures> finalizedFeatures(
+        ControllerRequestContext context
+    ) {
+        return appendReadEvent("getFinalizedFeatures", context.deadlineNs(),
             () -> featureControl.finalizedFeatures(lastCommittedOffset));
     }
 
     @Override
     public CompletableFuture<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
+        ControllerRequestContext context,
         Map<ConfigResource, Map<String, Entry<OpType, String>>> configChanges,
-        boolean validateOnly) {
+        boolean validateOnly
+    ) {
         if (configChanges.isEmpty()) {
             return CompletableFuture.completedFuture(Collections.emptyMap());
         }
-        return appendWriteEvent("incrementalAlterConfigs", () -> {
+        return appendWriteEvent("incrementalAlterConfigs", context.deadlineNs(), () -> {
             ControllerResult<Map<ConfigResource, ApiError>> result =
-                configurationControl.incrementalAlterConfigs(configChanges, resourceExists);
+                configurationControl.incrementalAlterConfigs(configChanges, false);
             if (validateOnly) {
                 return result.withoutRecords();
             } else {
@@ -1350,37 +1827,41 @@ public CompletableFuture<Map<ConfigResource, ApiError>> incrementalAlterConfigs(
     }
 
     @Override
-    public CompletableFuture<AlterPartitionReassignmentsResponseData>
-            alterPartitionReassignments(AlterPartitionReassignmentsRequestData request) {
+    public CompletableFuture<AlterPartitionReassignmentsResponseData> alterPartitionReassignments(
+        ControllerRequestContext context,
+        AlterPartitionReassignmentsRequestData request
+    ) {
         if (request.topics().isEmpty()) {
             return CompletableFuture.completedFuture(new AlterPartitionReassignmentsResponseData());
         }
-        return appendWriteEvent("alterPartitionReassignments",
-            time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs(), MILLISECONDS),
+        return appendWriteEvent("alterPartitionReassignments", context.deadlineNs(),
             () -> replicationControl.alterPartitionReassignments(request));
     }
 
     @Override
-    public CompletableFuture<ListPartitionReassignmentsResponseData>
-            listPartitionReassignments(ListPartitionReassignmentsRequestData request) {
+    public CompletableFuture<ListPartitionReassignmentsResponseData> listPartitionReassignments(
+        ControllerRequestContext context,
+        ListPartitionReassignmentsRequestData request
+    ) {
         if (request.topics() != null && request.topics().isEmpty()) {
             return CompletableFuture.completedFuture(
                 new ListPartitionReassignmentsResponseData().setErrorMessage(null));
         }
-        return appendReadEvent("listPartitionReassignments",
-            time.nanoseconds() + NANOSECONDS.convert(request.timeoutMs(), MILLISECONDS),
+        return appendReadEvent("listPartitionReassignments", context.deadlineNs(),
             () -> replicationControl.listPartitionReassignments(request.topics()));
     }
 
     @Override
     public CompletableFuture<Map<ConfigResource, ApiError>> legacyAlterConfigs(
-            Map<ConfigResource, Map<String, String>> newConfigs, boolean validateOnly) {
+        ControllerRequestContext context,
+        Map<ConfigResource, Map<String, String>> newConfigs, boolean validateOnly
+    ) {
         if (newConfigs.isEmpty()) {
             return CompletableFuture.completedFuture(Collections.emptyMap());
         }
-        return appendWriteEvent("legacyAlterConfigs", () -> {
+        return appendWriteEvent("legacyAlterConfigs", context.deadlineNs(), () -> {
             ControllerResult<Map<ConfigResource, ApiError>> result =
-                configurationControl.legacyAlterConfigs(newConfigs, resourceExists);
+                configurationControl.legacyAlterConfigs(newConfigs, false);
             if (validateOnly) {
                 return result.withoutRecords();
             } else {
@@ -1390,17 +1871,24 @@ public CompletableFuture<Map<ConfigResource, ApiError>> legacyAlterConfigs(
     }
 
     @Override
-    public CompletableFuture<BrokerHeartbeatReply>
-            processBrokerHeartbeat(BrokerHeartbeatRequestData request) {
-        return appendWriteEvent("processBrokerHeartbeat",
+    public CompletableFuture<BrokerHeartbeatReply> processBrokerHeartbeat(
+        ControllerRequestContext context,
+        BrokerHeartbeatRequestData request
+    ) {
+        return appendWriteEvent("processBrokerHeartbeat", context.deadlineNs(),
             new ControllerWriteOperation<BrokerHeartbeatReply>() {
                 private final int brokerId = request.brokerId();
                 private boolean inControlledShutdown = false;
 
                 @Override
                 public ControllerResult<BrokerHeartbeatReply> generateRecordsAndResult() {
+                    OptionalLong offsetForRegisterBrokerRecord = clusterControl.registerBrokerRecordOffset(brokerId);
+                    if (!offsetForRegisterBrokerRecord.isPresent()) {
+                        throw new StaleBrokerEpochException(
+                            String.format("Receive a heartbeat from broker %d before registration", brokerId));
+                    }
                     ControllerResult<BrokerHeartbeatReply> result = replicationControl.
-                        processBrokerHeartbeat(request, lastCommittedOffset);
+                        processBrokerHeartbeat(request, offsetForRegisterBrokerRecord.getAsLong());
                     inControlledShutdown = result.response().inControlledShutdown();
                     rescheduleMaybeFenceStaleBrokers();
                     return result;
@@ -1417,9 +1905,11 @@ public void processBatchEndOffset(long offset) {
     }
 
     @Override
-    public CompletableFuture<BrokerRegistrationReply>
-            registerBroker(BrokerRegistrationRequestData request) {
-        return appendWriteEvent("registerBroker", () -> {
+    public CompletableFuture<BrokerRegistrationReply> registerBroker(
+        ControllerRequestContext context,
+        BrokerRegistrationRequestData request
+    ) {
+        return appendWriteEvent("registerBroker", context.deadlineNs(), () -> {
             ControllerResult<BrokerRegistrationReply> result = clusterControl.
                 registerBroker(request, writeOffset + 1, featureControl.
                     finalizedFeatures(Long.MAX_VALUE));
@@ -1430,11 +1920,14 @@ public void processBatchEndOffset(long offset) {
 
     @Override
     public CompletableFuture<Map<ClientQuotaEntity, ApiError>> alterClientQuotas(
-            Collection<ClientQuotaAlteration> quotaAlterations, boolean validateOnly) {
+        ControllerRequestContext context,
+        Collection<ClientQuotaAlteration> quotaAlterations,
+        boolean validateOnly
+    ) {
         if (quotaAlterations.isEmpty()) {
             return CompletableFuture.completedFuture(Collections.emptyMap());
         }
-        return appendWriteEvent("alterClientQuotas", () -> {
+        return appendWriteEvent("alterClientQuotas", context.deadlineNs(), () -> {
             ControllerResult<Map<ClientQuotaEntity, ApiError>> result =
                 clientQuotaControlManager.alterClientQuotas(quotaAlterations);
             if (validateOnly) {
@@ -1447,22 +1940,63 @@ public CompletableFuture<Map<ClientQuotaEntity, ApiError>> alterClientQuotas(
 
     @Override
     public CompletableFuture<AllocateProducerIdsResponseData> allocateProducerIds(
-            AllocateProducerIdsRequestData request) {
-        return appendWriteEvent("allocateProducerIds",
+        ControllerRequestContext context,
+        AllocateProducerIdsRequestData request
+    ) {
+        return appendWriteEvent("allocateProducerIds", context.deadlineNs(),
             () -> producerIdControlManager.generateNextProducerId(request.brokerId(), request.brokerEpoch()))
             .thenApply(result -> new AllocateProducerIdsResponseData()
-                    .setProducerIdStart(result.firstProducerId())
-                    .setProducerIdLen(result.size()));
+                .setProducerIdStart(result.firstProducerId())
+                .setProducerIdLen(result.size()));
+    }
+
+    @Override
+    public CompletableFuture<UpdateFeaturesResponseData> updateFeatures(
+        ControllerRequestContext context,
+        UpdateFeaturesRequestData request
+    ) {
+        return appendWriteEvent("updateFeatures", context.deadlineNs(), () -> {
+            Map<String, Short> updates = new HashMap<>();
+            Map<String, FeatureUpdate.UpgradeType> upgradeTypes = new HashMap<>();
+            request.featureUpdates().forEach(featureUpdate -> {
+                String featureName = featureUpdate.feature();
+                upgradeTypes.put(featureName, FeatureUpdate.UpgradeType.fromCode(featureUpdate.upgradeType()));
+                updates.put(featureName, featureUpdate.maxVersionLevel());
+            });
+            return featureControl.updateFeatures(updates, upgradeTypes, clusterControl.brokerSupportedVersions(),
+                request.validateOnly());
+        }).thenApply(result -> {
+            UpdateFeaturesResponseData responseData = new UpdateFeaturesResponseData();
+            responseData.setResults(new UpdateFeaturesResponseData.UpdatableFeatureResultCollection(result.size()));
+            result.forEach((featureName, error) -> responseData.results().add(
+                new UpdateFeaturesResponseData.UpdatableFeatureResult()
+                    .setFeature(featureName)
+                    .setErrorCode(error.error().code())
+                    .setErrorMessage(error.message())));
+            return responseData;
+        });
     }
 
     @Override
-    public CompletableFuture<List<CreatePartitionsTopicResult>>
-            createPartitions(long deadlineNs, List<CreatePartitionsTopic> topics) {
+    public CompletableFuture<List<CreatePartitionsTopicResult>> createPartitions(
+        ControllerRequestContext context,
+        List<CreatePartitionsTopic> topics,
+        boolean validateOnly
+    ) {
         if (topics.isEmpty()) {
             return CompletableFuture.completedFuture(Collections.emptyList());
         }
-        return appendWriteEvent("createPartitions", deadlineNs,
-            () -> replicationControl.createPartitions(topics));
+
+        return appendWriteEvent("createPartitions", context.deadlineNs(), () -> {
+            final ControllerResult<List<CreatePartitionsTopicResult>> result = replicationControl.createPartitions(topics);
+            if (validateOnly) {
+                log.debug("Validate-only CreatePartitions result(s): {}", result.response());
+                return result.withoutRecords();
+            } else {
+                log.debug("CreatePartitions result(s): {}", result.response());
+                return result;
+            }
+        });
     }
 
     @Override
@@ -1482,13 +2016,21 @@ public CompletableFuture<Long> beginWritingSnapshot() {
     }
 
     @Override
-    public CompletableFuture<List<AclCreateResult>> createAcls(List<AclBinding> aclBindings) {
-        return appendWriteEvent("createAcls", () -> aclControlManager.createAcls(aclBindings));
+    public CompletableFuture<List<AclCreateResult>> createAcls(
+        ControllerRequestContext context,
+        List<AclBinding> aclBindings
+    ) {
+        return appendWriteEvent("createAcls", context.deadlineNs(),
+            () -> aclControlManager.createAcls(aclBindings));
     }
 
     @Override
-    public CompletableFuture<List<AclDeleteResult>> deleteAcls(List<AclBindingFilter> filters) {
-        return appendWriteEvent("deleteAcls", () -> aclControlManager.deleteAcls(filters));
+    public CompletableFuture<List<AclDeleteResult>> deleteAcls(
+        ControllerRequestContext context,
+        List<AclBindingFilter> filters
+    ) {
+        return appendWriteEvent("deleteAcls", context.deadlineNs(),
+            () -> aclControlManager.deleteAcls(filters));
     }
 
     @Override
@@ -1518,6 +2060,11 @@ public int curClaimEpoch() {
         return curClaimEpoch;
     }
 
+    // Visible for testing
+    MetadataVersion metadataVersion() {
+        return featureControl.metadataVersion();
+    }
+
     @Override
     public void close() throws InterruptedException {
         queue.close();
diff --git a/metadata/src/main/java/org/apache/kafka/controller/QuorumControllerMetrics.java b/metadata/src/main/java/org/apache/kafka/controller/QuorumControllerMetrics.java
index 03cd47c8272c9..b96a687b0f34e 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/QuorumControllerMetrics.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/QuorumControllerMetrics.java
@@ -21,9 +21,13 @@
 import com.yammer.metrics.core.Histogram;
 import com.yammer.metrics.core.MetricName;
 import com.yammer.metrics.core.MetricsRegistry;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.server.metrics.KafkaYammerMetrics;
 
 import java.util.Arrays;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 
 public final class QuorumControllerMetrics implements ControllerMetrics {
     private final static MetricName ACTIVE_CONTROLLER_COUNT = getMetricName(
@@ -44,7 +48,17 @@ public final class QuorumControllerMetrics implements ControllerMetrics {
         "KafkaController", "OfflinePartitionsCount");
     private final static MetricName PREFERRED_REPLICA_IMBALANCE_COUNT = getMetricName(
         "KafkaController", "PreferredReplicaImbalanceCount");
-    
+    private final static MetricName METADATA_ERROR_COUNT = getMetricName(
+            "KafkaController", "MetadataErrorCount");
+    private final static MetricName LAST_APPLIED_RECORD_OFFSET = getMetricName(
+        "KafkaController", "LastAppliedRecordOffset");
+    private final static MetricName LAST_COMMITTED_RECORD_OFFSET = getMetricName(
+        "KafkaController", "LastCommittedRecordOffset");
+    private final static MetricName LAST_APPLIED_RECORD_TIMESTAMP = getMetricName(
+        "KafkaController", "LastAppliedRecordTimestamp");
+    private final static MetricName LAST_APPLIED_RECORD_LAG_MS = getMetricName(
+        "KafkaController", "LastAppliedRecordLagMs");
+
     private final MetricsRegistry registry;
     private volatile boolean active;
     private volatile int fencedBrokerCount;
@@ -53,6 +67,10 @@ public final class QuorumControllerMetrics implements ControllerMetrics {
     private volatile int globalPartitionCount;
     private volatile int offlinePartitionCount;
     private volatile int preferredReplicaImbalanceCount;
+    private volatile AtomicInteger metadataErrorCount;
+    private final AtomicLong lastAppliedRecordOffset = new AtomicLong(0);
+    private final AtomicLong lastCommittedRecordOffset = new AtomicLong(0);
+    private final AtomicLong lastAppliedRecordTimestamp = new AtomicLong(0);
     private final Gauge<Integer> activeControllerCount;
     private final Gauge<Integer> fencedBrokerCountGauge;
     private final Gauge<Integer> activeBrokerCountGauge;
@@ -60,10 +78,18 @@ public final class QuorumControllerMetrics implements ControllerMetrics {
     private final Gauge<Integer> globalTopicCountGauge;
     private final Gauge<Integer> offlinePartitionCountGauge;
     private final Gauge<Integer> preferredReplicaImbalanceCountGauge;
+    private final Gauge<Integer> metadataErrorCountGauge;
+    private final Gauge<Long> lastAppliedRecordOffsetGauge;
+    private final Gauge<Long> lastCommittedRecordOffsetGauge;
+    private final Gauge<Long> lastAppliedRecordTimestampGauge;
+    private final Gauge<Long> lastAppliedRecordLagMsGauge;
     private final Histogram eventQueueTime;
     private final Histogram eventQueueProcessingTime;
 
-    public QuorumControllerMetrics(MetricsRegistry registry) {
+    public QuorumControllerMetrics(
+        MetricsRegistry registry,
+        Time time
+    ) {
         this.registry = Objects.requireNonNull(registry);
         this.active = false;
         this.fencedBrokerCount = 0;
@@ -72,6 +98,7 @@ public QuorumControllerMetrics(MetricsRegistry registry) {
         this.globalPartitionCount = 0;
         this.offlinePartitionCount = 0;
         this.preferredReplicaImbalanceCount = 0;
+        this.metadataErrorCount = new AtomicInteger(0);
         this.activeControllerCount = registry.newGauge(ACTIVE_CONTROLLER_COUNT, new Gauge<Integer>() {
             @Override
             public Integer value() {
@@ -116,6 +143,36 @@ public Integer value() {
                 return preferredReplicaImbalanceCount;
             }
         });
+        this.metadataErrorCountGauge = registry.newGauge(METADATA_ERROR_COUNT, new Gauge<Integer>() {
+            @Override
+            public Integer value() {
+                return metadataErrorCount.get();
+            }
+        });
+        lastAppliedRecordOffsetGauge = registry.newGauge(LAST_APPLIED_RECORD_OFFSET, new Gauge<Long>() {
+            @Override
+            public Long value() {
+                return lastAppliedRecordOffset.get();
+            }
+        });
+        lastCommittedRecordOffsetGauge = registry.newGauge(LAST_COMMITTED_RECORD_OFFSET, new Gauge<Long>() {
+            @Override
+            public Long value() {
+                return lastCommittedRecordOffset.get();
+            }
+        });
+        lastAppliedRecordTimestampGauge = registry.newGauge(LAST_APPLIED_RECORD_TIMESTAMP, new Gauge<Long>() {
+            @Override
+            public Long value() {
+                return lastAppliedRecordTimestamp.get();
+            }
+        });
+        lastAppliedRecordLagMsGauge = registry.newGauge(LAST_APPLIED_RECORD_LAG_MS, new Gauge<Long>() {
+            @Override
+            public Long value() {
+                return time.milliseconds() - lastAppliedRecordTimestamp.get();
+            }
+        });
     }
 
     @Override
@@ -197,22 +254,66 @@ public int preferredReplicaImbalanceCount() {
         return this.preferredReplicaImbalanceCount;
     }
 
+    @Override
+    public void incrementMetadataErrorCount() {
+        this.metadataErrorCount.getAndIncrement();
+    }
+
+    @Override
+    public int metadataErrorCount() {
+        return this.metadataErrorCount.get();
+    }
+    @Override
+    public void setLastAppliedRecordOffset(long offset) {
+        lastAppliedRecordOffset.set(offset);
+    }
+
+    @Override
+    public long lastAppliedRecordOffset() {
+        return lastAppliedRecordOffset.get();
+    }
+
+    @Override
+    public void setLastCommittedRecordOffset(long offset) {
+        lastCommittedRecordOffset.set(offset);
+    }
+
+    @Override
+    public long lastCommittedRecordOffset() {
+        return lastCommittedRecordOffset.get();
+    }
+
+    @Override
+    public void setLastAppliedRecordTimestamp(long timestamp) {
+        lastAppliedRecordTimestamp.set(timestamp);
+    }
+
+    @Override
+    public long lastAppliedRecordTimestamp() {
+        return lastAppliedRecordTimestamp.get();
+    }
+
     @Override
     public void close() {
         Arrays.asList(
             ACTIVE_CONTROLLER_COUNT,
+            FENCED_BROKER_COUNT,
+            ACTIVE_BROKER_COUNT,
             EVENT_QUEUE_TIME_MS,
             EVENT_QUEUE_PROCESSING_TIME_MS,
             GLOBAL_TOPIC_COUNT,
             GLOBAL_PARTITION_COUNT,
             OFFLINE_PARTITION_COUNT,
-            PREFERRED_REPLICA_IMBALANCE_COUNT).forEach(this.registry::removeMetric);
+            PREFERRED_REPLICA_IMBALANCE_COUNT,
+            METADATA_ERROR_COUNT,
+            LAST_APPLIED_RECORD_OFFSET,
+            LAST_COMMITTED_RECORD_OFFSET,
+            LAST_APPLIED_RECORD_TIMESTAMP,
+            LAST_APPLIED_RECORD_LAG_MS
+        ).forEach(registry::removeMetric);
     }
 
     private static MetricName getMetricName(String type, String name) {
-        final String group = "kafka.controller";
-        final StringBuilder mbeanNameBuilder = new StringBuilder();
-        mbeanNameBuilder.append(group).append(":type=").append(type).append(",name=").append(name);
-        return new MetricName(group, type, name, null, mbeanNameBuilder.toString());
+        return KafkaYammerMetrics.getMetricName("kafka.controller", type, name);
     }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/QuorumFeatures.java b/metadata/src/main/java/org/apache/kafka/controller/QuorumFeatures.java
new file mode 100644
index 0000000000000..36725c251857c
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/controller/QuorumFeatures.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import org.apache.kafka.clients.ApiVersions;
+import org.apache.kafka.clients.NodeApiVersions;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.feature.SupportedVersionRange;
+import org.apache.kafka.metadata.VersionRange;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * A holder class of the local node's supported feature flags as well as the ApiVersions of other nodes.
+ */
+public class QuorumFeatures {
+    private static final VersionRange DISABLED = VersionRange.of(0, 0);
+
+    private static final Logger log = LoggerFactory.getLogger(QuorumFeatures.class);
+
+    private final int nodeId;
+    private final ApiVersions apiVersions;
+    private final Map<String, VersionRange> localSupportedFeatures;
+    private final List<Integer> quorumNodeIds;
+
+    QuorumFeatures(
+        int nodeId,
+        ApiVersions apiVersions,
+        Map<String, VersionRange> localSupportedFeatures,
+        List<Integer> quorumNodeIds
+    ) {
+        this.nodeId = nodeId;
+        this.apiVersions = apiVersions;
+        this.localSupportedFeatures = Collections.unmodifiableMap(localSupportedFeatures);
+        this.quorumNodeIds = Collections.unmodifiableList(quorumNodeIds);
+    }
+
+    public static QuorumFeatures create(
+        int nodeId,
+        ApiVersions apiVersions,
+        Map<String, VersionRange> localSupportedFeatures,
+        Collection<Node> quorumNodes
+    ) {
+        List<Integer> nodeIds = quorumNodes.stream().map(Node::id).collect(Collectors.toList());
+        return new QuorumFeatures(nodeId, apiVersions, localSupportedFeatures, nodeIds);
+    }
+
+    public static Map<String, VersionRange> defaultFeatureMap() {
+        Map<String, VersionRange> features = new HashMap<>(1);
+        features.put(MetadataVersion.FEATURE_NAME, VersionRange.of(
+            MetadataVersion.MINIMUM_KRAFT_VERSION.featureLevel(),
+            MetadataVersion.latest().featureLevel()));
+        return features;
+    }
+
+    /**
+     * Return the reason a specific feature level is not supported, or Optional.empty if it is supported.
+     *
+     * @param featureName   The feature name.
+     * @param level         The feature level.
+     * @return              The reason why the feature level is not supported, or Optional.empty if it is supported.
+     */
+    public Optional<String> reasonNotSupported(String featureName, short level) {
+        VersionRange localRange = localSupportedFeatures.getOrDefault(featureName, DISABLED);
+        if (!localRange.contains(level)) {
+            if (localRange.equals(DISABLED)) {
+                return Optional.of("Local controller " + nodeId + " does not support this feature.");
+            } else {
+                return Optional.of("Local controller " + nodeId + " only supports versions " + localRange);
+            }
+        }
+        List<String> missing = new ArrayList<>();
+        for (int id : quorumNodeIds) {
+            if (nodeId == id) {
+                continue; // We get the local node's features from localSupportedFeatures.
+            }
+            NodeApiVersions nodeVersions = apiVersions.get(Integer.toString(id));
+            if (nodeVersions == null) {
+                missing.add(Integer.toString(id));
+                continue;
+            }
+            SupportedVersionRange supportedRange = nodeVersions.supportedFeatures().get(featureName);
+            VersionRange range = supportedRange == null ? DISABLED :
+                    VersionRange.of(supportedRange.min(), supportedRange.max());
+            if (!range.contains(level)) {
+                if (range.equals(DISABLED)) {
+                    return Optional.of("Controller " + id + " does not support this feature.");
+                } else {
+                    return Optional.of("Controller " + id + " only supports versions " + range);
+                }
+            }
+        }
+        if (!missing.isEmpty()) {
+            log.info("Unable to get feature level information for controller(s): " + String.join(", ", missing));
+        }
+        return Optional.empty();
+    }
+
+    VersionRange localSupportedFeature(String featureName) {
+        return localSupportedFeatures.getOrDefault(featureName, DISABLED);
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ReplicationControlManager.java b/metadata/src/main/java/org/apache/kafka/controller/ReplicationControlManager.java
index ad9e3c7ecc9a0..4ffb339967c2f 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ReplicationControlManager.java
+++ b/metadata/src/main/java/org/apache/kafka/controller/ReplicationControlManager.java
@@ -17,7 +17,9 @@
 
 package org.apache.kafka.controller;
 
+import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.clients.admin.AlterConfigOp.OpType;
+import org.apache.kafka.clients.admin.ConfigEntry;
 import org.apache.kafka.common.ElectionType;
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.config.ConfigResource;
@@ -34,8 +36,8 @@
 import org.apache.kafka.common.errors.UnknownTopicIdException;
 import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
 import org.apache.kafka.common.internals.Topic;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignablePartition;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignableTopic;
@@ -50,6 +52,7 @@
 import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableReplicaAssignment;
 import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableTopic;
 import org.apache.kafka.common.message.CreateTopicsRequestData.CreatableTopicCollection;
+import org.apache.kafka.common.message.CreateTopicsRequestData.CreateableTopicConfigCollection;
 import org.apache.kafka.common.message.CreateTopicsResponseData;
 import org.apache.kafka.common.message.CreateTopicsResponseData.CreatableTopicResult;
 import org.apache.kafka.common.message.ElectLeadersRequestData;
@@ -61,6 +64,7 @@
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData.OngoingPartitionReassignment;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData.OngoingTopicReassignment;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
 import org.apache.kafka.common.metadata.FenceBrokerRecord;
 import org.apache.kafka.common.metadata.PartitionChangeRecord;
 import org.apache.kafka.common.metadata.PartitionRecord;
@@ -71,15 +75,23 @@
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.common.utils.LogContext;
-import org.apache.kafka.controller.BrokersToIsrs.TopicIdPartition;
-import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistration;
+import org.apache.kafka.metadata.BrokerRegistrationFencingChange;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
+import org.apache.kafka.metadata.KafkaConfigSchema;
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.Replicas;
+import org.apache.kafka.metadata.placement.ClusterDescriber;
+import org.apache.kafka.metadata.placement.PlacementSpec;
+import org.apache.kafka.metadata.placement.UsableBroker;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.server.policy.CreateTopicPolicy;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.apache.kafka.timeline.TimelineHashMap;
+import org.apache.kafka.timeline.TimelineHashSet;
 import org.apache.kafka.timeline.TimelineInteger;
 import org.slf4j.Logger;
 
@@ -87,34 +99,37 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Optional;
-import java.util.function.Function;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
-import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.Optional;
 import java.util.OptionalInt;
+import java.util.Set;
+import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 
 import static org.apache.kafka.clients.admin.AlterConfigOp.OpType.SET;
 import static org.apache.kafka.common.config.ConfigResource.Type.TOPIC;
-import static org.apache.kafka.common.metadata.MetadataRecordType.FENCE_BROKER_RECORD;
 import static org.apache.kafka.common.metadata.MetadataRecordType.PARTITION_RECORD;
 import static org.apache.kafka.common.metadata.MetadataRecordType.REMOVE_TOPIC_RECORD;
 import static org.apache.kafka.common.metadata.MetadataRecordType.TOPIC_RECORD;
-import static org.apache.kafka.common.metadata.MetadataRecordType.UNFENCE_BROKER_RECORD;
-import static org.apache.kafka.common.metadata.MetadataRecordType.UNREGISTER_BROKER_RECORD;
 import static org.apache.kafka.common.protocol.Errors.FENCED_LEADER_EPOCH;
+import static org.apache.kafka.common.protocol.Errors.INELIGIBLE_REPLICA;
 import static org.apache.kafka.common.protocol.Errors.INVALID_REQUEST;
 import static org.apache.kafka.common.protocol.Errors.INVALID_UPDATE_VERSION;
+import static org.apache.kafka.common.protocol.Errors.NEW_LEADER_ELECTED;
+import static org.apache.kafka.common.protocol.Errors.NONE;
 import static org.apache.kafka.common.protocol.Errors.NO_REASSIGNMENT_IN_PROGRESS;
+import static org.apache.kafka.common.protocol.Errors.OPERATION_NOT_ATTEMPTED;
+import static org.apache.kafka.common.protocol.Errors.TOPIC_AUTHORIZATION_FAILED;
 import static org.apache.kafka.common.protocol.Errors.UNKNOWN_TOPIC_ID;
 import static org.apache.kafka.common.protocol.Errors.UNKNOWN_TOPIC_OR_PARTITION;
-import static org.apache.kafka.controller.ConfigurationControlManager.NO_OP_EXISTENCE_CHECKER;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER_CHANGE;
 
@@ -125,6 +140,109 @@
  * of each partition, as well as administrative tasks like creating or deleting topics.
  */
 public class ReplicationControlManager {
+    static final int MAX_ELECTIONS_PER_IMBALANCE = 1_000;
+
+    static class Builder {
+        private SnapshotRegistry snapshotRegistry = null;
+        private LogContext logContext = null;
+        private short defaultReplicationFactor = (short) 3;
+        private int defaultNumPartitions = 1;
+        private int maxElectionsPerImbalance = MAX_ELECTIONS_PER_IMBALANCE;
+        private ConfigurationControlManager configurationControl = null;
+        private ClusterControlManager clusterControl = null;
+        private ControllerMetrics controllerMetrics = null;
+        private Optional<CreateTopicPolicy> createTopicPolicy = Optional.empty();
+        private FeatureControlManager featureControl = null;
+
+        Builder setSnapshotRegistry(SnapshotRegistry snapshotRegistry) {
+            this.snapshotRegistry = snapshotRegistry;
+            return this;
+        }
+
+        Builder setLogContext(LogContext logContext) {
+            this.logContext = logContext;
+            return this;
+        }
+
+        Builder setDefaultReplicationFactor(short defaultReplicationFactor) {
+            this.defaultReplicationFactor = defaultReplicationFactor;
+            return this;
+        }
+
+        Builder setDefaultNumPartitions(int defaultNumPartitions) {
+            this.defaultNumPartitions = defaultNumPartitions;
+            return this;
+        }
+
+        Builder setMaxElectionsPerImbalance(int maxElectionsPerImbalance) {
+            this.maxElectionsPerImbalance = maxElectionsPerImbalance;
+            return this;
+        }
+
+        Builder setConfigurationControl(ConfigurationControlManager configurationControl) {
+            this.configurationControl = configurationControl;
+            return this;
+        }
+
+        Builder setClusterControl(ClusterControlManager clusterControl) {
+            this.clusterControl = clusterControl;
+            return this;
+        }
+
+        Builder setControllerMetrics(ControllerMetrics controllerMetrics) {
+            this.controllerMetrics = controllerMetrics;
+            return this;
+        }
+
+        Builder setCreateTopicPolicy(Optional<CreateTopicPolicy> createTopicPolicy) {
+            this.createTopicPolicy = createTopicPolicy;
+            return this;
+        }
+
+        public Builder setFeatureControl(FeatureControlManager featureControl) {
+            this.featureControl = featureControl;
+            return this;
+        }
+
+        ReplicationControlManager build() {
+            if (configurationControl == null) {
+                throw new IllegalStateException("Configuration control must be set before building");
+            } else if (clusterControl == null) {
+                throw new IllegalStateException("Cluster controller must be set before building");
+            } else if (controllerMetrics == null) {
+                throw new IllegalStateException("Metrics must be set before building");
+            }
+            if (logContext == null) logContext = new LogContext();
+            if (snapshotRegistry == null) snapshotRegistry = configurationControl.snapshotRegistry();
+            if (featureControl == null) {
+                featureControl = new FeatureControlManager.Builder().
+                    setLogContext(logContext).
+                    setSnapshotRegistry(snapshotRegistry).
+                    setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                        QuorumFeatures.defaultFeatureMap(),
+                        Collections.singletonList(0))).
+                    setMetadataVersion(MetadataVersion.latest()).
+                    build();
+            }
+            return new ReplicationControlManager(snapshotRegistry,
+                logContext,
+                defaultReplicationFactor,
+                defaultNumPartitions,
+                maxElectionsPerImbalance,
+                configurationControl,
+                clusterControl,
+                controllerMetrics,
+                createTopicPolicy,
+                featureControl);
+        }
+    }
+
+    class KRaftClusterDescriber implements ClusterDescriber {
+        @Override
+        public Iterator<UsableBroker> usableBrokers() {
+            return clusterControl.usableBrokers();
+        }
+    }
 
     static class TopicControlInfo {
         private final String name;
@@ -146,6 +264,15 @@ public Uuid topicId() {
         }
     }
 
+    /**
+     * Translate a CreateableTopicConfigCollection to a map from string to string.
+     */
+    static Map<String, String> translateCreationConfigs(CreateableTopicConfigCollection collection) {
+        HashMap<String, String> result = new HashMap<>();
+        collection.forEach(config -> result.put(config.name(), config.value()));
+        return Collections.unmodifiableMap(result);
+    }
+
     private final SnapshotRegistry snapshotRegistry;
     private final Logger log;
 
@@ -162,14 +289,14 @@ public Uuid topicId() {
     private final int defaultNumPartitions;
 
     /**
-     * A count of the total number of partitions in the cluster.
+     * Maximum number of leader elections to perform during one partition leader balancing operation.
      */
-    private final TimelineInteger globalPartitionCount;
+    private final int maxElectionsPerImbalance;
 
     /**
-     * A count of the number of partitions that do not have their first replica as a leader.
+     * A count of the total number of partitions in the cluster.
      */
-    private final TimelineInteger preferredReplicaImbalanceCount;
+    private final TimelineInteger globalPartitionCount;
 
     /**
      * A reference to the controller's configuration control manager.
@@ -191,11 +318,33 @@ public Uuid topicId() {
      */
     private final Optional<CreateTopicPolicy> createTopicPolicy;
 
+    /**
+     * The feature control manager.
+     */
+    private final FeatureControlManager featureControl;
+
     /**
      * Maps topic names to topic UUIDs.
      */
     private final TimelineHashMap<String, Uuid> topicsByName;
 
+    /**
+     * We try to prevent topics from being created if their names would collide with
+     * existing topics when periods in the topic name are replaced with underscores.
+     * The reason for this is that some per-topic metrics do replace periods with
+     * underscores, and would therefore be ambiguous otherwise.
+     *
+     * This map is from normalized topic name to a set of topic names. So if we had two
+     * topics named foo.bar and foo_bar this map would contain
+     * a mapping from foo_bar to a set containing foo.bar and foo_bar.
+     *
+     * Since we reject topic creations that would collide, under normal conditions the
+     * sets in this map should only have a size of 1. However, if the cluster was
+     * upgraded from a version prior to KAFKA-13743, it may be possible to have more
+     * values here, since collidiing topic names will be "grandfathered in."
+     */
+    private final TimelineHashMap<String, TimelineHashSet<String>> topicsWithCollisionChars;
+
     /**
      * Maps topic UUIDs to structures containing topic information, including partitions.
      */
@@ -211,32 +360,58 @@ public Uuid topicId() {
      */
     private final TimelineHashMap<Uuid, int[]> reassigningTopics;
 
-    ReplicationControlManager(SnapshotRegistry snapshotRegistry,
-                              LogContext logContext,
-                              short defaultReplicationFactor,
-                              int defaultNumPartitions,
-                              ConfigurationControlManager configurationControl,
-                              ClusterControlManager clusterControl,
-                              ControllerMetrics controllerMetrics,
-                              Optional<CreateTopicPolicy> createTopicPolicy) {
+    /**
+     * The set of topic partitions for which the leader is not the preferred leader.
+     */
+    private final TimelineHashSet<TopicIdPartition> imbalancedPartitions;
+
+    /**
+     * A ClusterDescriber which supplies cluster information to our ReplicaPlacer.
+     */
+    final KRaftClusterDescriber clusterDescriber = new KRaftClusterDescriber();
+
+    private ReplicationControlManager(
+        SnapshotRegistry snapshotRegistry,
+        LogContext logContext,
+        short defaultReplicationFactor,
+        int defaultNumPartitions,
+        int maxElectionsPerImbalance,
+        ConfigurationControlManager configurationControl,
+        ClusterControlManager clusterControl,
+        ControllerMetrics controllerMetrics,
+        Optional<CreateTopicPolicy> createTopicPolicy,
+        FeatureControlManager featureControl
+    ) {
         this.snapshotRegistry = snapshotRegistry;
         this.log = logContext.logger(ReplicationControlManager.class);
         this.defaultReplicationFactor = defaultReplicationFactor;
         this.defaultNumPartitions = defaultNumPartitions;
+        this.maxElectionsPerImbalance = maxElectionsPerImbalance;
         this.configurationControl = configurationControl;
         this.controllerMetrics = controllerMetrics;
         this.createTopicPolicy = createTopicPolicy;
+        this.featureControl = featureControl;
         this.clusterControl = clusterControl;
         this.globalPartitionCount = new TimelineInteger(snapshotRegistry);
-        this.preferredReplicaImbalanceCount = new TimelineInteger(snapshotRegistry);
         this.topicsByName = new TimelineHashMap<>(snapshotRegistry, 0);
+        this.topicsWithCollisionChars = new TimelineHashMap<>(snapshotRegistry, 0);
         this.topics = new TimelineHashMap<>(snapshotRegistry, 0);
         this.brokersToIsrs = new BrokersToIsrs(snapshotRegistry);
         this.reassigningTopics = new TimelineHashMap<>(snapshotRegistry, 0);
+        this.imbalancedPartitions = new TimelineHashSet<>(snapshotRegistry, 0);
     }
 
     public void replay(TopicRecord record) {
         topicsByName.put(record.name(), record.topicId());
+        if (Topic.hasCollisionChars(record.name())) {
+            String normalizedName = Topic.unifyCollisionChars(record.name());
+            TimelineHashSet<String> topicNames = topicsWithCollisionChars.get(normalizedName);
+            if (topicNames == null) {
+                topicNames = new TimelineHashSet<>(snapshotRegistry, 1);
+                topicsWithCollisionChars.put(normalizedName, topicNames);
+            }
+            topicNames.add(record.name());
+        }
         topics.put(record.topicId(),
             new TopicControlInfo(record.name(), snapshotRegistry, record.topicId()));
         controllerMetrics.setGlobalTopicsCount(topics.size());
@@ -270,11 +445,15 @@ public void replay(PartitionRecord record) {
             updateReassigningTopicsIfNeeded(record.topicId(), record.partitionId(),
                     prevPartInfo.isReassigning(), newPartInfo.isReassigning());
         }
-        if (newPartInfo.leader != newPartInfo.preferredReplica()) {
-            preferredReplicaImbalanceCount.increment();
+
+        if (newPartInfo.hasPreferredLeader()) {
+            imbalancedPartitions.remove(new TopicIdPartition(record.topicId(), record.partitionId()));
+        } else {
+            imbalancedPartitions.add(new TopicIdPartition(record.topicId(), record.partitionId()));
         }
+
         controllerMetrics.setOfflinePartitionCount(brokersToIsrs.offlinePartitionCount());
-        controllerMetrics.setPreferredReplicaImbalanceCount(preferredReplicaImbalanceCount.get());
+        controllerMetrics.setPreferredReplicaImbalanceCount(imbalancedPartitions.size());
     }
 
     private void updateReassigningTopicsIfNeeded(Uuid topicId, int partitionId,
@@ -316,11 +495,16 @@ public void replay(PartitionChangeRecord record) {
         String topicPart = topicInfo.name + "-" + record.partitionId() + " with topic ID " +
             record.topicId();
         newPartitionInfo.maybeLogPartitionChange(log, topicPart, prevPartitionInfo);
-        if (!newPartitionInfo.hasPreferredLeader() && prevPartitionInfo.hasPreferredLeader()) {
-            preferredReplicaImbalanceCount.increment();
+
+        if (newPartitionInfo.hasPreferredLeader()) {
+            imbalancedPartitions.remove(new TopicIdPartition(record.topicId(), record.partitionId()));
+        } else {
+            imbalancedPartitions.add(new TopicIdPartition(record.topicId(), record.partitionId()));
         }
+
         controllerMetrics.setOfflinePartitionCount(brokersToIsrs.offlinePartitionCount());
-        controllerMetrics.setPreferredReplicaImbalanceCount(preferredReplicaImbalanceCount.get());
+        controllerMetrics.setPreferredReplicaImbalanceCount(imbalancedPartitions.size());
+
         if (record.removingReplicas() != null || record.addingReplicas() != null) {
             log.info("Replayed partition assignment change {} for topic {}", record, topicInfo.name);
         } else if (log.isTraceEnabled()) {
@@ -336,19 +520,32 @@ public void replay(RemoveTopicRecord record) {
                 " to remove.");
         }
         topicsByName.remove(topic.name);
+        if (Topic.hasCollisionChars(topic.name)) {
+            String normalizedName = Topic.unifyCollisionChars(topic.name);
+            TimelineHashSet<String> colliding = topicsWithCollisionChars.get(normalizedName);
+            if (colliding != null) {
+                colliding.remove(topic.name);
+                if (colliding.isEmpty()) {
+                    topicsWithCollisionChars.remove(topic.name);
+                }
+            }
+        }
         reassigningTopics.remove(record.topicId());
 
         // Delete the configurations associated with this topic.
         configurationControl.deleteTopicConfigs(topic.name);
 
-        // Remove the entries for this topic in brokersToIsrs.
-        for (PartitionRegistration partition : topic.parts.values()) {
+        for (Map.Entry<Integer, PartitionRegistration> entry : topic.parts.entrySet()) {
+            int partitionId = entry.getKey();
+            PartitionRegistration partition = entry.getValue();
+
+            // Remove the entries for this topic in brokersToIsrs.
             for (int i = 0; i < partition.isr.length; i++) {
                 brokersToIsrs.removeTopicEntryForBroker(topic.id, partition.isr[i]);
             }
-            if (partition.leader != partition.preferredReplica()) {
-                preferredReplicaImbalanceCount.decrement();
-            }
+
+            imbalancedPartitions.remove(new TopicIdPartition(record.topicId(), partitionId));
+
             globalPartitionCount.decrement();
         }
         brokersToIsrs.removeTopicEntryForBroker(topic.id, NO_LEADER);
@@ -356,17 +553,17 @@ public void replay(RemoveTopicRecord record) {
         controllerMetrics.setGlobalTopicsCount(topics.size());
         controllerMetrics.setGlobalPartitionCount(globalPartitionCount.get());
         controllerMetrics.setOfflinePartitionCount(brokersToIsrs.offlinePartitionCount());
-        controllerMetrics.setPreferredReplicaImbalanceCount(preferredReplicaImbalanceCount.get());
+        controllerMetrics.setPreferredReplicaImbalanceCount(imbalancedPartitions.size());
         log.info("Removed topic {} with ID {}.", topic.name, record.topicId());
     }
 
     ControllerResult<CreateTopicsResponseData>
-            createTopics(CreateTopicsRequestData request) {
+            createTopics(CreateTopicsRequestData request, Set<String> describable) {
         Map<String, ApiError> topicErrors = new HashMap<>();
         List<ApiMessageAndVersion> records = new ArrayList<>();
 
         // Check the topic names.
-        validateNewTopicNames(topicErrors, request.topics());
+        validateNewTopicNames(topicErrors, request.topics(), topicsWithCollisionChars);
 
         // Identify topics that already exist and mark them with the appropriate error
         request.topics().stream().filter(creatableTopic -> topicsByName.containsKey(creatableTopic.name()))
@@ -378,7 +575,7 @@ public void replay(RemoveTopicRecord record) {
         Map<ConfigResource, Map<String, Entry<OpType, String>>> configChanges =
             computeConfigChanges(topicErrors, request.topics());
         ControllerResult<Map<ConfigResource, ApiError>> configResult =
-            configurationControl.incrementalAlterConfigs(configChanges, NO_OP_EXISTENCE_CHECKER);
+            configurationControl.incrementalAlterConfigs(configChanges, true);
         for (Entry<ConfigResource, ApiError> entry : configResult.response().entrySet()) {
             if (entry.getValue().isFailure()) {
                 topicErrors.put(entry.getKey().name(), entry.getValue());
@@ -392,7 +589,7 @@ public void replay(RemoveTopicRecord record) {
             if (topicErrors.containsKey(topic.name())) continue;
             ApiError error;
             try {
-                error = createTopic(topic, records, successes);
+                error = createTopic(topic, records, successes, describable.contains(topic.name()));
             } catch (ApiException e) {
                 error = ApiError.fromThrowable(e);
             }
@@ -434,7 +631,9 @@ public void replay(RemoveTopicRecord record) {
 
     private ApiError createTopic(CreatableTopic topic,
                                  List<ApiMessageAndVersion> records,
-                                 Map<String, CreatableTopicResult> successes) {
+                                 Map<String, CreatableTopicResult> successes,
+                                 boolean authorizedToReturnConfigs) {
+        Map<String, String> creationConfigs = translateCreationConfigs(topic.configs());
         Map<Integer, PartitionRegistration> newParts = new HashMap<>();
         if (!topic.assignments().isEmpty()) {
             if (topic.replicationFactor() != -1) {
@@ -457,24 +656,28 @@ private ApiError createTopic(CreatableTopic topic,
                 validateManualPartitionAssignment(assignment.brokerIds(), replicationFactor);
                 replicationFactor = OptionalInt.of(assignment.brokerIds().size());
                 List<Integer> isr = assignment.brokerIds().stream().
-                    filter(clusterControl::unfenced).collect(Collectors.toList());
+                    filter(clusterControl::active).collect(Collectors.toList());
                 if (isr.isEmpty()) {
                     return new ApiError(Errors.INVALID_REPLICA_ASSIGNMENT,
                         "All brokers specified in the manual partition assignment for " +
-                        "partition " + assignment.partitionIndex() + " are fenced.");
+                        "partition " + assignment.partitionIndex() + " are fenced or in controlled shutdown.");
                 }
                 newParts.put(assignment.partitionIndex(), new PartitionRegistration(
                     Replicas.toArray(assignment.brokerIds()), Replicas.toArray(isr),
-                    Replicas.NONE, Replicas.NONE, isr.get(0), 0, 0));
+                    Replicas.NONE, Replicas.NONE, isr.get(0), LeaderRecoveryState.RECOVERED, 0, 0));
+            }
+            for (int i = 0; i < newParts.size(); i++) {
+                if (!newParts.containsKey(i)) {
+                    return new ApiError(Errors.INVALID_REPLICA_ASSIGNMENT,
+                            "partitions should be a consecutive 0-based integer sequence");
+                }
             }
             ApiError error = maybeCheckCreateTopicPolicy(() -> {
                 Map<Integer, List<Integer>> assignments = new HashMap<>();
                 newParts.entrySet().forEach(e -> assignments.put(e.getKey(),
                     Replicas.toList(e.getValue().replicas)));
-                Map<String, String> configs = new HashMap<>();
-                topic.configs().forEach(config -> configs.put(config.name(), config.value()));
                 return new CreateTopicPolicy.RequestMetadata(
-                    topic.name(), null, null, assignments, configs);
+                    topic.name(), null, null, assignments, creationConfigs);
             });
             if (error.isFailure()) return error;
         } else if (topic.replicationFactor() < -1 || topic.replicationFactor() == 0) {
@@ -489,34 +692,70 @@ private ApiError createTopic(CreatableTopic topic,
             short replicationFactor = topic.replicationFactor() == -1 ?
                 defaultReplicationFactor : topic.replicationFactor();
             try {
-                List<List<Integer>> replicas = clusterControl.
-                    placeReplicas(0, numPartitions, replicationFactor);
-                for (int partitionId = 0; partitionId < replicas.size(); partitionId++) {
-                    int[] r = Replicas.toArray(replicas.get(partitionId));
+                List<List<Integer>> partitions = clusterControl.replicaPlacer().place(new PlacementSpec(
+                    0,
+                    numPartitions,
+                    replicationFactor
+                ), clusterDescriber);
+                for (int partitionId = 0; partitionId < partitions.size(); partitionId++) {
+                    List<Integer> replicas = partitions.get(partitionId);
+                    List<Integer> isr = replicas.stream().
+                        filter(clusterControl::active).collect(Collectors.toList());
+                    // If the ISR is empty, it means that all brokers are fenced or
+                    // in controlled shutdown. To be consistent with the replica placer,
+                    // we reject the create topic request with INVALID_REPLICATION_FACTOR.
+                    if (isr.isEmpty()) {
+                        return new ApiError(Errors.INVALID_REPLICATION_FACTOR,
+                            "Unable to replicate the partition " + replicationFactor +
+                                " time(s): All brokers are currently fenced or in controlled shutdown.");
+                    }
                     newParts.put(partitionId,
-                        new PartitionRegistration(r, r, Replicas.NONE, Replicas.NONE, r[0], 0, 0));
+                        new PartitionRegistration(
+                            Replicas.toArray(replicas),
+                            Replicas.toArray(isr),
+                            Replicas.NONE,
+                            Replicas.NONE,
+                            isr.get(0),
+                            LeaderRecoveryState.RECOVERED,
+                            0,
+                            0));
                 }
             } catch (InvalidReplicationFactorException e) {
                 return new ApiError(Errors.INVALID_REPLICATION_FACTOR,
                     "Unable to replicate the partition " + replicationFactor +
                         " time(s): " + e.getMessage());
             }
-            ApiError error = maybeCheckCreateTopicPolicy(() -> {
-                Map<String, String> configs = new HashMap<>();
-                topic.configs().forEach(config -> configs.put(config.name(), config.value()));
-                return new CreateTopicPolicy.RequestMetadata(
-                    topic.name(), numPartitions, replicationFactor, null, configs);
-            });
+            ApiError error = maybeCheckCreateTopicPolicy(() -> new CreateTopicPolicy.RequestMetadata(
+                topic.name(), numPartitions, replicationFactor, null, creationConfigs));
             if (error.isFailure()) return error;
         }
         Uuid topicId = Uuid.randomUuid();
-        successes.put(topic.name(), new CreatableTopicResult().
+        CreatableTopicResult result = new CreatableTopicResult().
             setName(topic.name()).
             setTopicId(topicId).
-            setErrorCode((short) 0).
-            setErrorMessage(null).
-            setNumPartitions(newParts.size()).
-            setReplicationFactor((short) newParts.get(0).replicas.length));
+            setErrorCode(NONE.code()).
+            setErrorMessage(null);
+        if (authorizedToReturnConfigs) {
+            Map<String, ConfigEntry> effectiveConfig = configurationControl.
+                computeEffectiveTopicConfigs(creationConfigs);
+            List<String> configNames = new ArrayList<>(effectiveConfig.keySet());
+            configNames.sort(String::compareTo);
+            for (String configName : configNames) {
+                ConfigEntry entry = effectiveConfig.get(configName);
+                result.configs().add(new CreateTopicsResponseData.CreatableTopicConfigs().
+                    setName(entry.name()).
+                    setValue(entry.isSensitive() ? null : entry.value()).
+                    setReadOnly(entry.isReadOnly()).
+                    setConfigSource(KafkaConfigSchema.translateConfigSource(entry.source()).id()).
+                    setIsSensitive(entry.isSensitive()));
+            }
+            result.setNumPartitions(newParts.size());
+            result.setReplicationFactor((short) newParts.values().iterator().next().replicas.length);
+            result.setTopicConfigErrorCode(NONE.code());
+        } else {
+            result.setTopicConfigErrorCode(TOPIC_AUTHORIZATION_FAILED.code());
+        }
+        successes.put(topic.name(), result);
         records.add(new ApiMessageAndVersion(new TopicRecord().
             setName(topic.name()).
             setTopicId(topicId), TOPIC_RECORD.highestSupportedVersion()));
@@ -540,7 +779,8 @@ private ApiError maybeCheckCreateTopicPolicy(Supplier<CreateTopicPolicy.RequestM
     }
 
     static void validateNewTopicNames(Map<String, ApiError> topicErrors,
-                                      CreatableTopicCollection topics) {
+                                      CreatableTopicCollection topics,
+                                      Map<String, ? extends Set<String>> topicsWithCollisionChars) {
         for (CreatableTopic topic : topics) {
             if (topicErrors.containsKey(topic.name())) continue;
             try {
@@ -549,6 +789,15 @@ static void validateNewTopicNames(Map<String, ApiError> topicErrors,
                 topicErrors.put(topic.name(),
                     new ApiError(Errors.INVALID_TOPIC_EXCEPTION, e.getMessage()));
             }
+            if (Topic.hasCollisionChars(topic.name())) {
+                String normalizedName = Topic.unifyCollisionChars(topic.name());
+                Set<String> colliding = topicsWithCollisionChars.get(normalizedName);
+                if (colliding != null) {
+                    topicErrors.put(topic.name(), new ApiError(Errors.INVALID_TOPIC_EXCEPTION,
+                        "Topic '" + topic.name() + "' collides with existing topic: " +
+                            colliding.iterator().next()));
+                }
+            }
         }
     }
 
@@ -559,10 +808,18 @@ static void validateNewTopicNames(Map<String, ApiError> topicErrors,
         for (CreatableTopic topic : topics) {
             if (topicErrors.containsKey(topic.name())) continue;
             Map<String, Entry<OpType, String>> topicConfigs = new HashMap<>();
+            List<String> nullConfigs = new ArrayList<>();
             for (CreateTopicsRequestData.CreateableTopicConfig config : topic.configs()) {
-                topicConfigs.put(config.name(), new SimpleImmutableEntry<>(SET, config.value()));
+                if (config.value() == null) {
+                    nullConfigs.add(config.name());
+                } else {
+                    topicConfigs.put(config.name(), new SimpleImmutableEntry<>(SET, config.value()));
+                }
             }
-            if (!topicConfigs.isEmpty()) {
+            if (!nullConfigs.isEmpty()) {
+                topicErrors.put(topic.name(), new ApiError(Errors.INVALID_CONFIG,
+                    "Null value not supported for topic configs: " + String.join(",", nullConfigs)));
+            } else if (!topicConfigs.isEmpty()) {
                 configChanges.put(new ConfigResource(TOPIC, topic.name()), topicConfigs);
             }
         }
@@ -662,94 +919,75 @@ BrokersToIsrs brokersToIsrs() {
         return brokersToIsrs;
     }
 
-    ControllerResult<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
+    // VisibleForTesting
+    Set<TopicIdPartition> imbalancedPartitions() {
+        return new HashSet<>(imbalancedPartitions);
+    }
+
+    ControllerResult<AlterPartitionResponseData> alterPartition(
+        ControllerRequestContext context,
+        AlterPartitionRequestData request
+    ) {
+        short requestVersion = context.requestHeader().requestApiVersion();
         clusterControl.checkBrokerEpoch(request.brokerId(), request.brokerEpoch());
-        AlterIsrResponseData response = new AlterIsrResponseData();
+        AlterPartitionResponseData response = new AlterPartitionResponseData();
         List<ApiMessageAndVersion> records = new ArrayList<>();
-        for (AlterIsrRequestData.TopicData topicData : request.topics()) {
-            AlterIsrResponseData.TopicData responseTopicData =
-                new AlterIsrResponseData.TopicData().setName(topicData.name());
+        for (AlterPartitionRequestData.TopicData topicData : request.topics()) {
+            AlterPartitionResponseData.TopicData responseTopicData =
+                new AlterPartitionResponseData.TopicData().
+                    setTopicName(topicData.topicName()).
+                    setTopicId(topicData.topicId());
             response.topics().add(responseTopicData);
-            Uuid topicId = topicsByName.get(topicData.name());
-            if (topicId == null || !topics.containsKey(topicId)) {
-                for (AlterIsrRequestData.PartitionData partitionData : topicData.partitions()) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
+
+            Uuid topicId = requestVersion > 1 ? topicData.topicId() : topicsByName.get(topicData.topicName());
+            if (topicId == null || topicId.equals(Uuid.ZERO_UUID) || !topics.containsKey(topicId)) {
+                Errors error = requestVersion > 1 ? UNKNOWN_TOPIC_ID : UNKNOWN_TOPIC_OR_PARTITION;
+                for (AlterPartitionRequestData.PartitionData partitionData : topicData.partitions()) {
+                    responseTopicData.partitions().add(new AlterPartitionResponseData.PartitionData().
                         setPartitionIndex(partitionData.partitionIndex()).
-                        setErrorCode(UNKNOWN_TOPIC_OR_PARTITION.code()));
+                        setErrorCode(error.code()));
                 }
-                log.info("Rejecting alterIsr request for unknown topic ID {}.", topicId);
+                log.info("Rejecting AlterPartition request for unknown topic ID {} or name {}.",
+                    topicData.topicId(), topicData.topicName());
                 continue;
             }
+
             TopicControlInfo topic = topics.get(topicId);
-            for (AlterIsrRequestData.PartitionData partitionData : topicData.partitions()) {
+            for (AlterPartitionRequestData.PartitionData partitionData : topicData.partitions()) {
                 int partitionId = partitionData.partitionIndex();
                 PartitionRegistration partition = topic.parts.get(partitionId);
-                if (partition == null) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(UNKNOWN_TOPIC_OR_PARTITION.code()));
-                    log.info("Rejecting alterIsr request for unknown partition {}-{}.",
-                        topic.name, partitionId);
-                    continue;
-                }
-                if (partitionData.leaderEpoch() != partition.leaderEpoch) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(FENCED_LEADER_EPOCH.code()));
-                    log.debug("Rejecting alterIsr request from node {} for {}-{} because " +
-                        "the current leader epoch is {}, not {}.", request.brokerId(), topic.name,
-                        partitionId, partition.leaderEpoch, partitionData.leaderEpoch());
-                    continue;
-                }
-                if (request.brokerId() != partition.leader) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(INVALID_REQUEST.code()));
-                    log.info("Rejecting alterIsr request from node {} for {}-{} because " +
-                        "the current leader is {}.", request.brokerId(), topic.name,
-                        partitionId, partition.leader);
-                    continue;
-                }
-                if (partitionData.currentIsrVersion() != partition.partitionEpoch) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(INVALID_UPDATE_VERSION.code()));
-                    log.info("Rejecting alterIsr request from node {} for {}-{} because " +
-                        "the current partition epoch is {}, not {}.", request.brokerId(),
-                        topic.name, partitionId, partition.partitionEpoch,
-                        partitionData.currentIsrVersion());
-                    continue;
-                }
-                int[] newIsr = Replicas.toArray(partitionData.newIsr());
-                if (!Replicas.validateIsr(partition.replicas, newIsr)) {
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(INVALID_REQUEST.code()));
-                    log.error("Rejecting alterIsr request from node {} for {}-{} because " +
-                        "it specified an invalid ISR {}.", request.brokerId(),
-                        topic.name, partitionId, partitionData.newIsr());
-                    continue;
-                }
-                if (!Replicas.contains(newIsr, partition.leader)) {
-                    // An alterIsr request can't ask for the current leader to be removed.
-                    responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
-                        setPartitionIndex(partitionId).
-                        setErrorCode(INVALID_REQUEST.code()));
-                    log.error("Rejecting alterIsr request from node {} for {}-{} because " +
-                            "it specified an invalid ISR {} that doesn't include itself.",
-                            request.brokerId(), topic.name, partitionId, partitionData.newIsr());
+
+                Errors validationError = validateAlterPartitionData(
+                    request.brokerId(),
+                    topic,
+                    partitionId,
+                    partition,
+                    context.requestHeader().requestApiVersion(),
+                    partitionData);
+
+                if (validationError != Errors.NONE) {
+                    responseTopicData.partitions().add(
+                        new AlterPartitionResponseData.PartitionData()
+                            .setPartitionIndex(partitionId)
+                            .setErrorCode(validationError.code())
+                    );
+
                     continue;
                 }
-                // At this point, we have decided to perform the ISR change. We use
-                // PartitionChangeBuilder to find out what its effect will be.
-                PartitionChangeBuilder builder = new PartitionChangeBuilder(partition,
+
+                PartitionChangeBuilder builder = new PartitionChangeBuilder(
+                    partition,
                     topic.id,
                     partitionId,
-                    r -> clusterControl.unfenced(r),
-                    () -> configurationControl.uncleanLeaderElectionEnabledForTopic(topicData.name()));
+                    clusterControl::active,
+                    featureControl.metadataVersion().isLeaderRecoverySupported());
+                if (configurationControl.uncleanLeaderElectionEnabledForTopic(topic.name())) {
+                    builder.setElection(PartitionChangeBuilder.Election.UNCLEAN);
+                }
                 builder.setTargetIsr(partitionData.newIsr());
+                builder.setTargetLeaderRecoveryState(
+                    LeaderRecoveryState.of(partitionData.leaderRecoveryState()));
                 Optional<ApiMessageAndVersion> record = builder.build();
-                Errors result = Errors.NONE;
                 if (record.isPresent()) {
                     records.add(record.get());
                     PartitionChangeRecord change = (PartitionChangeRecord) record.get().message();
@@ -760,44 +998,167 @@ ControllerResult<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
                     }
                     if (change.leader() != request.brokerId() &&
                             change.leader() != NO_LEADER_CHANGE) {
-                        // Normally, an alterIsr request, which is made by the partition
+                        // Normally, an AlterPartition request, which is made by the partition
                         // leader itself, is not allowed to modify the partition leader.
                         // However, if there is an ongoing partition reassignment and the
                         // ISR change completes it, then the leader may change as part of
                         // the changes made during reassignment cleanup.
                         //
-                        // In this case, we report back FENCED_LEADER_EPOCH to the leader
-                        // which made the alterIsr request. This lets it know that it must
+                        // In this case, we report back NEW_LEADER_ELECTED to the leader
+                        // which made the AlterPartition request. This lets it know that it must
                         // fetch new metadata before trying again. This return code is
                         // unusual because we both return an error and generate a new
                         // metadata record. We usually only do one or the other.
-                        log.info("AlterIsr request from node {} for {}-{} completed " +
+                        // FENCED_LEADER_EPOCH is used for request version below or equal to 1.
+                        Errors error = requestVersion > 1 ? NEW_LEADER_ELECTED : FENCED_LEADER_EPOCH;
+                        log.info("AlterPartition request from node {} for {}-{} completed " +
                             "the ongoing partition reassignment and triggered a " +
-                            "leadership change. Reutrning FENCED_LEADER_EPOCH.",
-                            request.brokerId(), topic.name, partitionId);
-                        responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
+                            "leadership change. Returning {}.",
+                            request.brokerId(), topic.name, partitionId, error);
+                        responseTopicData.partitions().add(new AlterPartitionResponseData.PartitionData().
                             setPartitionIndex(partitionId).
-                            setErrorCode(FENCED_LEADER_EPOCH.code()));
+                            setErrorCode(error.code()));
                         continue;
                     } else if (change.removingReplicas() != null ||
                             change.addingReplicas() != null) {
-                        log.info("AlterIsr request from node {} for {}-{} completed " +
+                        log.info("AlterPartition request from node {} for {}-{} completed " +
                             "the ongoing partition reassignment.", request.brokerId(),
                             topic.name, partitionId);
                     }
                 }
-                responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().
+
+                /* Setting the LeaderRecoveryState field is always safe because it will always be the
+                 * same as the value set in the request. For version 0, that is always the default
+                 * RECOVERED which is ignored when serializing to version 0. For any other version, the
+                 * LeaderRecoveryState field is supported.
+                 */
+                responseTopicData.partitions().add(new AlterPartitionResponseData.PartitionData().
                     setPartitionIndex(partitionId).
-                    setErrorCode(result.code()).
+                    setErrorCode(Errors.NONE.code()).
                     setLeaderId(partition.leader).
+                    setIsr(Replicas.toList(partition.isr)).
+                    setLeaderRecoveryState(partition.leaderRecoveryState.value()).
                     setLeaderEpoch(partition.leaderEpoch).
-                    setCurrentIsrVersion(partition.partitionEpoch).
-                    setIsr(Replicas.toList(partition.isr)));
+                    setPartitionEpoch(partition.partitionEpoch));
             }
         }
+
         return ControllerResult.of(records, response);
     }
 
+    /**
+     * Validate the partition information included in the alter partition request.
+     *
+     * @param brokerId id of the broker requesting the alter partition
+     * @param topic current topic information store by the replication manager
+     * @param partitionId partition id being altered
+     * @param partition current partition registration for the partition being altered
+     * @param partitionData partition data from the alter partition request
+     *
+     * @return Errors.NONE for valid alter partition data; otherwise the validation error
+     */
+    private Errors validateAlterPartitionData(
+        int brokerId,
+        TopicControlInfo topic,
+        int partitionId,
+        PartitionRegistration partition,
+        short requestApiVersion,
+        AlterPartitionRequestData.PartitionData partitionData
+    ) {
+        if (partition == null) {
+            log.info("Rejecting AlterPartition request for unknown partition {}-{}.",
+                    topic.name, partitionId);
+
+            return UNKNOWN_TOPIC_OR_PARTITION;
+        }
+        if (partitionData.leaderEpoch() != partition.leaderEpoch) {
+            log.debug("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "the current leader epoch is {}, not {}.", brokerId, topic.name,
+                    partitionId, partition.leaderEpoch, partitionData.leaderEpoch());
+
+            return FENCED_LEADER_EPOCH;
+        }
+        if (brokerId != partition.leader) {
+            log.info("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "the current leader is {}.", brokerId, topic.name,
+                    partitionId, partition.leader);
+
+            return INVALID_REQUEST;
+        }
+        if (partitionData.partitionEpoch() != partition.partitionEpoch) {
+            log.info("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "the current partition epoch is {}, not {}.", brokerId,
+                    topic.name, partitionId, partition.partitionEpoch,
+                    partitionData.partitionEpoch());
+
+            return INVALID_UPDATE_VERSION;
+        }
+        int[] newIsr = Replicas.toArray(partitionData.newIsr());
+        if (!Replicas.validateIsr(partition.replicas, newIsr)) {
+            log.error("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "it specified an invalid ISR {}.", brokerId,
+                    topic.name, partitionId, partitionData.newIsr());
+
+            return INVALID_REQUEST;
+        }
+        if (!Replicas.contains(newIsr, partition.leader)) {
+            // The ISR must always include the current leader.
+            log.error("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "it specified an invalid ISR {} that doesn't include itself.",
+                    brokerId, topic.name, partitionId, partitionData.newIsr());
+
+            return INVALID_REQUEST;
+        }
+        LeaderRecoveryState leaderRecoveryState = LeaderRecoveryState.of(partitionData.leaderRecoveryState());
+        if (leaderRecoveryState == LeaderRecoveryState.RECOVERING && newIsr.length > 1) {
+            log.info("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "the ISR {} had more than one replica while the leader was still " +
+                    "recovering from an unclean leader election {}.",
+                    brokerId, topic.name, partitionId, partitionData.newIsr(),
+                    leaderRecoveryState);
+
+            return INVALID_REQUEST;
+        }
+        if (partition.leaderRecoveryState == LeaderRecoveryState.RECOVERED &&
+                leaderRecoveryState == LeaderRecoveryState.RECOVERING) {
+            log.info("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "the leader recovery state cannot change from RECOVERED to RECOVERING.",
+                    brokerId, topic.name, partitionId);
+
+            return INVALID_REQUEST;
+        }
+
+        List<IneligibleReplica> ineligibleReplicas = ineligibleReplicasForIsr(newIsr);
+        if (!ineligibleReplicas.isEmpty()) {
+            log.info("Rejecting AlterPartition request from node {} for {}-{} because " +
+                    "it specified ineligible replicas {} in the new ISR {}.",
+                    brokerId, topic.name, partitionId, ineligibleReplicas, partitionData.newIsr());
+
+            if (requestApiVersion > 1) {
+                return INELIGIBLE_REPLICA;
+            } else {
+                return OPERATION_NOT_ATTEMPTED;
+            }
+        }
+
+        return Errors.NONE;
+    }
+
+    private List<IneligibleReplica> ineligibleReplicasForIsr(int[] replicas) {
+        List<IneligibleReplica> ineligibleReplicas = new ArrayList<>(0);
+        for (Integer replicaId : replicas) {
+            BrokerRegistration registration = clusterControl.registration(replicaId);
+            if (registration == null) {
+                ineligibleReplicas.add(new IneligibleReplica(replicaId, "not registered"));
+            } else if (registration.inControlledShutdown()) {
+                ineligibleReplicas.add(new IneligibleReplica(replicaId, "shutting down"));
+            } else if (registration.fenced()) {
+                ineligibleReplicas.add(new IneligibleReplica(replicaId, "fenced"));
+            }
+        }
+        return ineligibleReplicas;
+    }
+
     /**
      * Generate the appropriate records to handle a broker being fenced.
      *
@@ -807,7 +1168,6 @@ ControllerResult<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
      * @param brokerId      The broker id.
      * @param records       The record list to append to.
      */
-
     void handleBrokerFenced(int brokerId, List<ApiMessageAndVersion> records) {
         BrokerRegistration brokerRegistration = clusterControl.brokerRegistrations().get(brokerId);
         if (brokerRegistration == null) {
@@ -815,9 +1175,16 @@ void handleBrokerFenced(int brokerId, List<ApiMessageAndVersion> records) {
         }
         generateLeaderAndIsrUpdates("handleBrokerFenced", brokerId, NO_LEADER, records,
             brokersToIsrs.partitionsWithBrokerInIsr(brokerId));
-        records.add(new ApiMessageAndVersion(new FenceBrokerRecord().
-            setId(brokerId).setEpoch(brokerRegistration.epoch()),
-            FENCE_BROKER_RECORD.highestSupportedVersion()));
+        if (featureControl.metadataVersion().isBrokerRegistrationChangeRecordSupported()) {
+            records.add(new ApiMessageAndVersion(new BrokerRegistrationChangeRecord().
+                    setBrokerId(brokerId).setBrokerEpoch(brokerRegistration.epoch()).
+                    setFenced(BrokerRegistrationFencingChange.FENCE.value()),
+                    (short) 0));
+        } else {
+            records.add(new ApiMessageAndVersion(new FenceBrokerRecord().
+                    setId(brokerId).setEpoch(brokerRegistration.epoch()),
+                    (short) 0));
+        }
     }
 
     /**
@@ -836,13 +1203,13 @@ void handleBrokerUnregistered(int brokerId, long brokerEpoch,
             brokersToIsrs.partitionsWithBrokerInIsr(brokerId));
         records.add(new ApiMessageAndVersion(new UnregisterBrokerRecord().
             setBrokerId(brokerId).setBrokerEpoch(brokerEpoch),
-            UNREGISTER_BROKER_RECORD.highestSupportedVersion()));
+            (short) 0));
     }
 
     /**
      * Generate the appropriate records to handle a broker becoming unfenced.
      *
-     * First, we create an UnfenceBrokerRecord. Then, we check if if there are any
+     * First, we create an UnfenceBrokerRecord. Then, we check if there are any
      * partitions that don't currently have a leader that should be led by the newly
      * unfenced broker.
      *
@@ -851,12 +1218,42 @@ void handleBrokerUnregistered(int brokerId, long brokerEpoch,
      * @param records       The record list to append to.
      */
     void handleBrokerUnfenced(int brokerId, long brokerEpoch, List<ApiMessageAndVersion> records) {
-        records.add(new ApiMessageAndVersion(new UnfenceBrokerRecord().setId(brokerId).
-            setEpoch(brokerEpoch), UNFENCE_BROKER_RECORD.highestSupportedVersion()));
+        if (featureControl.metadataVersion().isBrokerRegistrationChangeRecordSupported()) {
+            records.add(new ApiMessageAndVersion(new BrokerRegistrationChangeRecord().
+                setBrokerId(brokerId).setBrokerEpoch(brokerEpoch).
+                setFenced(BrokerRegistrationFencingChange.UNFENCE.value()),
+                (short) 0));
+        } else {
+            records.add(new ApiMessageAndVersion(new UnfenceBrokerRecord().setId(brokerId).
+                setEpoch(brokerEpoch), (short) 0));
+        }
         generateLeaderAndIsrUpdates("handleBrokerUnfenced", NO_LEADER, brokerId, records,
             brokersToIsrs.partitionsWithNoLeader());
     }
 
+    /**
+     * Generate the appropriate records to handle a broker starting a controlled shutdown.
+     *
+     * First, we create an BrokerRegistrationChangeRecord. Then, we remove this broker
+     * from any non-singleton ISR and elect new leaders for partitions led by this
+     * broker.
+     *
+     * @param brokerId      The broker id.
+     * @param brokerEpoch   The broker epoch.
+     * @param records       The record list to append to.
+     */
+    void handleBrokerInControlledShutdown(int brokerId, long brokerEpoch, List<ApiMessageAndVersion> records) {
+        if (featureControl.metadataVersion().isInControlledShutdownStateSupported()
+                && !clusterControl.inControlledShutdown(brokerId)) {
+            records.add(new ApiMessageAndVersion(new BrokerRegistrationChangeRecord().
+                setBrokerId(brokerId).setBrokerEpoch(brokerEpoch).
+                setInControlledShutdown(BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.value()),
+                (short) 1));
+        }
+        generateLeaderAndIsrUpdates("enterControlledShutdown[" + brokerId + "]",
+            brokerId, NO_LEADER, records, brokersToIsrs.partitionsWithBrokerInIsr(brokerId));
+    }
+
     ControllerResult<ElectLeadersResponseData> electLeaders(ElectLeadersRequestData request) {
         ElectionType electionType = electionType(request.electionType());
         List<ApiMessageAndVersion> records = new ArrayList<>();
@@ -935,13 +1332,16 @@ ApiError electLeader(String topic, int partitionId, ElectionType electionType,
             return new ApiError(Errors.ELECTION_NOT_NEEDED);
         }
 
+        PartitionChangeBuilder.Election election = PartitionChangeBuilder.Election.PREFERRED;
+        if (electionType == ElectionType.UNCLEAN) {
+            election = PartitionChangeBuilder.Election.UNCLEAN;
+        }
         PartitionChangeBuilder builder = new PartitionChangeBuilder(partition,
             topicId,
             partitionId,
-            r -> clusterControl.unfenced(r),
-            () -> electionType == ElectionType.UNCLEAN);
-
-        builder.setAlwaysElectPreferredIfPossible(electionType == ElectionType.PREFERRED);
+            clusterControl::active,
+            featureControl.metadataVersion().isLeaderRecoverySupported());
+        builder.setElection(election);
         Optional<ApiMessageAndVersion> record = builder.build();
         if (!record.isPresent()) {
             if (electionType == ElectionType.PREFERRED) {
@@ -955,13 +1355,13 @@ ApiError electLeader(String topic, int partitionId, ElectionType electionType,
     }
 
     ControllerResult<BrokerHeartbeatReply> processBrokerHeartbeat(
-                BrokerHeartbeatRequestData request, long lastCommittedOffset) {
+                BrokerHeartbeatRequestData request, long registerBrokerRecordOffset) {
         int brokerId = request.brokerId();
         long brokerEpoch = request.brokerEpoch();
         clusterControl.checkBrokerEpoch(brokerId, brokerEpoch);
         BrokerHeartbeatManager heartbeatManager = clusterControl.heartbeatManager();
         BrokerControlStates states = heartbeatManager.calculateNextBrokerState(brokerId,
-            request, lastCommittedOffset, () -> brokersToIsrs.hasLeaderships(brokerId));
+            request, registerBrokerRecordOffset, () -> brokersToIsrs.hasLeaderships(brokerId));
         List<ApiMessageAndVersion> records = new ArrayList<>();
         if (states.current() != states.next()) {
             switch (states.next()) {
@@ -972,8 +1372,7 @@ ControllerResult<BrokerHeartbeatReply> processBrokerHeartbeat(
                     handleBrokerUnfenced(brokerId, brokerEpoch, records);
                     break;
                 case CONTROLLED_SHUTDOWN:
-                    generateLeaderAndIsrUpdates("enterControlledShutdown[" + brokerId + "]",
-                        brokerId, NO_LEADER, records, brokersToIsrs.partitionsWithBrokerInIsr(brokerId));
+                    handleBrokerInControlledShutdown(brokerId, brokerEpoch, records);
                     break;
                 case SHUTDOWN_NOW:
                     handleBrokerFenced(brokerId, records);
@@ -983,7 +1382,7 @@ ControllerResult<BrokerHeartbeatReply> processBrokerHeartbeat(
         heartbeatManager.touch(brokerId,
             states.next().fenced(),
             request.currentMetadataOffset());
-        boolean isCaughtUp = request.currentMetadataOffset() >= lastCommittedOffset;
+        boolean isCaughtUp = request.currentMetadataOffset() >= registerBrokerRecordOffset;
         BrokerHeartbeatReply reply = new BrokerHeartbeatReply(isCaughtUp,
                 states.next().fenced(),
                 states.next().inControlledShutdown(),
@@ -1016,9 +1415,53 @@ ControllerResult<Void> maybeFenceOneStaleBroker() {
         return ControllerResult.of(records, null);
     }
 
-    // Visible for testing
-    Boolean isBrokerUnfenced(int brokerId) {
-        return clusterControl.unfenced(brokerId);
+    boolean arePartitionLeadersImbalanced() {
+        return !imbalancedPartitions.isEmpty();
+    }
+
+    /**
+     * Attempt to elect a preferred leader for all topic partitions which have a leader that is not the preferred replica.
+     *
+     * The response() method in the return object is true if this method returned without electing all possible preferred replicas.
+     * The quorum controlller should reschedule this operation immediately if it is true.
+     *
+     * @return All of the election records and if there may be more available preferred replicas to elect as leader
+     */
+    ControllerResult<Boolean> maybeBalancePartitionLeaders() {
+        List<ApiMessageAndVersion> records = new ArrayList<>();
+
+        boolean rescheduleImmidiately = false;
+        for (TopicIdPartition topicPartition : imbalancedPartitions) {
+            if (records.size() >= maxElectionsPerImbalance) {
+                rescheduleImmidiately = true;
+                break;
+            }
+
+            TopicControlInfo topic = topics.get(topicPartition.topicId());
+            if (topic == null) {
+                log.error("Skipping unknown imbalanced topic {}", topicPartition);
+                continue;
+            }
+
+            PartitionRegistration partition = topic.parts.get(topicPartition.partitionId());
+            if (partition == null) {
+                log.error("Skipping unknown imbalanced partition {}", topicPartition);
+                continue;
+            }
+
+            // Attempt to perform a preferred leader election
+            PartitionChangeBuilder builder = new PartitionChangeBuilder(
+                partition,
+                topicPartition.topicId(),
+                topicPartition.partitionId(),
+                clusterControl::active,
+                featureControl.metadataVersion().isLeaderRecoverySupported()
+            );
+            builder.setElection(PartitionChangeBuilder.Election.PREFERRED);
+            builder.build().ifPresent(records::add);
+        }
+
+        return ControllerResult.of(records, rescheduleImmidiately);
     }
 
     ControllerResult<List<CreatePartitionsTopicResult>>
@@ -1040,7 +1483,7 @@ Boolean isBrokerUnfenced(int brokerId) {
                 setErrorCode(apiError.error().code()).
                 setErrorMessage(apiError.message()));
         }
-        return new ControllerResult<>(records, results, true);
+        return ControllerResult.atomicOf(records, results);
     }
 
     void createPartitions(CreatePartitionsTopic topic,
@@ -1094,28 +1537,41 @@ void createPartitions(CreatePartitionsTopic topic,
                     OptionalInt.of(replicationFactor));
                 placements.add(assignment.brokerIds());
                 List<Integer> isr = assignment.brokerIds().stream().
-                    filter(clusterControl::unfenced).collect(Collectors.toList());
+                    filter(clusterControl::active).collect(Collectors.toList());
                 if (isr.isEmpty()) {
                     throw new InvalidReplicaAssignmentException(
                         "All brokers specified in the manual partition assignment for " +
-                            "partition " + (startPartitionId + i) + " are fenced.");
+                            "partition " + (startPartitionId + i) + " are fenced or in controlled shutdown.");
                 }
                 isrs.add(isr);
             }
         } else {
-            placements = clusterControl.placeReplicas(startPartitionId, additional,
-                replicationFactor);
+            placements = clusterControl.replicaPlacer().place(new PlacementSpec(
+                startPartitionId,
+                additional,
+                replicationFactor
+            ), clusterDescriber);
             isrs = placements;
         }
         int partitionId = startPartitionId;
         for (int i = 0; i < placements.size(); i++) {
-            List<Integer> placement = placements.get(i);
-            List<Integer> isr = isrs.get(i);
+            List<Integer> replicas = placements.get(i);
+            List<Integer> isr = isrs.get(i).stream().
+                filter(clusterControl::active).collect(Collectors.toList());
+            // If the ISR is empty, it means that all brokers are fenced or
+            // in controlled shutdown. To be consistent with the replica placer,
+            // we reject the create topic request with INVALID_REPLICATION_FACTOR.
+            if (isr.isEmpty()) {
+                throw new InvalidReplicationFactorException(
+                    "Unable to replicate the partition " + replicationFactor +
+                        " time(s): All brokers are currently fenced or in controlled shutdown.");
+            }
             records.add(new ApiMessageAndVersion(new PartitionRecord().
                 setPartitionId(partitionId).
                 setTopicId(topicId).
-                setReplicas(placement).
+                setReplicas(replicas).
                 setIsr(isr).
+                setLeaderRecoveryState(LeaderRecoveryState.RECOVERED.value()).
                 setRemovingReplicas(Collections.emptyList()).
                 setAddingReplicas(Collections.emptyList()).
                 setLeader(isr.get(0)).
@@ -1188,14 +1644,14 @@ void generateLeaderAndIsrUpdates(String context,
         // where there is an unclean leader election which chooses a leader from outside
         // the ISR.
         Function<Integer, Boolean> isAcceptableLeader =
-            r -> (r != brokerToRemove) && (r == brokerToAdd || clusterControl.unfenced(r));
+            r -> (r != brokerToRemove) && (r == brokerToAdd || clusterControl.active(r));
 
         while (iterator.hasNext()) {
             TopicIdPartition topicIdPart = iterator.next();
             TopicControlInfo topic = topics.get(topicIdPart.topicId());
             if (topic == null) {
                 throw new RuntimeException("Topic ID " + topicIdPart.topicId() +
-                        " existed in isrMembers, but not in the topics map.");
+                    " existed in isrMembers, but not in the topics map.");
             }
             PartitionRegistration partition = topic.parts.get(topicIdPart.partitionId());
             if (partition == null) {
@@ -1206,7 +1662,10 @@ void generateLeaderAndIsrUpdates(String context,
                 topicIdPart.topicId(),
                 topicIdPart.partitionId(),
                 isAcceptableLeader,
-                () -> configurationControl.uncleanLeaderElectionEnabledForTopic(topic.name));
+                featureControl.metadataVersion().isLeaderRecoverySupported());
+            if (configurationControl.uncleanLeaderElectionEnabledForTopic(topic.name)) {
+                builder.setElection(PartitionChangeBuilder.Election.UNCLEAN);
+            }
 
             // Note: if brokerToRemove was passed as NO_LEADER, this is a no-op (the new
             // target ISR will be the same as the old one).
@@ -1312,8 +1771,11 @@ Optional<ApiMessageAndVersion> cancelPartitionReassignment(String topicName,
         PartitionChangeBuilder builder = new PartitionChangeBuilder(part,
             tp.topicId(),
             tp.partitionId(),
-            r -> clusterControl.unfenced(r),
-            () -> configurationControl.uncleanLeaderElectionEnabledForTopic(topicName));
+            clusterControl::active,
+            featureControl.metadataVersion().isLeaderRecoverySupported());
+        if (configurationControl.uncleanLeaderElectionEnabledForTopic(topicName)) {
+            builder.setElection(PartitionChangeBuilder.Election.UNCLEAN);
+        }
         builder.setTargetIsr(revert.isr()).
             setTargetReplicas(revert.replicas()).
             setTargetRemoving(Collections.emptyList()).
@@ -1361,8 +1823,8 @@ Optional<ApiMessageAndVersion> changePartitionReassignment(TopicIdPartition tp,
         PartitionChangeBuilder builder = new PartitionChangeBuilder(part,
             tp.topicId(),
             tp.partitionId(),
-            r -> clusterControl.unfenced(r),
-            () -> false);
+            clusterControl::active,
+            featureControl.metadataVersion().isLeaderRecoverySupported());
         if (!reassignment.merged().equals(currentReplicas)) {
             builder.setTargetReplicas(reassignment.merged());
         }
@@ -1460,4 +1922,19 @@ public List<ApiMessageAndVersion> next() {
     ReplicationControlIterator iterator(long epoch) {
         return new ReplicationControlIterator(epoch);
     }
+
+    private static final class IneligibleReplica {
+        private final int replicaId;
+        private final String reason;
+
+        private IneligibleReplica(int replicaId, String reason) {
+            this.replicaId = replicaId;
+            this.reason = reason;
+        }
+
+        @Override
+        public String toString() {
+            return replicaId + " (" + reason + ")";
+        }
+    }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/TopicIdPartition.java b/metadata/src/main/java/org/apache/kafka/controller/TopicIdPartition.java
new file mode 100644
index 0000000000000..b594c4a4ec351
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/controller/TopicIdPartition.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import java.util.Objects;
+import org.apache.kafka.common.Uuid;
+
+final class TopicIdPartition {
+    private final Uuid topicId;
+    private final int partitionId;
+
+    TopicIdPartition(Uuid topicId, int partitionId) {
+        this.topicId = topicId;
+        this.partitionId = partitionId;
+    }
+
+    public Uuid topicId() {
+        return topicId;
+    }
+
+    public int partitionId() {
+        return partitionId;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof TopicIdPartition)) return false;
+        TopicIdPartition other = (TopicIdPartition) o;
+        return other.topicId.equals(topicId) && other.partitionId == partitionId;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(topicId, partitionId);
+    }
+
+    @Override
+    public String toString() {
+        return topicId + ":" + partitionId;
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/image/AclsDelta.java b/metadata/src/main/java/org/apache/kafka/image/AclsDelta.java
index c6379186187d0..a255e3ad8f19e 100644
--- a/metadata/src/main/java/org/apache/kafka/image/AclsDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/AclsDelta.java
@@ -22,6 +22,7 @@
 import org.apache.kafka.common.metadata.RemoveAccessControlEntryRecord;
 import org.apache.kafka.metadata.authorizer.StandardAcl;
 import org.apache.kafka.metadata.authorizer.StandardAclWithId;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -43,6 +44,12 @@ public AclsDelta(AclsImage image) {
         this.image = image;
     }
 
+    /**
+     * Returns a Map of deltas from ACL ID to optional StandardAcl. An empty optional value indicates the ACL
+     * is for removal. An optional with a value indicates the ACL is to be added.
+     *
+     * @return Map of deltas.
+     */
     public Map<Uuid, Optional<StandardAcl>> changes() {
         return changes;
     }
@@ -51,6 +58,10 @@ void finishSnapshot() {
         this.isSnapshotDelta = true;
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public boolean isSnapshotDelta() {
         return isSnapshotDelta;
     }
@@ -60,8 +71,22 @@ public void replay(AccessControlEntryRecord record) {
         changes.put(aclWithId.id(), Optional.of(aclWithId.acl()));
     }
 
+    /**
+     * This method replays a RemoveAccessControlEntryRecord record. If the current image contains the ACL
+     * the removal is stored as an Optional.empty() value in the Map. If the changes Map contains the ACL
+     * it means the ACL was recently applied and isn't in the image yet, in which case the ACL can be totally removed
+     * from the Map because there is no need to add it then delete it when the changes are applied.
+     *
+     * @param record Log metadata record to replay.
+     */
     public void replay(RemoveAccessControlEntryRecord record) {
-        changes.put(record.id(), Optional.empty());
+        if (image.acls().containsKey(record.id())) {
+            changes.put(record.id(), Optional.empty());
+        } else if (changes.containsKey(record.id())) {
+            changes.remove(record.id());
+        } else {
+            throw new IllegalStateException("Failed to find existing ACL with ID " + record.id() + " in either image or changes");
+        }
     }
 
     public AclsImage apply() {
diff --git a/metadata/src/main/java/org/apache/kafka/image/ClientQuotasDelta.java b/metadata/src/main/java/org/apache/kafka/image/ClientQuotasDelta.java
index 4b574b3ada5dd..1255f75e03fc0 100644
--- a/metadata/src/main/java/org/apache/kafka/image/ClientQuotasDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/ClientQuotasDelta.java
@@ -19,6 +19,7 @@
 
 import org.apache.kafka.common.metadata.ClientQuotaRecord;
 import org.apache.kafka.common.quota.ClientQuotaEntity;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -47,6 +48,10 @@ public void finishSnapshot() {
         }
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public void replay(ClientQuotaRecord record) {
         ClientQuotaEntity entity = ClientQuotaImage.dataToEntity(record.entity());
         ClientQuotaDelta change = changes.computeIfAbsent(entity, __ ->
diff --git a/metadata/src/main/java/org/apache/kafka/image/ClusterDelta.java b/metadata/src/main/java/org/apache/kafka/image/ClusterDelta.java
index 6c48b8ecde575..39d6fdb3d744c 100644
--- a/metadata/src/main/java/org/apache/kafka/image/ClusterDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/ClusterDelta.java
@@ -23,6 +23,9 @@
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnregisterBrokerRecord;
 import org.apache.kafka.metadata.BrokerRegistration;
+import org.apache.kafka.metadata.BrokerRegistrationFencingChange;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -61,6 +64,10 @@ public void finishSnapshot() {
         }
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public void replay(RegisterBrokerRecord record) {
         BrokerRegistration broker = BrokerRegistration.fromRecord(record);
         changedBrokers.put(broker.id(), Optional.of(broker));
@@ -85,22 +92,38 @@ private BrokerRegistration getBrokerOrThrow(int brokerId, long epoch, String act
     }
 
     public void replay(FenceBrokerRecord record) {
-        BrokerRegistration broker = getBrokerOrThrow(record.id(), record.epoch(), "fence");
-        changedBrokers.put(record.id(), Optional.of(broker.cloneWithFencing(true)));
+        BrokerRegistration curRegistration = getBrokerOrThrow(record.id(), record.epoch(), "fence");
+        changedBrokers.put(record.id(), Optional.of(curRegistration.cloneWith(
+            BrokerRegistrationFencingChange.FENCE.asBoolean(),
+            Optional.empty()
+        )));
     }
 
     public void replay(UnfenceBrokerRecord record) {
-        BrokerRegistration broker = getBrokerOrThrow(record.id(), record.epoch(), "unfence");
-        changedBrokers.put(record.id(), Optional.of(broker.cloneWithFencing(false)));
+        BrokerRegistration curRegistration = getBrokerOrThrow(record.id(), record.epoch(), "unfence");
+        changedBrokers.put(record.id(), Optional.of(curRegistration.cloneWith(
+            BrokerRegistrationFencingChange.UNFENCE.asBoolean(),
+            Optional.empty()
+        )));
     }
 
     public void replay(BrokerRegistrationChangeRecord record) {
-        BrokerRegistration broker =
+        BrokerRegistration curRegistration =
             getBrokerOrThrow(record.brokerId(), record.brokerEpoch(), "change");
-        if (record.fenced() < 0) {
-            changedBrokers.put(record.brokerId(), Optional.of(broker.cloneWithFencing(false)));
-        } else if (record.fenced() > 0) {
-            changedBrokers.put(record.brokerId(), Optional.of(broker.cloneWithFencing(true)));
+        BrokerRegistrationFencingChange fencingChange =
+            BrokerRegistrationFencingChange.fromValue(record.fenced()).orElseThrow(
+                () -> new IllegalStateException(String.format("Unable to replay %s: unknown " +
+                    "value for fenced field: %d", record, record.fenced())));
+        BrokerRegistrationInControlledShutdownChange inControlledShutdownChange =
+            BrokerRegistrationInControlledShutdownChange.fromValue(record.inControlledShutdown()).orElseThrow(
+                () -> new IllegalStateException(String.format("Unable to replay %s: unknown " +
+                    "value for inControlledShutdown field: %d", record, record.inControlledShutdown())));
+        BrokerRegistration nextRegistration = curRegistration.cloneWith(
+            fencingChange.asBoolean(),
+            inControlledShutdownChange.asBoolean()
+        );
+        if (!curRegistration.equals(nextRegistration)) {
+            changedBrokers.put(record.brokerId(), Optional.of(nextRegistration));
         }
     }
 
diff --git a/metadata/src/main/java/org/apache/kafka/image/ClusterImage.java b/metadata/src/main/java/org/apache/kafka/image/ClusterImage.java
index 3cf36fa0885cd..d513cbca35f88 100644
--- a/metadata/src/main/java/org/apache/kafka/image/ClusterImage.java
+++ b/metadata/src/main/java/org/apache/kafka/image/ClusterImage.java
@@ -19,6 +19,7 @@
 
 import org.apache.kafka.metadata.BrokerRegistration;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -27,7 +28,6 @@
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
-
 /**
  * Represents the cluster in the metadata image.
  *
@@ -54,10 +54,10 @@ public BrokerRegistration broker(int nodeId) {
         return brokers.get(nodeId);
     }
 
-    public void write(Consumer<List<ApiMessageAndVersion>> out) {
+    public void write(Consumer<List<ApiMessageAndVersion>> out, MetadataVersion metadataVersion) {
         List<ApiMessageAndVersion> batch = new ArrayList<>();
         for (BrokerRegistration broker : brokers.values()) {
-            batch.add(broker.toRecord());
+            batch.add(broker.toRecord(metadataVersion));
         }
         out.accept(batch);
     }
diff --git a/metadata/src/main/java/org/apache/kafka/image/ConfigurationsDelta.java b/metadata/src/main/java/org/apache/kafka/image/ConfigurationsDelta.java
index d0f5848770e41..2a4bf1a1ca2e3 100644
--- a/metadata/src/main/java/org/apache/kafka/image/ConfigurationsDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/ConfigurationsDelta.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.common.config.ConfigResource.Type;
 import org.apache.kafka.common.metadata.ConfigRecord;
 import org.apache.kafka.common.metadata.RemoveTopicRecord;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -52,6 +53,10 @@ public void finishSnapshot() {
         }
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public void replay(ConfigRecord record) {
         ConfigResource resource =
             new ConfigResource(Type.forId(record.resourceType()), record.resourceName());
diff --git a/metadata/src/main/java/org/apache/kafka/image/FeaturesDelta.java b/metadata/src/main/java/org/apache/kafka/image/FeaturesDelta.java
index 781c496f19b6e..7f431c2d0615f 100644
--- a/metadata/src/main/java/org/apache/kafka/image/FeaturesDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/FeaturesDelta.java
@@ -18,8 +18,7 @@
 package org.apache.kafka.image;
 
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
-import org.apache.kafka.common.metadata.RemoveFeatureLevelRecord;
-import org.apache.kafka.metadata.VersionRange;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -33,16 +32,22 @@
 public final class FeaturesDelta {
     private final FeaturesImage image;
 
-    private final Map<String, Optional<VersionRange>> changes = new HashMap<>();
+    private final Map<String, Optional<Short>> changes = new HashMap<>();
+
+    private MetadataVersion metadataVersionChange = null;
 
     public FeaturesDelta(FeaturesImage image) {
         this.image = image;
     }
 
-    public Map<String, Optional<VersionRange>> changes() {
+    public Map<String, Optional<Short>> changes() {
         return changes;
     }
 
+    public Optional<MetadataVersion> metadataVersionChange() {
+        return Optional.ofNullable(metadataVersionChange);
+    }
+
     public void finishSnapshot() {
         for (String featureName : image.finalizedVersions().keySet()) {
             if (!changes.containsKey(featureName)) {
@@ -52,42 +57,53 @@ public void finishSnapshot() {
     }
 
     public void replay(FeatureLevelRecord record) {
-        changes.put(record.name(), Optional.of(
-            new VersionRange(record.minFeatureLevel(), record.maxFeatureLevel())));
-    }
-
-    public void replay(RemoveFeatureLevelRecord record) {
-        changes.put(record.name(), Optional.empty());
+        if (record.name().equals(MetadataVersion.FEATURE_NAME)) {
+            metadataVersionChange = MetadataVersion.fromFeatureLevel(record.featureLevel());
+        } else {
+            if (record.featureLevel() == 0) {
+                changes.put(record.name(), Optional.empty());
+            } else {
+                changes.put(record.name(), Optional.of(record.featureLevel()));
+            }
+        }
     }
 
     public FeaturesImage apply() {
-        Map<String, VersionRange> newFinalizedVersions =
+        Map<String, Short> newFinalizedVersions =
             new HashMap<>(image.finalizedVersions().size());
-        for (Entry<String, VersionRange> entry : image.finalizedVersions().entrySet()) {
+        for (Entry<String, Short> entry : image.finalizedVersions().entrySet()) {
             String name = entry.getKey();
-            Optional<VersionRange> change = changes.get(name);
+            Optional<Short> change = changes.get(name);
             if (change == null) {
                 newFinalizedVersions.put(name, entry.getValue());
             } else if (change.isPresent()) {
                 newFinalizedVersions.put(name, change.get());
             }
         }
-        for (Entry<String, Optional<VersionRange>> entry : changes.entrySet()) {
+        for (Entry<String, Optional<Short>> entry : changes.entrySet()) {
             String name = entry.getKey();
-            Optional<VersionRange> change = entry.getValue();
+            Optional<Short> change = entry.getValue();
             if (!newFinalizedVersions.containsKey(name)) {
                 if (change.isPresent()) {
                     newFinalizedVersions.put(name, change.get());
                 }
             }
         }
-        return new FeaturesImage(newFinalizedVersions);
+
+        final MetadataVersion metadataVersion;
+        if (metadataVersionChange == null) {
+            metadataVersion = image.metadataVersion();
+        } else {
+            metadataVersion = metadataVersionChange;
+        }
+        return new FeaturesImage(newFinalizedVersions, metadataVersion);
     }
 
     @Override
     public String toString() {
         return "FeaturesDelta(" +
             "changes=" + changes +
+            ", metadataVersionChange=" + metadataVersionChange +
             ')';
     }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/image/FeaturesImage.java b/metadata/src/main/java/org/apache/kafka/image/FeaturesImage.java
index f5f372936a1bd..4cfb1260f1dcb 100644
--- a/metadata/src/main/java/org/apache/kafka/image/FeaturesImage.java
+++ b/metadata/src/main/java/org/apache/kafka/image/FeaturesImage.java
@@ -18,8 +18,8 @@
 package org.apache.kafka.image;
 
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
-import org.apache.kafka.metadata.VersionRange;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -28,7 +28,6 @@
 import java.util.Map.Entry;
 import java.util.Optional;
 import java.util.function.Consumer;
-import java.util.stream.Collectors;
 
 import static org.apache.kafka.common.metadata.MetadataRecordType.FEATURE_LEVEL_RECORD;
 
@@ -39,34 +38,47 @@
  * This class is thread-safe.
  */
 public final class FeaturesImage {
-    public static final FeaturesImage EMPTY = new FeaturesImage(Collections.emptyMap());
+    public static final FeaturesImage EMPTY = new FeaturesImage(Collections.emptyMap(), MetadataVersion.MINIMUM_KRAFT_VERSION);
 
-    private final Map<String, VersionRange> finalizedVersions;
+    private final Map<String, Short> finalizedVersions;
 
-    public FeaturesImage(Map<String, VersionRange> finalizedVersions) {
+    private final MetadataVersion metadataVersion;
+
+    public FeaturesImage(Map<String, Short> finalizedVersions, MetadataVersion metadataVersion) {
         this.finalizedVersions = Collections.unmodifiableMap(finalizedVersions);
+        this.metadataVersion = metadataVersion;
     }
 
     public boolean isEmpty() {
         return finalizedVersions.isEmpty();
     }
 
-    Map<String, VersionRange> finalizedVersions() {
+    public MetadataVersion metadataVersion() {
+        return metadataVersion;
+    }
+
+    public Map<String, Short> finalizedVersions() {
         return finalizedVersions;
     }
 
-    private Optional<VersionRange> finalizedVersion(String feature) {
+    private Optional<Short> finalizedVersion(String feature) {
         return Optional.ofNullable(finalizedVersions.get(feature));
     }
 
     public void write(Consumer<List<ApiMessageAndVersion>> out) {
         List<ApiMessageAndVersion> batch = new ArrayList<>();
-        for (Entry<String, VersionRange> entry : finalizedVersions.entrySet()) {
+        // Write out the metadata.version record first, and then the rest of the finalized features
+        batch.add(new ApiMessageAndVersion(new FeatureLevelRecord().
+            setName(MetadataVersion.FEATURE_NAME).
+            setFeatureLevel(metadataVersion.featureLevel()), FEATURE_LEVEL_RECORD.lowestSupportedVersion()));
+
+        for (Entry<String, Short> entry : finalizedVersions.entrySet()) {
+            if (entry.getKey().equals(MetadataVersion.FEATURE_NAME)) {
+                continue;
+            }
             batch.add(new ApiMessageAndVersion(new FeatureLevelRecord().
                 setName(entry.getKey()).
-                setMinFeatureLevel(entry.getValue().min()).
-                setMaxFeatureLevel(entry.getValue().max()),
-                FEATURE_LEVEL_RECORD.highestSupportedVersion()));
+                setFeatureLevel(entry.getValue()), FEATURE_LEVEL_RECORD.highestSupportedVersion()));
         }
         out.accept(batch);
     }
@@ -83,9 +95,12 @@ public boolean equals(Object o) {
         return finalizedVersions.equals(other.finalizedVersions);
     }
 
+
     @Override
     public String toString() {
-        return finalizedVersions.entrySet().stream().
-            map(e -> e.getKey() + ":" + e.getValue()).collect(Collectors.joining(", "));
+        return "FeaturesImage{" +
+                "finalizedVersions=" + finalizedVersions +
+                ", metadataVersion=" + metadataVersion +
+                '}';
     }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/image/MetadataDelta.java b/metadata/src/main/java/org/apache/kafka/image/MetadataDelta.java
index 0ba285f8dae9b..25e141ea0dba2 100644
--- a/metadata/src/main/java/org/apache/kafka/image/MetadataDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/MetadataDelta.java
@@ -29,7 +29,6 @@
 import org.apache.kafka.common.metadata.ProducerIdsRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.RemoveAccessControlEntryRecord;
-import org.apache.kafka.common.metadata.RemoveFeatureLevelRecord;
 import org.apache.kafka.common.metadata.RemoveTopicRecord;
 import org.apache.kafka.common.metadata.TopicRecord;
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
@@ -37,9 +36,11 @@
 import org.apache.kafka.common.protocol.ApiMessage;
 import org.apache.kafka.raft.OffsetAndEpoch;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.Iterator;
 import java.util.List;
+import java.util.Optional;
 
 
 /**
@@ -143,6 +144,14 @@ public AclsDelta getOrCreateAclsDelta() {
         return aclsDelta;
     }
 
+    public Optional<MetadataVersion> metadataVersionChanged() {
+        if (featuresDelta == null) {
+            return Optional.empty();
+        } else {
+            return featuresDelta.metadataVersionChange();
+        }
+    }
+
     public void read(long highestOffset, int highestEpoch, Iterator<List<ApiMessageAndVersion>> reader) {
         while (reader.hasNext()) {
             List<ApiMessageAndVersion> batch = reader.next();
@@ -194,9 +203,6 @@ public void replay(long offset, int epoch, ApiMessage record) {
             case PRODUCER_IDS_RECORD:
                 replay((ProducerIdsRecord) record);
                 break;
-            case REMOVE_FEATURE_LEVEL_RECORD:
-                replay((RemoveFeatureLevelRecord) record);
-                break;
             case BROKER_REGISTRATION_CHANGE_RECORD:
                 replay((BrokerRegistrationChangeRecord) record);
                 break;
@@ -206,6 +212,11 @@ public void replay(long offset, int epoch, ApiMessage record) {
             case REMOVE_ACCESS_CONTROL_ENTRY_RECORD:
                 replay((RemoveAccessControlEntryRecord) record);
                 break;
+            case NO_OP_RECORD:
+                /* NoOpRecord is an empty record and doesn't need to be replayed beyond
+                 * updating the highest offset and epoch.
+                 */
+                break;
             default:
                 throw new RuntimeException("Unknown metadata record type " + type);
         }
@@ -253,6 +264,15 @@ public void replay(RemoveTopicRecord record) {
 
     public void replay(FeatureLevelRecord record) {
         getOrCreateFeaturesDelta().replay(record);
+        featuresDelta.metadataVersionChange().ifPresent(changedMetadataVersion -> {
+            // If any feature flags change, need to immediately check if any metadata needs to be downgraded.
+            getOrCreateClusterDelta().handleMetadataVersionChange(changedMetadataVersion);
+            getOrCreateConfigsDelta().handleMetadataVersionChange(changedMetadataVersion);
+            getOrCreateTopicsDelta().handleMetadataVersionChange(changedMetadataVersion);
+            getOrCreateClientQuotasDelta().handleMetadataVersionChange(changedMetadataVersion);
+            getOrCreateProducerIdsDelta().handleMetadataVersionChange(changedMetadataVersion);
+            getOrCreateAclsDelta().handleMetadataVersionChange(changedMetadataVersion);
+        });
     }
 
     public void replay(BrokerRegistrationChangeRecord record) {
@@ -267,10 +287,6 @@ public void replay(ProducerIdsRecord record) {
         getOrCreateProducerIdsDelta().replay(record);
     }
 
-    public void replay(RemoveFeatureLevelRecord record) {
-        getOrCreateFeaturesDelta().replay(record);
-    }
-
     public void replay(AccessControlEntryRecord record) {
         getOrCreateAclsDelta().replay(record);
     }
diff --git a/metadata/src/main/java/org/apache/kafka/image/MetadataImage.java b/metadata/src/main/java/org/apache/kafka/image/MetadataImage.java
index c887572ea8d27..55d572127e975 100644
--- a/metadata/src/main/java/org/apache/kafka/image/MetadataImage.java
+++ b/metadata/src/main/java/org/apache/kafka/image/MetadataImage.java
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.util.Objects;
 import java.util.function.Consumer;
+import org.apache.kafka.server.common.MetadataVersion;
 
 
 /**
@@ -120,8 +121,11 @@ public AclsImage acls() {
     }
 
     public void write(Consumer<List<ApiMessageAndVersion>> out) {
+        MetadataVersion metadataVersion = features.metadataVersion();
+        // Features should be written out first so we can include the metadata.version at the beginning of the
+        // snapshot
         features.write(out);
-        cluster.write(out);
+        cluster.write(out, metadataVersion);
         topics.write(out);
         configs.write(out);
         clientQuotas.write(out);
diff --git a/metadata/src/main/java/org/apache/kafka/image/ProducerIdsDelta.java b/metadata/src/main/java/org/apache/kafka/image/ProducerIdsDelta.java
index 99dd20786346e..62052799284ca 100644
--- a/metadata/src/main/java/org/apache/kafka/image/ProducerIdsDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/ProducerIdsDelta.java
@@ -18,6 +18,7 @@
 package org.apache.kafka.image;
 
 import org.apache.kafka.common.metadata.ProducerIdsRecord;
+import org.apache.kafka.server.common.MetadataVersion;
 
 
 public final class ProducerIdsDelta {
@@ -39,6 +40,10 @@ public void finishSnapshot() {
         // Nothing to do
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public void replay(ProducerIdsRecord record) {
         nextProducerId = record.nextProducerId();
     }
diff --git a/metadata/src/main/java/org/apache/kafka/image/TopicsDelta.java b/metadata/src/main/java/org/apache/kafka/image/TopicsDelta.java
index f9d8087879ba8..66f12102a5935 100644
--- a/metadata/src/main/java/org/apache/kafka/image/TopicsDelta.java
+++ b/metadata/src/main/java/org/apache/kafka/image/TopicsDelta.java
@@ -24,6 +24,7 @@
 import org.apache.kafka.common.metadata.RemoveTopicRecord;
 import org.apache.kafka.common.metadata.TopicRecord;
 import org.apache.kafka.metadata.Replicas;
+import org.apache.kafka.server.common.MetadataVersion;
 
 import java.util.Collections;
 import java.util.HashMap;
@@ -117,6 +118,10 @@ public void finishSnapshot() {
         }
     }
 
+    public void handleMetadataVersionChange(MetadataVersion newVersion) {
+        // no-op
+    }
+
     public TopicsImage apply() {
         Map<Uuid, TopicImage> newTopicsById = new HashMap<>(image.topicsById().size());
         Map<String, TopicImage> newTopicsByName = new HashMap<>(image.topicsByName().size());
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistration.java b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistration.java
index fd5eb65d1a2b8..d1d345506530b 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistration.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistration.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.common.Endpoint;
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.Uuid;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerEndpoint;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerFeature;
@@ -36,9 +37,6 @@
 import java.util.Optional;
 import java.util.stream.Collectors;
 
-import static org.apache.kafka.common.metadata.MetadataRecordType.REGISTER_BROKER_RECORD;
-
-
 /**
  * An immutable class which represents broker registrations.
  */
@@ -58,6 +56,7 @@ private static Map<String, Endpoint> listenersToMap(Collection<Endpoint> listene
     private final Map<String, VersionRange> supportedFeatures;
     private final Optional<String> rack;
     private final boolean fenced;
+    private final boolean inControlledShutdown;
 
     public BrokerRegistration(int id,
                               long epoch,
@@ -65,8 +64,10 @@ public BrokerRegistration(int id,
                               List<Endpoint> listeners,
                               Map<String, VersionRange> supportedFeatures,
                               Optional<String> rack,
-                              boolean fenced) {
-        this(id, epoch, incarnationId, listenersToMap(listeners), supportedFeatures, rack, fenced);
+                              boolean fenced,
+                              boolean inControlledShutdown) {
+        this(id, epoch, incarnationId, listenersToMap(listeners), supportedFeatures, rack,
+            fenced, inControlledShutdown);
     }
 
     public BrokerRegistration(int id,
@@ -75,7 +76,8 @@ public BrokerRegistration(int id,
                               Map<String, Endpoint> listeners,
                               Map<String, VersionRange> supportedFeatures,
                               Optional<String> rack,
-                              boolean fenced) {
+                              boolean fenced,
+                              boolean inControlledShutdown) {
         this.id = id;
         this.epoch = epoch;
         this.incarnationId = incarnationId;
@@ -92,6 +94,7 @@ public BrokerRegistration(int id,
         Objects.requireNonNull(rack);
         this.rack = rack;
         this.fenced = fenced;
+        this.inControlledShutdown = inControlledShutdown;
     }
 
     public static BrokerRegistration fromRecord(RegisterBrokerRecord record) {
@@ -104,7 +107,7 @@ public static BrokerRegistration fromRecord(RegisterBrokerRecord record) {
         }
         Map<String, VersionRange> supportedFeatures = new HashMap<>();
         for (BrokerFeature feature : record.features()) {
-            supportedFeatures.put(feature.name(), new VersionRange(
+            supportedFeatures.put(feature.name(), VersionRange.of(
                 feature.minSupportedVersion(), feature.maxSupportedVersion()));
         }
         return new BrokerRegistration(record.brokerId(),
@@ -113,7 +116,8 @@ public static BrokerRegistration fromRecord(RegisterBrokerRecord record) {
             listeners,
             supportedFeatures,
             Optional.ofNullable(record.rack()),
-            record.fenced());
+            record.fenced(),
+            record.inControlledShutdown());
     }
 
     public int id() {
@@ -152,13 +156,22 @@ public boolean fenced() {
         return fenced;
     }
 
-    public ApiMessageAndVersion toRecord() {
+    public boolean inControlledShutdown() {
+        return inControlledShutdown;
+    }
+
+    public ApiMessageAndVersion toRecord(MetadataVersion metadataVersion) {
         RegisterBrokerRecord registrationRecord = new RegisterBrokerRecord().
             setBrokerId(id).
             setRack(rack.orElse(null)).
             setBrokerEpoch(epoch).
             setIncarnationId(incarnationId).
             setFenced(fenced);
+
+        if (metadataVersion.isInControlledShutdownStateSupported()) {
+            registrationRecord.setInControlledShutdown(inControlledShutdown);
+        }
+
         for (Entry<String, Endpoint> entry : listeners.entrySet()) {
             Endpoint endpoint = entry.getValue();
             registrationRecord.endPoints().add(new BrokerEndpoint().
@@ -167,20 +180,22 @@ public ApiMessageAndVersion toRecord() {
                 setPort(endpoint.port()).
                 setSecurityProtocol(endpoint.securityProtocol().id));
         }
+
         for (Entry<String, VersionRange> entry : supportedFeatures.entrySet()) {
             registrationRecord.features().add(new BrokerFeature().
                 setName(entry.getKey()).
                 setMinSupportedVersion(entry.getValue().min()).
                 setMaxSupportedVersion(entry.getValue().max()));
         }
+
         return new ApiMessageAndVersion(registrationRecord,
-                REGISTER_BROKER_RECORD.highestSupportedVersion());
+            metadataVersion.registerBrokerRecordVersion());
     }
 
     @Override
     public int hashCode() {
         return Objects.hash(id, epoch, incarnationId, listeners, supportedFeatures,
-            rack, fenced);
+            rack, fenced, inControlledShutdown);
     }
 
     @Override
@@ -193,7 +208,8 @@ public boolean equals(Object o) {
             other.listeners.equals(listeners) &&
             other.supportedFeatures.equals(supportedFeatures) &&
             other.rack.equals(rack) &&
-            other.fenced == fenced;
+            other.fenced == fenced &&
+            other.inControlledShutdown == inControlledShutdown;
     }
 
     @Override
@@ -213,12 +229,30 @@ public String toString() {
         bld.append("}");
         bld.append(", rack=").append(rack);
         bld.append(", fenced=").append(fenced);
+        bld.append(", inControlledShutdown=").append(inControlledShutdown);
         bld.append(")");
         return bld.toString();
     }
 
-    public BrokerRegistration cloneWithFencing(boolean fencing) {
-        return new BrokerRegistration(id, epoch, incarnationId, listeners,
-            supportedFeatures, rack, fencing);
+    public BrokerRegistration cloneWith(
+        Optional<Boolean> fencingChange,
+        Optional<Boolean> inControlledShutdownChange
+    ) {
+        boolean newFenced = fencingChange.orElse(fenced);
+        boolean newInControlledShutdownChange = inControlledShutdownChange.orElse(inControlledShutdown);
+
+        if (newFenced == fenced && newInControlledShutdownChange == inControlledShutdown)
+            return this;
+
+        return new BrokerRegistration(
+            id,
+            epoch,
+            incarnationId,
+            listeners,
+            supportedFeatures,
+            rack,
+            newFenced,
+            newInControlledShutdownChange
+        );
     }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationFencingChange.java b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationFencingChange.java
new file mode 100644
index 0000000000000..ab0bfc6a81f84
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationFencingChange.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+
+public enum BrokerRegistrationFencingChange {
+    FENCE(1, Optional.of(true)),
+    NONE(0, Optional.empty()),
+    UNFENCE(-1, Optional.of(false));
+
+    private final byte value;
+
+    private final Optional<Boolean> asBoolean;
+
+    private final static Map<Byte, BrokerRegistrationFencingChange> VALUE_TO_ENUM =
+        Arrays.stream(BrokerRegistrationFencingChange.values()).
+                collect(Collectors.toMap(v -> Byte.valueOf(v.value()), Function.identity()));
+
+    public static Optional<BrokerRegistrationFencingChange> fromValue(byte value) {
+        return Optional.ofNullable(VALUE_TO_ENUM.get(value));
+    }
+
+    BrokerRegistrationFencingChange(int value, Optional<Boolean> asBoolean) {
+        this.value = (byte) value;
+        this.asBoolean = asBoolean;
+    }
+
+    public Optional<Boolean> asBoolean() {
+        return asBoolean;
+    }
+
+    public byte value() {
+        return value;
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChange.java b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChange.java
new file mode 100644
index 0000000000000..39f8abf595e7a
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChange.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+public enum BrokerRegistrationInControlledShutdownChange {
+    // Note that Optional.of(true) is not a valid state change here. The only
+    // way to leave the in controlled shutdown state is by registering the
+    // broker with a new incarnation id.
+    NONE(0, Optional.empty()),
+    IN_CONTROLLED_SHUTDOWN(1, Optional.of(true));
+
+    private final byte value;
+
+    private final Optional<Boolean> asBoolean;
+
+    private final static Map<Byte, BrokerRegistrationInControlledShutdownChange> VALUE_TO_ENUM =
+        Arrays.stream(BrokerRegistrationInControlledShutdownChange.values()).
+            collect(Collectors.toMap(v -> Byte.valueOf(v.value()), Function.identity()));
+
+    public static Optional<BrokerRegistrationInControlledShutdownChange> fromValue(byte value) {
+        return Optional.ofNullable(VALUE_TO_ENUM.get(value));
+    }
+
+    BrokerRegistrationInControlledShutdownChange(int value, Optional<Boolean> asBoolean) {
+        this.value = (byte) value;
+        this.asBoolean = asBoolean;
+    }
+
+    public Optional<Boolean> asBoolean() {
+        return asBoolean;
+    }
+
+    public byte value() {
+        return value;
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/ConfigSynonym.java b/metadata/src/main/java/org/apache/kafka/metadata/ConfigSynonym.java
new file mode 100644
index 0000000000000..d331a476c92c0
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/ConfigSynonym.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.metadata;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.TimeUnit;
+import java.util.function.Function;
+
+
+/**
+ * Represents a synonym for a configuration plus a conversion function. The conversion
+ * function is necessary for cases where the synonym is denominated in different units
+ * (hours versus milliseconds, etc.)
+ */
+public class ConfigSynonym {
+    private static final Logger log = LoggerFactory.getLogger(ConfigSynonym.class);
+
+    public static final Function<String, String> HOURS_TO_MILLISECONDS = input -> {
+        int hours = valueToInt(input, 0, "hoursToMilliseconds");
+        return String.valueOf(TimeUnit.MILLISECONDS.convert(hours, TimeUnit.HOURS));
+    };
+
+    public static final Function<String, String> MINUTES_TO_MILLISECONDS = input -> {
+        int hours = valueToInt(input, 0, "minutesToMilliseconds");
+        return String.valueOf(TimeUnit.MILLISECONDS.convert(hours, TimeUnit.MINUTES));
+    };
+
+    private static int valueToInt(String input, int defaultValue, String what) {
+        if (input == null) return defaultValue;
+        String trimmedInput = input.trim();
+        if (trimmedInput.isEmpty()) {
+            return defaultValue;
+        }
+        try {
+            return Integer.parseInt(trimmedInput);
+        } catch (Exception e) {
+            log.error("{} failed: unable to parse '{}' as an integer.", what, trimmedInput, e);
+            return defaultValue;
+        }
+    }
+
+    private final String name;
+    private final Function<String, String> converter;
+
+    public ConfigSynonym(String name, Function<String, String> converter) {
+        this.name = name;
+        this.converter = converter;
+    }
+
+    public ConfigSynonym(String name) {
+        this(name, Function.identity());
+    }
+
+    public String name() {
+        return name;
+    }
+
+    public Function<String, String> converter() {
+        return converter;
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/FeatureMap.java b/metadata/src/main/java/org/apache/kafka/metadata/FeatureMap.java
deleted file mode 100644
index 272c87d213831..0000000000000
--- a/metadata/src/main/java/org/apache/kafka/metadata/FeatureMap.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.kafka.metadata;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Optional;
-import java.util.stream.Collectors;
-
-
-/**
- * A map of feature names to their supported versions.
- */
-public class FeatureMap {
-    private final Map<String, VersionRange> features;
-
-    public FeatureMap(Map<String, VersionRange> features) {
-        this.features = Collections.unmodifiableMap(new HashMap<>(features));
-    }
-
-    public Optional<VersionRange> get(String name) {
-        return Optional.ofNullable(features.get(name));
-    }
-
-    public Map<String, VersionRange> features() {
-        return features;
-    }
-
-    @Override
-    public int hashCode() {
-        return features.hashCode();
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (!(o instanceof FeatureMap)) return false;
-        FeatureMap other = (FeatureMap) o;
-        return features.equals(other.features);
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder bld = new StringBuilder();
-        bld.append("{");
-        bld.append(features.keySet().stream().sorted().
-            map(k -> k + ": " + features.get(k)).
-            collect(Collectors.joining(", ")));
-        bld.append("}");
-        return bld.toString();
-    }
-}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/FeatureMapAndEpoch.java b/metadata/src/main/java/org/apache/kafka/metadata/FinalizedControllerFeatures.java
similarity index 61%
rename from metadata/src/main/java/org/apache/kafka/metadata/FeatureMapAndEpoch.java
rename to metadata/src/main/java/org/apache/kafka/metadata/FinalizedControllerFeatures.java
index 26096ea7a333e..2ebce9e3e6ced 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/FeatureMapAndEpoch.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/FinalizedControllerFeatures.java
@@ -17,23 +17,31 @@
 
 package org.apache.kafka.metadata;
 
+import java.util.Collections;
+import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
 
 
 /**
  * A map of feature names to their supported versions.
  */
-public class FeatureMapAndEpoch {
-    private final FeatureMap map;
+public class FinalizedControllerFeatures {
+    private final Map<String, Short> featureMap;
     private final long epoch;
 
-    public FeatureMapAndEpoch(FeatureMap map, long epoch) {
-        this.map = map;
+    public FinalizedControllerFeatures(Map<String, Short> featureMap, long epoch) {
+        this.featureMap = Collections.unmodifiableMap(featureMap);
         this.epoch = epoch;
     }
 
-    public FeatureMap map() {
-        return map;
+    public Optional<Short> get(String name) {
+        return Optional.ofNullable(featureMap.get(name));
+    }
+
+    public Set<String> featureNames() {
+        return featureMap.keySet();
     }
 
     public long epoch() {
@@ -42,21 +50,21 @@ public long epoch() {
 
     @Override
     public int hashCode() {
-        return Objects.hash(map, epoch);
+        return Objects.hash(featureMap, epoch);
     }
 
     @Override
     public boolean equals(Object o) {
-        if (!(o instanceof FeatureMapAndEpoch)) return false;
-        FeatureMapAndEpoch other = (FeatureMapAndEpoch) o;
-        return map.equals(other.map) && epoch == other.epoch;
+        if (!(o instanceof FinalizedControllerFeatures)) return false;
+        FinalizedControllerFeatures other = (FinalizedControllerFeatures) o;
+        return featureMap.equals(other.featureMap) && epoch == other.epoch;
     }
 
     @Override
     public String toString() {
         StringBuilder bld = new StringBuilder();
         bld.append("{");
-        bld.append("map=").append(map.toString());
+        bld.append("featureMap=").append(featureMap.toString());
         bld.append(", epoch=").append(epoch);
         bld.append("}");
         return bld.toString();
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/KafkaConfigSchema.java b/metadata/src/main/java/org/apache/kafka/metadata/KafkaConfigSchema.java
index 88bd5e9bfc140..d15d6623e38b8 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/KafkaConfigSchema.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/KafkaConfigSchema.java
@@ -17,12 +17,21 @@
 
 package org.apache.kafka.metadata;
 
+import org.apache.kafka.clients.admin.ConfigEntry;
+import org.apache.kafka.clients.admin.ConfigEntry.ConfigSource;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigResource;
+import org.apache.kafka.common.config.types.Password;
 import org.apache.kafka.common.metadata.ConfigRecord;
+import org.apache.kafka.common.requests.DescribeConfigsResponse;
 
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.function.Function;
 
+import static java.util.Collections.emptyList;
 import static java.util.Collections.emptyMap;
 
 
@@ -32,12 +41,75 @@
  * determining the type of config keys (string, int, password, etc.)
  */
 public class KafkaConfigSchema {
-    public static final KafkaConfigSchema EMPTY = new KafkaConfigSchema(emptyMap());
+    public static final KafkaConfigSchema EMPTY = new KafkaConfigSchema(emptyMap(), emptyMap());
+
+    private static final ConfigDef EMPTY_CONFIG_DEF = new ConfigDef();
+
+    /**
+     * Translate a ConfigDef.Type to its equivalent for ConfigEntry.ConfigType.
+     *
+     * We do not want this code in ConfigEntry, since that is a public-facing API. On the
+     * other hand, putting this code in ConfigDef.Type would introduce an unwanted dependency
+     * from org.apache.kafka.common.config to org.apache.kafka.clients.admin. So it
+     * makes sense to put it here.
+     */
+    public static ConfigEntry.ConfigType translateConfigType(ConfigDef.Type type) {
+        switch (type) {
+            case BOOLEAN:
+                return ConfigEntry.ConfigType.BOOLEAN;
+            case STRING:
+                return ConfigEntry.ConfigType.STRING;
+            case INT:
+                return ConfigEntry.ConfigType.INT;
+            case SHORT:
+                return ConfigEntry.ConfigType.SHORT;
+            case LONG:
+                return ConfigEntry.ConfigType.LONG;
+            case DOUBLE:
+                return ConfigEntry.ConfigType.DOUBLE;
+            case LIST:
+                return ConfigEntry.ConfigType.LIST;
+            case CLASS:
+                return ConfigEntry.ConfigType.CLASS;
+            case PASSWORD:
+                return ConfigEntry.ConfigType.PASSWORD;
+            default:
+                return ConfigEntry.ConfigType.UNKNOWN;
+        }
+    }
+
+    private static final Map<ConfigEntry.ConfigSource, DescribeConfigsResponse.ConfigSource> TRANSLATE_CONFIG_SOURCE_MAP;
+
+    static {
+        Map<ConfigEntry.ConfigSource, DescribeConfigsResponse.ConfigSource> map = new HashMap<>();
+        for (DescribeConfigsResponse.ConfigSource source : DescribeConfigsResponse.ConfigSource.values()) {
+            map.put(source.source(), source);
+        }
+        TRANSLATE_CONFIG_SOURCE_MAP = Collections.unmodifiableMap(map);
+    }
+
+    /**
+     * Translate a ConfigEntry.ConfigSource enum to its equivalent for DescribeConfigsResponse.
+     *
+     * We do not want this code in ConfigEntry, since that is a public-facing API. On the
+     * other hand, putting this code in DescribeConfigsResponse would introduce an unwanted
+     * dependency from org.apache.kafka.common.requests to org.apache.kafka.clients.admin.
+     * So it makes sense to put it here.
+     */
+    public static DescribeConfigsResponse.ConfigSource translateConfigSource(ConfigEntry.ConfigSource configSource) {
+        DescribeConfigsResponse.ConfigSource result = TRANSLATE_CONFIG_SOURCE_MAP.get(configSource);
+        if (result != null) return result;
+        return DescribeConfigsResponse.ConfigSource.UNKNOWN;
+    }
 
     private final Map<ConfigResource.Type, ConfigDef> configDefs;
 
-    public KafkaConfigSchema(Map<ConfigResource.Type, ConfigDef> configDefs) {
+    private final Map<String, List<ConfigSynonym>> logConfigSynonyms;
+
+    public KafkaConfigSchema(Map<ConfigResource.Type, ConfigDef> configDefs,
+                             Map<String, List<ConfigSynonym>> logConfigSynonyms) {
         this.configDefs = configDefs;
+        this.logConfigSynonyms = logConfigSynonyms;
     }
 
     /**
@@ -84,4 +156,91 @@ public String getDefault(ConfigResource.Type type, String key) {
         }
         return ConfigDef.convertToString(configKey.defaultValue, configKey.type);
     }
+
+    public Map<String, ConfigEntry> resolveEffectiveTopicConfigs(
+            Map<String, ? extends Object> staticNodeConfig,
+            Map<String, ? extends Object> dynamicClusterConfigs,
+            Map<String, ? extends Object> dynamicNodeConfigs,
+            Map<String, ? extends Object> dynamicTopicConfigs) {
+        ConfigDef configDef = configDefs.getOrDefault(ConfigResource.Type.TOPIC, EMPTY_CONFIG_DEF);
+        HashMap<String, ConfigEntry> effectiveConfigs = new HashMap<>();
+        for (ConfigDef.ConfigKey configKey : configDef.configKeys().values()) {
+            ConfigEntry entry = resolveEffectiveTopicConfig(configKey, staticNodeConfig,
+                dynamicClusterConfigs, dynamicNodeConfigs, dynamicTopicConfigs);
+            effectiveConfigs.put(entry.name(), entry);
+        }
+        return effectiveConfigs;
+    }
+
+    private ConfigEntry resolveEffectiveTopicConfig(ConfigDef.ConfigKey configKey,
+            Map<String, ? extends Object> staticNodeConfig,
+            Map<String, ? extends Object> dynamicClusterConfigs,
+            Map<String, ? extends Object> dynamicNodeConfigs,
+            Map<String, ? extends Object> dynamicTopicConfigs) {
+        if (dynamicTopicConfigs.containsKey(configKey.name)) {
+            return toConfigEntry(configKey,
+                dynamicTopicConfigs.get(configKey.name),
+                ConfigSource.DYNAMIC_TOPIC_CONFIG, Function.identity());
+        }
+        List<ConfigSynonym> synonyms = logConfigSynonyms.getOrDefault(configKey.name, emptyList());
+        for (ConfigSynonym synonym : synonyms) {
+            if (dynamicNodeConfigs.containsKey(synonym.name())) {
+                return toConfigEntry(configKey, dynamicNodeConfigs.get(synonym.name()),
+                    ConfigSource.DYNAMIC_BROKER_CONFIG, synonym.converter());
+            }
+        }
+        for (ConfigSynonym synonym : synonyms) {
+            if (dynamicClusterConfigs.containsKey(synonym.name())) {
+                return toConfigEntry(configKey, dynamicClusterConfigs.get(synonym.name()),
+                    ConfigSource.DYNAMIC_DEFAULT_BROKER_CONFIG, synonym.converter());
+            }
+        }
+        for (ConfigSynonym synonym : synonyms) {
+            if (staticNodeConfig.containsKey(synonym.name())) {
+                return toConfigEntry(configKey, staticNodeConfig.get(synonym.name()),
+                    ConfigSource.STATIC_BROKER_CONFIG, synonym.converter());
+            }
+        }
+        return toConfigEntry(configKey, configKey.hasDefault() ? configKey.defaultValue : null,
+            ConfigSource.DEFAULT_CONFIG, Function.identity());
+    }
+
+    private ConfigEntry toConfigEntry(ConfigDef.ConfigKey configKey,
+                                      Object value,
+                                      ConfigSource source,
+                                      Function<String, String> converter) {
+        // Convert the value into a nulllable string suitable for storing in ConfigEntry.
+        String stringValue = null;
+        if (value != null) {
+            if (value instanceof String) {
+                // The value may already be a string if it's coming from a Map<String, String>.
+                // Then it doesn't need to be converted.
+                stringValue = (String) value;
+            } else if (value instanceof Password) {
+                // We want the actual value here, not [hidden], which is what we'd get
+                // from Password#toString. While we don't return sensitive config values
+                // over the wire to users, we may need the real value internally.
+                stringValue = ((Password) value).value();
+            } else {
+                try {
+                    // Use the ConfigDef function here which will handle List, Class, etc.
+                    stringValue = ConfigDef.convertToString(value, configKey.type);
+                } catch (Exception e) {
+                    throw new RuntimeException("Unable to convert " + configKey.name + " to string.", e);
+                }
+            }
+        }
+        if (stringValue != null) {
+            stringValue = converter.apply(stringValue);
+        }
+        return new ConfigEntry(
+            configKey.name,
+            stringValue,
+            source,
+            configKey.type().isSensitive(),
+            false, // "readonly" is always false, for now.
+            emptyList(), // we don't populate synonyms, for now.
+            translateConfigType(configKey.type()),
+            configKey.documentation);
+    }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/LeaderRecoveryState.java b/metadata/src/main/java/org/apache/kafka/metadata/LeaderRecoveryState.java
new file mode 100644
index 0000000000000..08086751b709d
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/LeaderRecoveryState.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import java.util.Optional;
+
+public enum LeaderRecoveryState {
+    /**
+     * Represent that the election for the partition was either an ISR election or the
+     * leader recovered from an unclean leader election.
+     */
+    RECOVERED((byte) 0),
+
+    /**
+     * Represent that the election for the partition was an unclean leader election and
+     * that the leader is recovering from it.
+     */
+    RECOVERING((byte) 1);
+
+    /**
+     * A special value used to represent that the LeaderRecoveryState field of a
+     * PartitionChangeRecord didn't change.
+     */
+    public static final byte NO_CHANGE = (byte) -1;
+
+    public static LeaderRecoveryState of(byte value) {
+        return optionalOf(value)
+            .orElseThrow(() -> new IllegalArgumentException(String.format("Value %s is not a valid leader recovery state", value)));
+    }
+
+    public static Optional<LeaderRecoveryState> optionalOf(byte value) {
+        if (value == RECOVERED.value()) {
+            return Optional.of(RECOVERED);
+        }
+        if (value == RECOVERING.value()) {
+            return Optional.of(RECOVERING);
+        }
+
+        return Optional.empty();
+    }
+
+    private final byte value;
+
+    private LeaderRecoveryState(byte value) {
+        this.value = value;
+    }
+
+    public byte value() {
+        return value;
+    }
+
+    public LeaderRecoveryState changeTo(byte value) {
+        if (value == NO_CHANGE) {
+            return this;
+        }
+
+        return of(value);
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/PartitionRegistration.java b/metadata/src/main/java/org/apache/kafka/metadata/PartitionRegistration.java
index 933bda95cad67..1c42bd07c8e07 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/PartitionRegistration.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/PartitionRegistration.java
@@ -39,6 +39,7 @@ public class PartitionRegistration {
     public final int[] removingReplicas;
     public final int[] addingReplicas;
     public final int leader;
+    public final LeaderRecoveryState leaderRecoveryState;
     public final int leaderEpoch;
     public final int partitionEpoch;
 
@@ -52,18 +53,20 @@ public PartitionRegistration(PartitionRecord record) {
             Replicas.toArray(record.removingReplicas()),
             Replicas.toArray(record.addingReplicas()),
             record.leader(),
+            LeaderRecoveryState.of(record.leaderRecoveryState()),
             record.leaderEpoch(),
             record.partitionEpoch());
     }
 
     public PartitionRegistration(int[] replicas, int[] isr, int[] removingReplicas,
-                                 int[] addingReplicas, int leader, int leaderEpoch,
-                                 int partitionEpoch) {
+                                 int[] addingReplicas, int leader, LeaderRecoveryState leaderRecoveryState,
+                                 int leaderEpoch, int partitionEpoch) {
         this.replicas = replicas;
         this.isr = isr;
         this.removingReplicas = removingReplicas;
         this.addingReplicas = addingReplicas;
         this.leader = leader;
+        this.leaderRecoveryState = leaderRecoveryState;
         this.leaderEpoch = leaderEpoch;
         this.partitionEpoch = partitionEpoch;
     }
@@ -76,6 +79,7 @@ public PartitionRegistration merge(PartitionChangeRecord record) {
             removingReplicas : Replicas.toArray(record.removingReplicas());
         int[] newAddingReplicas = (record.addingReplicas() == null) ?
             addingReplicas : Replicas.toArray(record.addingReplicas());
+
         int newLeader;
         int newLeaderEpoch;
         if (record.leader() == NO_LEADER_CHANGE) {
@@ -85,11 +89,15 @@ public PartitionRegistration merge(PartitionChangeRecord record) {
             newLeader = record.leader();
             newLeaderEpoch = leaderEpoch + 1;
         }
+
+        LeaderRecoveryState newLeaderRecoveryState = leaderRecoveryState.changeTo(record.leaderRecoveryState());
+
         return new PartitionRegistration(newReplicas,
             newIsr,
             newRemovingReplicas,
             newAddingReplicas,
             newLeader,
+            newLeaderRecoveryState,
             newLeaderEpoch,
             partitionEpoch + 1);
     }
@@ -126,6 +134,11 @@ public String diff(PartitionRegistration prev) {
                 append(prev.leader).append(" -> ").append(leader);
             prefix = ", ";
         }
+        if (leaderRecoveryState != prev.leaderRecoveryState) {
+            builder.append(prefix).append("leaderRecoveryState: ").
+                append(prev.leaderRecoveryState).append(" -> ").append(leaderRecoveryState);
+            prefix = ", ";
+        }
         if (leaderEpoch != prev.leaderEpoch) {
             builder.append(prefix).append("leaderEpoch: ").
                 append(prev.leaderEpoch).append(" -> ").append(leaderEpoch);
@@ -167,6 +180,7 @@ public ApiMessageAndVersion toRecord(Uuid topicId, int partitionId) {
             setRemovingReplicas(Replicas.toList(removingReplicas)).
             setAddingReplicas(Replicas.toList(addingReplicas)).
             setLeader(leader).
+            setLeaderRecoveryState(leaderRecoveryState.value()).
             setLeaderEpoch(leaderEpoch).
             setPartitionEpoch(partitionEpoch), PARTITION_RECORD.highestSupportedVersion());
     }
@@ -180,10 +194,11 @@ public LeaderAndIsrPartitionState toLeaderAndIsrPartitionState(TopicPartition tp
             setLeader(leader).
             setLeaderEpoch(leaderEpoch).
             setIsr(Replicas.toList(isr)).
-            setZkVersion(partitionEpoch).
+            setPartitionEpoch(partitionEpoch).
             setReplicas(Replicas.toList(replicas)).
             setAddingReplicas(Replicas.toList(addingReplicas)).
             setRemovingReplicas(Replicas.toList(removingReplicas)).
+            setLeaderRecoveryState(leaderRecoveryState.value()).
             setIsNew(isNew);
     }
 
@@ -196,7 +211,7 @@ public boolean isReassigning() {
 
     @Override
     public int hashCode() {
-        return Objects.hash(replicas, isr, removingReplicas, addingReplicas, leader,
+        return Objects.hash(replicas, isr, removingReplicas, addingReplicas, leader, leaderRecoveryState,
             leaderEpoch, partitionEpoch);
     }
 
@@ -209,6 +224,7 @@ public boolean equals(Object o) {
             Arrays.equals(removingReplicas, other.removingReplicas) &&
             Arrays.equals(addingReplicas, other.addingReplicas) &&
             leader == other.leader &&
+            leaderRecoveryState == other.leaderRecoveryState &&
             leaderEpoch == other.leaderEpoch &&
             partitionEpoch == other.partitionEpoch;
     }
@@ -221,6 +237,7 @@ public String toString() {
         builder.append(", removingReplicas=").append(Arrays.toString(removingReplicas));
         builder.append(", addingReplicas=").append(Arrays.toString(addingReplicas));
         builder.append(", leader=").append(leader);
+        builder.append(", leaderRecoveryState=").append(leaderRecoveryState);
         builder.append(", leaderEpoch=").append(leaderEpoch);
         builder.append(", partitionEpoch=").append(partitionEpoch);
         builder.append(")");
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/VersionRange.java b/metadata/src/main/java/org/apache/kafka/metadata/VersionRange.java
index f171ea14bc42e..ed0648bef51d0 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/VersionRange.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/VersionRange.java
@@ -23,16 +23,24 @@
  * An immutable class which represents version ranges.
  */
 public class VersionRange {
-    public final static VersionRange ALL = new VersionRange((short) 0, Short.MAX_VALUE);
+    public final static VersionRange ALL = of((short) 0, Short.MAX_VALUE);
 
     private final short min;
     private final short max;
 
-    public VersionRange(short min, short max) {
+    private VersionRange(short min, short max) {
         this.min = min;
         this.max = max;
     }
 
+    public static VersionRange of(short min, short max) {
+        return new VersionRange(min, max);
+    }
+
+    public static VersionRange of(int min, int max) {
+        return new VersionRange((short) min, (short) max);
+    }
+
     public short min() {
         return min;
     }
@@ -41,8 +49,18 @@ public short max() {
         return max;
     }
 
-    public boolean contains(VersionRange other) {
-        return other.min >= min && other.max <= max;
+    /**
+     * Check if a given version is fully contained within this range
+     */
+    public boolean contains(short version) {
+        return version >= min && version <= max;
+    }
+
+    /**
+     * Check if a given version range has overlap with this one
+     */
+    public boolean intersects(VersionRange other) {
+        return other.min <= max && other.max >= min;
     }
 
     @Override
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/AclMutator.java b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/AclMutator.java
index 95cd3702bf408..97558471659aa 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/AclMutator.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/AclMutator.java
@@ -19,6 +19,7 @@
 
 import org.apache.kafka.common.acl.AclBinding;
 import org.apache.kafka.common.acl.AclBindingFilter;
+import org.apache.kafka.controller.ControllerRequestContext;
 import org.apache.kafka.server.authorizer.AclCreateResult;
 import org.apache.kafka.server.authorizer.AclDeleteResult;
 
@@ -37,17 +38,26 @@ public interface AclMutator {
      * Create the specified ACLs. If any ACL already exists, nothing will be done for that
      * one, and we will return a success result for it.
      *
+     * @param context       The controller request context.
      * @param aclBindings   The ACLs to create.
+     *
      * @return              The results for each AclBinding, in the order they were passed.
      */
-    CompletableFuture<List<AclCreateResult>> createAcls(List<AclBinding> aclBindings);
+    CompletableFuture<List<AclCreateResult>> createAcls(
+        ControllerRequestContext context,
+        List<AclBinding> aclBindings
+    );
 
     /**
      * Delete some ACLs based on the set of filters that is passed in.
      *
+     * @param context               The controller request context.
      * @param aclBindingFilters     The filters.
+     *
      * @return                      The results for each filter, in the order they were passed.
      */
     CompletableFuture<List<AclDeleteResult>> deleteAcls(
-            List<AclBindingFilter> aclBindingFilters);
+        ControllerRequestContext context,
+        List<AclBindingFilter> aclBindingFilters
+    );
 }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizer.java b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizer.java
index 90117004b3bc0..710a975778c02 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizer.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizer.java
@@ -23,6 +23,7 @@
 import org.apache.kafka.common.errors.ApiException;
 import org.apache.kafka.common.errors.UnknownServerException;
 import org.apache.kafka.common.requests.ApiError;
+import org.apache.kafka.controller.ControllerRequestContext;
 import org.apache.kafka.server.authorizer.AclCreateResult;
 import org.apache.kafka.server.authorizer.AclDeleteResult;
 import org.apache.kafka.server.authorizer.AuthorizableRequestContext;
@@ -31,6 +32,7 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.OptionalLong;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletionStage;
 
@@ -54,6 +56,18 @@ public interface ClusterMetadataAuthorizer extends Authorizer {
      */
     AclMutator aclMutatorOrException();
 
+    /**
+     * Complete the initial load of the cluster metadata authorizer, so that all
+     * principals can use it.
+     */
+    void completeInitialLoad();
+
+    /**
+     * Complete the initial load of the cluster metadata authorizer with an exception,
+     * indicating that the loading process has failed.
+     */
+    void completeInitialLoad(Exception e);
+
     /**
      * Load the ACLs in the given map. Anything not in the map will be removed.
      * The authorizer will also wait for this initial snapshot load to complete when
@@ -88,7 +102,9 @@ default List<? extends CompletionStage<AclCreateResult>> createAcls(
         List<CompletableFuture<AclCreateResult>> futures = new ArrayList<>(aclBindings.size());
         AclMutator aclMutator = aclMutatorOrException();
         aclBindings.forEach(b -> futures.add(new CompletableFuture<>()));
-        aclMutator.createAcls(aclBindings).whenComplete((results, throwable) -> {
+        ControllerRequestContext context = new ControllerRequestContext(
+            requestContext, OptionalLong.empty());
+        aclMutator.createAcls(context, aclBindings).whenComplete((results, throwable) -> {
             if (throwable == null && results.size() != futures.size()) {
                 throwable = new UnknownServerException("Invalid size " +
                     "of result set from controller. Expected " + futures.size() +
@@ -126,7 +142,9 @@ default List<? extends CompletionStage<AclDeleteResult>> deleteAcls(
         List<CompletableFuture<AclDeleteResult>> futures = new ArrayList<>(filters.size());
         AclMutator aclMutator = aclMutatorOrException();
         filters.forEach(b -> futures.add(new CompletableFuture<>()));
-        aclMutator.deleteAcls(filters).whenComplete((results, throwable) -> {
+        ControllerRequestContext context = new ControllerRequestContext(
+            requestContext, OptionalLong.empty());
+        aclMutator.deleteAcls(context, filters).whenComplete((results, throwable) -> {
             if (throwable == null && results.size() != futures.size()) {
                 throwable = new UnknownServerException("Invalid size " +
                     "of result set from controller. Expected " + futures.size() +
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAcl.java b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAcl.java
index fd3e0f029e48b..3fe8ac70da57a 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAcl.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAcl.java
@@ -25,6 +25,7 @@
 import org.apache.kafka.common.resource.PatternType;
 import org.apache.kafka.common.resource.ResourcePattern;
 import org.apache.kafka.common.resource.ResourceType;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
 
 import java.util.Objects;
 
@@ -96,6 +97,17 @@ public String principal() {
         return principal;
     }
 
+    public KafkaPrincipal kafkaPrincipal() {
+        int colonIndex = principal.indexOf(":");
+        if (colonIndex == -1) {
+            throw new IllegalStateException("Could not parse principal from `" + principal + "` " +
+                "(no colon is present separating the principal type from the principal name)");
+        }
+        String principalType = principal.substring(0, colonIndex);
+        String principalName = principal.substring(colonIndex + 1);
+        return new KafkaPrincipal(principalType, principalName);
+    }
+
     public String host() {
         return host;
     }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizer.java b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizer.java
index 510a6f8707561..42f03367c22e8 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizer.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizer.java
@@ -54,10 +54,9 @@ public class StandardAuthorizer implements ClusterMetadataAuthorizer {
     public final static String ALLOW_EVERYONE_IF_NO_ACL_IS_FOUND_CONFIG = "allow.everyone.if.no.acl.found";
 
     /**
-     * A future which is completed once we have loaded a snapshot.
-     * TODO: KAFKA-13649: StandardAuthorizer should not finish loading until it reads up to the high water mark.
+     * A future which is completed once we have loaded up to the initial high water mark.
      */
-    private final CompletableFuture<Void> initialLoadFuture = CompletableFuture.completedFuture(null);
+    private final CompletableFuture<Void> initialLoadFuture = new CompletableFuture<>();
 
     /**
      * The current data. Can be read without a lock. Must be written while holding the object lock.
@@ -78,6 +77,24 @@ public AclMutator aclMutatorOrException() {
         return aclMutator;
     }
 
+    @Override
+    public synchronized void completeInitialLoad() {
+        data = data.copyWithNewLoadingComplete(true);
+        data.log.info("Completed initial ACL load process.");
+        initialLoadFuture.complete(null);
+    }
+
+    // Visible for testing
+    public CompletableFuture<Void> initialLoadFuture() {
+        return initialLoadFuture;
+    }
+
+    @Override
+    public void completeInitialLoad(Exception e) {
+        data.log.error("Failed to complete initial ACL load process.", e);
+        initialLoadFuture.completeExceptionally(e);
+    }
+
     @Override
     public void addAcl(Uuid id, StandardAcl acl) {
         data.addAcl(id, acl);
@@ -98,7 +115,12 @@ public synchronized void loadSnapshot(Map<Uuid, StandardAcl> acls) {
             AuthorizerServerInfo serverInfo) {
         Map<Endpoint, CompletableFuture<Void>> result = new HashMap<>();
         for (Endpoint endpoint : serverInfo.endpoints()) {
-            result.put(endpoint, initialLoadFuture);
+            if (serverInfo.earlyStartListeners().contains(
+                    endpoint.listenerName().orElseGet(() -> ""))) {
+                result.put(endpoint, CompletableFuture.completedFuture(null));
+            } else {
+                result.put(endpoint, initialLoadFuture);
+            }
         }
         return result;
     }
@@ -131,7 +153,6 @@ public void close() throws IOException {
         // Complete the initialLoadFuture, if it hasn't been completed already.
         initialLoadFuture.completeExceptionally(new TimeoutException("The authorizer was " +
             "closed before the initial load could complete."));
-        // Nothing else to do here.
     }
 
     @Override
@@ -145,8 +166,7 @@ public synchronized void configure(Map<String, ?> configs) {
             nodeId = -1;
         }
         this.data = data.copyWithNewConfig(nodeId, superUsers, defaultResult);
-        this.data.log.info("set super.users=" + String.join(",", superUsers) +
-            ", default result=" + defaultResult);
+        this.data.log.info("set super.users={}, default result={}", String.join(",", superUsers), defaultResult);
     }
 
     // VisibleForTesting
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerData.java b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerData.java
index a70fa8ca45dc6..d9ffd17562f53 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerData.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerData.java
@@ -22,12 +22,19 @@
 import org.apache.kafka.common.acl.AclBindingFilter;
 import org.apache.kafka.common.acl.AclOperation;
 import org.apache.kafka.common.acl.AclPermissionType;
+import org.apache.kafka.common.errors.AuthorizerNotReadyException;
+import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.resource.PatternType;
+import org.apache.kafka.common.resource.ResourcePattern;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
 import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.common.utils.SecurityUtils;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.server.authorizer.Action;
 import org.apache.kafka.server.authorizer.AuthorizableRequestContext;
 import org.apache.kafka.server.authorizer.AuthorizationResult;
 import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.Collection;
 import java.util.Collections;
@@ -69,17 +76,28 @@ public class StandardAuthorizerData {
      * The principal entry used in ACLs that match any principal.
      */
     public static final String WILDCARD_PRINCIPAL = "User:*";
+    public static final KafkaPrincipal WILDCARD_KAFKA_PRINCIPAL = new KafkaPrincipal(KafkaPrincipal.USER_TYPE, "*");
 
     /**
      * The logger to use.
      */
     final Logger log;
 
+    /**
+     * Logger to use for auditing.
+     */
+    final Logger auditLog;
+
     /**
      * The current AclMutator.
      */
     final AclMutator aclMutator;
 
+    /**
+     * True if the authorizer loading process is complete.
+     */
+    final boolean loadingComplete;
+
     /**
      * A statically configured set of users that are authorized to do anything.
      */
@@ -88,7 +106,7 @@ public class StandardAuthorizerData {
     /**
      * The result to return if no ACLs match.
      */
-    private final AuthorizationResult defaultResult;
+    private final DefaultRule defaultRule;
 
     /**
      * Contains all of the current ACLs sorted by (resource type, resource name).
@@ -104,9 +122,14 @@ private static Logger createLogger(int nodeId) {
         return new LogContext("[StandardAuthorizer " + nodeId + "] ").logger(StandardAuthorizerData.class);
     }
 
+    private static Logger auditLogger() {
+        return LoggerFactory.getLogger("kafka.authorizer.logger");
+    }
+
     static StandardAuthorizerData createEmpty() {
         return new StandardAuthorizerData(createLogger(-1),
             null,
+            false,
             Collections.emptySet(),
             DENIED,
             new ConcurrentSkipListSet<>(), new ConcurrentHashMap<>());
@@ -114,23 +137,38 @@ static StandardAuthorizerData createEmpty() {
 
     private StandardAuthorizerData(Logger log,
                                    AclMutator aclMutator,
+                                   boolean loadingComplete,
                                    Set<String> superUsers,
                                    AuthorizationResult defaultResult,
                                    ConcurrentSkipListSet<StandardAcl> aclsByResource,
                                    ConcurrentHashMap<Uuid, StandardAcl> aclsById) {
         this.log = log;
+        this.auditLog = auditLogger();
         this.aclMutator = aclMutator;
+        this.loadingComplete = loadingComplete;
         this.superUsers = superUsers;
-        this.defaultResult = defaultResult;
+        this.defaultRule = new DefaultRule(defaultResult);
         this.aclsByResource = aclsByResource;
         this.aclsById = aclsById;
     }
 
     StandardAuthorizerData copyWithNewAclMutator(AclMutator newAclMutator) {
-        return new StandardAuthorizerData(log,
+        return new StandardAuthorizerData(
+            log,
             newAclMutator,
+            loadingComplete,
+            superUsers,
+            defaultRule.result,
+            aclsByResource,
+            aclsById);
+    }
+
+    StandardAuthorizerData copyWithNewLoadingComplete(boolean newLoadingComplete) {
+        return new StandardAuthorizerData(log,
+            aclMutator,
+            newLoadingComplete,
             superUsers,
-            defaultResult,
+            defaultRule.result,
             aclsByResource,
             aclsById);
     }
@@ -141,6 +179,7 @@ StandardAuthorizerData copyWithNewConfig(int nodeId,
         return new StandardAuthorizerData(
             createLogger(nodeId),
             aclMutator,
+            loadingComplete,
             newSuperUsers,
             newDefaultResult,
             aclsByResource,
@@ -151,14 +190,15 @@ StandardAuthorizerData copyWithNewAcls(Collection<Entry<Uuid, StandardAcl>> aclE
         StandardAuthorizerData newData = new StandardAuthorizerData(
             log,
             aclMutator,
+            loadingComplete,
             superUsers,
-            defaultResult,
+            defaultRule.result,
             new ConcurrentSkipListSet<>(),
             new ConcurrentHashMap<>());
         for (Entry<Uuid, StandardAcl> entry : aclEntries) {
             newData.addAcl(entry.getKey(), entry.getValue());
         }
-        log.info("Applied " + aclEntries.size() + "acl(s) from image.");
+        log.info("Applied {} acl(s) from image.", aclEntries.size());
         return newData;
     }
 
@@ -173,9 +213,7 @@ void addAcl(Uuid id, StandardAcl acl) {
                 throw new RuntimeException("Unable to add the ACL with ID " + id +
                     " to aclsByResource");
             }
-            if (log.isTraceEnabled()) {
-                log.trace("Added ACL " + id + ": " + acl);
-            }
+            log.trace("Added ACL {}: {}", id, acl);
         } catch (Throwable e) {
             log.error("addAcl error", e);
             throw e;
@@ -192,9 +230,7 @@ void removeAcl(Uuid id) {
                 throw new RuntimeException("Unable to remove the ACL with ID " + id +
                     " from aclsByResource");
             }
-            if (log.isTraceEnabled()) {
-                log.trace("Removed ACL " + id + ": " + acl);
-            }
+            log.trace("Removed ACL {}: {}", id, acl);
         } catch (Throwable e) {
             log.error("removeAcl error", e);
             throw e;
@@ -206,18 +242,13 @@ Set<String> superUsers() {
     }
 
     AuthorizationResult defaultResult() {
-        return defaultResult;
+        return defaultRule.result;
     }
 
     int aclCount() {
         return aclsById.size();
     }
 
-    static class AuthorizationResultBuilder {
-        boolean foundDeny = false;
-        boolean foundAllow = false;
-    }
-
     /**
      * Authorize an action based on the current set of ACLs.
      *
@@ -227,18 +258,100 @@ static class AuthorizationResultBuilder {
      * result. In general it makes more sense to configure the default result to be
      * DENY, but some people (and unit tests) configure it as ALLOW.
      */
-    AuthorizationResult authorize(AuthorizableRequestContext requestContext,
-                                  Action action) {
+    public AuthorizationResult authorize(
+        AuthorizableRequestContext requestContext,
+        Action action
+    ) {
+        KafkaPrincipal principal = baseKafkaPrincipal(requestContext);
+        final MatchingRule rule;
+
         // Superusers are authorized to do anything.
-        if (superUsers.contains(requestContext.principal().toString())) {
-            if (log.isTraceEnabled()) {
-                log.trace("authorize(requestContext=" + requestContext + ", action=" + action +
-                    "): ALLOWED because " + requestContext.principal().toString() +
-                    " is a superuser");
+        if (superUsers.contains(principal.toString())) {
+            rule = SuperUserRule.INSTANCE;
+        } else if (!loadingComplete) {
+            throw new AuthorizerNotReadyException();
+        } else {
+            MatchingAclRule aclRule = findAclRule(
+                matchingPrincipals(requestContext),
+                requestContext.clientAddress().getHostAddress(),
+                action
+            );
+
+            if (aclRule != null) {
+                rule = aclRule;
+            } else {
+                // If nothing matched, we return the default result.
+                rule = defaultRule;
             }
-            return ALLOWED;
         }
 
+        logAuditMessage(principal, requestContext, action, rule);
+        return rule.result();
+    }
+
+    private String buildAuditMessage(
+        KafkaPrincipal principal,
+        AuthorizableRequestContext context,
+        Action action,
+        MatchingRule rule
+    ) {
+        StringBuilder bldr = new StringBuilder();
+        bldr.append("Principal = ").append(principal);
+        bldr.append(" is ").append(rule.result() == ALLOWED ? "Allowed" : "Denied");
+        bldr.append(" operation = ").append(action.operation());
+        bldr.append(" from host = ").append(context.clientAddress().getHostAddress());
+        bldr.append(" on resource = ");
+        appendResourcePattern(action.resourcePattern(), bldr);
+        bldr.append(" for request = ").append(ApiKeys.forId(context.requestType()).name);
+        bldr.append(" with resourceRefCount = ").append(action.resourceReferenceCount());
+        bldr.append(" based on rule ").append(rule);
+        return bldr.toString();
+    }
+
+    private void appendResourcePattern(ResourcePattern resourcePattern, StringBuilder bldr) {
+        bldr.append(SecurityUtils.resourceTypeName(resourcePattern.resourceType()))
+            .append(":")
+            .append(resourcePattern.patternType())
+            .append(":")
+            .append(resourcePattern.name());
+    }
+
+    private void logAuditMessage(
+        KafkaPrincipal principal,
+        AuthorizableRequestContext requestContext,
+        Action action,
+        MatchingRule rule
+    ) {
+        switch (rule.result()) {
+            case ALLOWED:
+                // logIfAllowed is true if access is granted to the resource as a result of this authorization.
+                // In this case, log at debug level. If false, no access is actually granted, the result is used
+                // only to determine authorized operations. So log only at trace level.
+                if (action.logIfAllowed() && auditLog.isDebugEnabled()) {
+                    auditLog.debug(buildAuditMessage(principal, requestContext, action, rule));
+                } else if (auditLog.isTraceEnabled()) {
+                    auditLog.trace(buildAuditMessage(principal, requestContext, action, rule));
+                }
+                return;
+
+            case DENIED:
+                // logIfDenied is true if access to the resource was explicitly requested. Since this is an attempt
+                // to access unauthorized resources, log at info level. If false, this is either a request to determine
+                // authorized operations or a filter (e.g for regex subscriptions) to filter out authorized resources.
+                // In this case, log only at trace level.
+                if (action.logIfDenied()) {
+                    auditLog.info(buildAuditMessage(principal, requestContext, action, rule));
+                } else if (auditLog.isTraceEnabled()) {
+                    auditLog.trace(buildAuditMessage(principal, requestContext, action, rule));
+                }
+        }
+    }
+
+    private MatchingAclRule findAclRule(
+        Set<KafkaPrincipal> matchingPrincipals,
+        String host,
+        Action action
+    ) {
         // This code relies on the ordering of StandardAcl within the NavigableMap.
         // Entries are sorted by resource type first, then REVERSE resource name.
         // Therefore, we can find all the applicable ACLs by starting at
@@ -255,7 +368,7 @@ AuthorizationResult authorize(AuthorizableRequestContext requestContext,
         // 5. rs=TOPIC rn=eeee pt=LITERAL
         //
         // Once we reached element 5, we would stop scanning.
-        AuthorizationResultBuilder builder = new AuthorizationResultBuilder();
+        MatchingAclBuilder matchingAclBuilder = new MatchingAclBuilder();
         StandardAcl exemplar = new StandardAcl(
             action.resourcePattern().resourceType(),
             action.resourcePattern().name(),
@@ -264,8 +377,10 @@ AuthorizationResult authorize(AuthorizableRequestContext requestContext,
             "",
             AclOperation.UNKNOWN,
             AclPermissionType.UNKNOWN);
-        checkSection(action, exemplar, requestContext, builder);
-        if (builder.foundDeny) return DENIED;
+        checkSection(action, exemplar, matchingPrincipals, host, matchingAclBuilder);
+        if (matchingAclBuilder.foundDeny()) {
+            return matchingAclBuilder.build();
+        }
 
         // In addition to ACLs for this specific resource name, there can also be wildcard
         // ACLs that match any resource name. These are stored as type = LITERAL,
@@ -278,30 +393,17 @@ AuthorizationResult authorize(AuthorizableRequestContext requestContext,
             "",
             AclOperation.UNKNOWN,
             AclPermissionType.UNKNOWN);
-        checkSection(action, exemplar, requestContext, builder);
-        if (builder.foundDeny) return DENIED;
-
-        // If we found ALLOW ACLs, the action is allowed.
-        if (builder.foundAllow) {
-            if (log.isTraceEnabled()) {
-                log.trace("authorize(requestContext=" + requestContext + ", action=" +
-                    action + "): ALLOWED");
-            }
-            return ALLOWED;
-        }
-
-        // If nothing matched, we return the default result.
-        if (log.isTraceEnabled()) {
-            log.trace("authorize(requestContext=" + requestContext + ", action=" +
-                action + "): returning default result " + defaultResult);
-        }
-        return defaultResult;
+        checkSection(action, exemplar, matchingPrincipals, host, matchingAclBuilder);
+        return matchingAclBuilder.build();
     }
 
-    void checkSection(Action action,
-                      StandardAcl exemplar,
-                      AuthorizableRequestContext requestContext,
-                      AuthorizationResultBuilder builder) {
+    private void checkSection(
+        Action action,
+        StandardAcl exemplar,
+        Set<KafkaPrincipal> matchingPrincipals,
+        String host,
+        MatchingAclBuilder matchingAclBuilder
+    ) {
         NavigableSet<StandardAcl> tailSet = aclsByResource.tailSet(exemplar, true);
         String resourceName = action.resourcePattern().name();
         for (Iterator<StandardAcl> iterator = tailSet.iterator();
@@ -325,15 +427,11 @@ void checkSection(Action action,
                 // stepped outside of the section we care about and should stop scanning.
                 break;
             }
-            AuthorizationResult result = findResult(action, requestContext, acl);
+            AuthorizationResult result = findResult(action, matchingPrincipals, host, acl);
             if (ALLOWED == result) {
-                builder.foundAllow = true;
+                matchingAclBuilder.allowAcl = acl;
             } else if (DENIED == result) {
-                if (log.isTraceEnabled()) {
-                    log.trace("authorize(requestContext=" + requestContext + ", action=" +
-                        action + "): DENIED because of " + acl);
-                }
-                builder.foundDeny = true;
+                matchingAclBuilder.denyAcl = acl;
                 return;
             }
         }
@@ -351,30 +449,55 @@ void checkSection(Action action,
     private static final Set<AclOperation> IMPLIES_DESCRIBE_CONFIGS = Collections.unmodifiableSet(
         EnumSet.of(DESCRIBE_CONFIGS, ALTER_CONFIGS));
 
+    static AuthorizationResult findResult(Action action,
+                                          AuthorizableRequestContext requestContext,
+                                          StandardAcl acl) {
+        return findResult(
+            action,
+            matchingPrincipals(requestContext),
+            requestContext.clientAddress().getHostAddress(),
+            acl
+        );
+    }
+
+    static KafkaPrincipal baseKafkaPrincipal(AuthorizableRequestContext context) {
+        KafkaPrincipal sessionPrincipal = context.principal();
+        return sessionPrincipal.getClass().equals(KafkaPrincipal.class)
+            ? sessionPrincipal
+            : new KafkaPrincipal(sessionPrincipal.getPrincipalType(), sessionPrincipal.getName());
+    }
+
+    static Set<KafkaPrincipal> matchingPrincipals(AuthorizableRequestContext context) {
+        KafkaPrincipal sessionPrincipal = context.principal();
+        KafkaPrincipal basePrincipal = sessionPrincipal.getClass().equals(KafkaPrincipal.class)
+            ? sessionPrincipal
+            : new KafkaPrincipal(sessionPrincipal.getPrincipalType(), sessionPrincipal.getName());
+        return Utils.mkSet(basePrincipal, WILDCARD_KAFKA_PRINCIPAL);
+    }
+
     /**
      * Determine what the result of applying an ACL to the given action and request
      * context should be. Note that this function assumes that the resource name matches;
      * the resource name is not checked here.
      *
-     * @param action            The input action.
-     * @param requestContext    The input request context.
-     * @param acl               The input ACL.
-     * @return                  null if the ACL does not match. The authorization result
-     *                          otherwise.
+     * @param action             The input action.
+     * @param matchingPrincipals The set of input matching principals
+     * @param host               The input host.
+     * @param acl                The input ACL.
+     * @return                   null if the ACL does not match. The authorization result
+     *                           otherwise.
      */
     static AuthorizationResult findResult(Action action,
-                                          AuthorizableRequestContext requestContext,
+                                          Set<KafkaPrincipal> matchingPrincipals,
+                                          String host,
                                           StandardAcl acl) {
         // Check if the principal matches. If it doesn't, return no result (null).
-        if (!acl.principal().equals(WILDCARD_PRINCIPAL)) {
-            if (!acl.principal().equals(requestContext.principal().toString())) return null;
+        if (!matchingPrincipals.contains(acl.kafkaPrincipal())) {
+            return null;
         }
         // Check if the host matches. If it doesn't, return no result (null).
-        // The hostname should be cached in the InetAddress object, so calling this more
-        // than once shouldn't be too expensive.
-        if (!acl.host().equals(WILDCARD)) {
-            String host = requestContext.clientAddress().getHostAddress();
-            if (!acl.host().equals(host)) return null;
+        if (!acl.host().equals(WILDCARD) && !acl.host().equals(host)) {
+            return null;
         }
         // Check if the operation field matches. Here we hit a slight complication.
         // ACLs for various operations (READ, WRITE, DELETE, ALTER), "imply" the presence
@@ -456,4 +579,79 @@ public AclBinding next() {
             return result;
         }
     }
+
+    private interface MatchingRule {
+        AuthorizationResult result();
+    }
+
+    private static class SuperUserRule implements MatchingRule {
+        private static final SuperUserRule INSTANCE = new SuperUserRule();
+
+        @Override
+        public AuthorizationResult result() {
+            return ALLOWED;
+        }
+
+        @Override
+        public String toString() {
+            return "SuperUser";
+        }
+    }
+
+    private static class DefaultRule implements MatchingRule {
+        private final AuthorizationResult result;
+
+        private DefaultRule(AuthorizationResult result) {
+            this.result = result;
+        }
+
+        @Override
+        public AuthorizationResult result() {
+            return result;
+        }
+
+        @Override
+        public String toString() {
+            return result == ALLOWED ? "DefaultAllow" : "DefaultDeny";
+        }
+    }
+
+    private static class MatchingAclRule implements MatchingRule {
+        private final StandardAcl acl;
+        private final AuthorizationResult result;
+
+        private MatchingAclRule(StandardAcl acl, AuthorizationResult result) {
+            this.acl = acl;
+            this.result = result;
+        }
+
+        @Override
+        public AuthorizationResult result() {
+            return result;
+        }
+
+        @Override
+        public String toString() {
+            return "MatchingAcl(acl=" + acl + ")";
+        }
+    }
+
+    private static class MatchingAclBuilder {
+        private StandardAcl denyAcl;
+        private StandardAcl allowAcl;
+
+        boolean foundDeny() {
+            return denyAcl != null;
+        }
+
+        MatchingAclRule build() {
+            if (denyAcl != null) {
+                return new MatchingAclRule(denyAcl, DENIED);
+            } else if (allowAcl != null) {
+                return new MatchingAclRule(allowAcl, ALLOWED);
+            } else {
+                return null;
+            }
+        }
+    }
 }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/placement/ClusterDescriber.java b/metadata/src/main/java/org/apache/kafka/metadata/placement/ClusterDescriber.java
new file mode 100644
index 0000000000000..8aaa092205e63
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/placement/ClusterDescriber.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata.placement;
+
+import org.apache.kafka.common.annotation.InterfaceStability;
+
+import java.util.Iterator;
+
+
+/**
+ * Can describe a cluster to a ReplicaPlacer.
+ */
+@InterfaceStability.Unstable
+public interface ClusterDescriber {
+    /**
+     * Get an iterator through the usable brokers.
+     */
+    Iterator<UsableBroker> usableBrokers();
+}
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/placement/PlacementSpec.java b/metadata/src/main/java/org/apache/kafka/metadata/placement/PlacementSpec.java
new file mode 100644
index 0000000000000..85daaf59e5d47
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/placement/PlacementSpec.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata.placement;
+
+import org.apache.kafka.common.annotation.InterfaceStability;
+
+import java.util.Objects;
+
+
+/**
+ * Specifies a replica placement that we want to make.
+ */
+@InterfaceStability.Unstable
+public class PlacementSpec {
+    private final int startPartition;
+
+    private final int numPartitions;
+
+    private final short numReplicas;
+
+    public PlacementSpec(
+        int startPartition,
+        int numPartitions,
+        short numReplicas
+    ) {
+        this.startPartition = startPartition;
+        this.numPartitions = numPartitions;
+        this.numReplicas = numReplicas;
+    }
+
+    public int startPartition() {
+        return startPartition;
+    }
+
+    public int numPartitions() {
+        return numPartitions;
+    }
+
+    public short numReplicas() {
+        return numReplicas;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == null) return false;
+        if (!(o.getClass().equals(this.getClass()))) return false;
+        PlacementSpec other = (PlacementSpec) o;
+        return startPartition == other.startPartition &&
+            numPartitions == other.numPartitions &&
+            numReplicas == other.numReplicas;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(startPartition,
+            numPartitions,
+            numReplicas);
+    }
+
+    @Override
+    public String toString() {
+        return "PlacementSpec" +
+            "(startPartition=" + startPartition +
+            ", numPartitions=" + numPartitions +
+            ", numReplicas=" + numReplicas +
+            ")";
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/controller/ReplicaPlacer.java b/metadata/src/main/java/org/apache/kafka/metadata/placement/ReplicaPlacer.java
similarity index 59%
rename from metadata/src/main/java/org/apache/kafka/controller/ReplicaPlacer.java
rename to metadata/src/main/java/org/apache/kafka/metadata/placement/ReplicaPlacer.java
index 9a705f43d8445..6af37fd960913 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/ReplicaPlacer.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/placement/ReplicaPlacer.java
@@ -15,36 +15,31 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.controller;
+package org.apache.kafka.metadata.placement;
 
-import java.util.Iterator;
 import java.util.List;
+
 import org.apache.kafka.common.annotation.InterfaceStability;
 import org.apache.kafka.common.errors.InvalidReplicationFactorException;
-import org.apache.kafka.metadata.UsableBroker;
 
 
 /**
  * The interface which a Kafka replica placement policy must implement.
  */
 @InterfaceStability.Unstable
-interface ReplicaPlacer {
+public interface ReplicaPlacer {
     /**
      * Create a new replica placement.
      *
-     * @param startPartition        The partition ID to start with.
-     * @param numPartitions         The number of partitions to create placements for.
-     * @param numReplicas           The number of replicas to create for each partitions.
-     *                              Must be positive.
-     * @param iterator              An iterator that yields all the usable brokers.
+     * @param placement     What we're trying to place.
+     * @param cluster       A description of the cluster we're trying to place in.
      *
-     * @return                      A list of replica lists.
+     * @return              A list of replica lists.
      *
      * @throws InvalidReplicationFactorException    If too many replicas were requested.
      */
-    List<List<Integer>> place(int startPartition,
-                              int numPartitions,
-                              short numReplicas,
-                              Iterator<UsableBroker> iterator)
-        throws InvalidReplicationFactorException;
+    List<List<Integer>> place(
+        PlacementSpec placement,
+        ClusterDescriber cluster
+    ) throws InvalidReplicationFactorException;
 }
diff --git a/metadata/src/main/java/org/apache/kafka/controller/StripedReplicaPlacer.java b/metadata/src/main/java/org/apache/kafka/metadata/placement/StripedReplicaPlacer.java
similarity index 95%
rename from metadata/src/main/java/org/apache/kafka/controller/StripedReplicaPlacer.java
rename to metadata/src/main/java/org/apache/kafka/metadata/placement/StripedReplicaPlacer.java
index 031354c56dab7..43f41179d4366 100644
--- a/metadata/src/main/java/org/apache/kafka/controller/StripedReplicaPlacer.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/placement/StripedReplicaPlacer.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.controller;
+package org.apache.kafka.metadata.placement;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -28,7 +28,6 @@
 
 import org.apache.kafka.common.errors.InvalidReplicationFactorException;
 import org.apache.kafka.metadata.OptionalStringComparator;
-import org.apache.kafka.metadata.UsableBroker;
 
 
 /**
@@ -428,17 +427,18 @@ public StripedReplicaPlacer(Random random) {
     }
 
     @Override
-    public List<List<Integer>> place(int startPartition,
-                                     int numPartitions,
-                                     short replicationFactor,
-                                     Iterator<UsableBroker> iterator) {
-        RackList rackList = new RackList(random, iterator);
-        throwInvalidReplicationFactorIfNonPositive(replicationFactor);
+    public List<List<Integer>> place(
+        PlacementSpec placement,
+        ClusterDescriber cluster
+    ) throws InvalidReplicationFactorException {
+        RackList rackList = new RackList(random, cluster.usableBrokers());
+        throwInvalidReplicationFactorIfNonPositive(placement.numReplicas());
         throwInvalidReplicationFactorIfZero(rackList.numUnfencedBrokers());
-        throwInvalidReplicationFactorIfTooFewBrokers(replicationFactor, rackList.numTotalBrokers());
-        List<List<Integer>> placements = new ArrayList<>(numPartitions);
-        for (int partition = 0; partition < numPartitions; partition++) {
-            placements.add(rackList.place(replicationFactor));
+        throwInvalidReplicationFactorIfTooFewBrokers(placement.numReplicas(),
+            rackList.numTotalBrokers());
+        List<List<Integer>> placements = new ArrayList<>(placement.numPartitions());
+        for (int partition = 0; partition < placement.numPartitions(); partition++) {
+            placements.add(rackList.place(placement.numReplicas()));
         }
         return placements;
     }
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/UsableBroker.java b/metadata/src/main/java/org/apache/kafka/metadata/placement/UsableBroker.java
similarity index 83%
rename from metadata/src/main/java/org/apache/kafka/metadata/UsableBroker.java
rename to metadata/src/main/java/org/apache/kafka/metadata/placement/UsableBroker.java
index 9c04ebd480b8c..75d16d7718b12 100644
--- a/metadata/src/main/java/org/apache/kafka/metadata/UsableBroker.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/placement/UsableBroker.java
@@ -15,7 +15,9 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.metadata;
+package org.apache.kafka.metadata.placement;
+
+import org.apache.kafka.common.annotation.InterfaceStability;
 
 import java.util.Objects;
 import java.util.Optional;
@@ -24,6 +26,7 @@
 /**
  * A broker where a replica can be placed.
  */
+@InterfaceStability.Unstable
 public class UsableBroker {
     private final int id;
 
@@ -58,11 +61,17 @@ public boolean equals(Object o) {
 
     @Override
     public int hashCode() {
-        return Objects.hash(id, rack, fenced);
+        return Objects.hash(id,
+            rack,
+            fenced);
     }
 
     @Override
     public String toString() {
-        return "UsableBroker(id=" + id + ", rack=" + rack + ", fenced=" + fenced + ")";
+        return "UsableBroker" +
+            "(id=" + id +
+            ", rack=" + rack +
+            ", fenced=" + fenced +
+            ")";
     }
 }
diff --git a/shell/src/main/java/org/apache/kafka/shell/SnapshotFileReader.java b/metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileReader.java
similarity index 99%
rename from shell/src/main/java/org/apache/kafka/shell/SnapshotFileReader.java
rename to metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileReader.java
index 9edf8685f18dd..1e5e6371d0cb3 100644
--- a/shell/src/main/java/org/apache/kafka/shell/SnapshotFileReader.java
+++ b/metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileReader.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.shell;
+package org.apache.kafka.metadata.util;
 
 import org.apache.kafka.common.message.LeaderChangeMessage;
 import org.apache.kafka.common.protocol.ByteBufferAccessor;
diff --git a/metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileWriter.java b/metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileWriter.java
new file mode 100644
index 0000000000000..5608bdc464abb
--- /dev/null
+++ b/metadata/src/main/java/org/apache/kafka/metadata/util/SnapshotFileWriter.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata.util;
+
+import org.apache.kafka.common.record.CompressionType;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.metadata.MetadataRecordSerde;
+import org.apache.kafka.raft.internals.BatchAccumulator;
+import org.apache.kafka.raft.internals.BatchMemoryPool;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
+
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.List;
+
+import static org.apache.kafka.raft.KafkaRaftClient.MAX_BATCH_SIZE_BYTES;
+
+
+/**
+ * Write an arbitrary set of metadata records into a Kafka metadata snapshot format. The resulting snapshot will be use
+ * epoch of zero and an initial offset of zero. This class should not be used for creating actual metadata snapshots.
+ */
+public class SnapshotFileWriter implements AutoCloseable {
+    private final FileChannel channel;
+    private final BatchAccumulator<ApiMessageAndVersion> batchAccumulator;
+
+    SnapshotFileWriter(FileChannel channel, BatchAccumulator<ApiMessageAndVersion> batchAccumulator) {
+        this.channel = channel;
+        this.batchAccumulator = batchAccumulator;
+    }
+
+    public void append(ApiMessageAndVersion apiMessageAndVersion) {
+        batchAccumulator.append(0, Collections.singletonList(apiMessageAndVersion));
+    }
+
+    public void append(List<ApiMessageAndVersion> messageBatch) {
+        batchAccumulator.append(0, messageBatch);
+    }
+
+    public void close() throws IOException {
+        for (BatchAccumulator.CompletedBatch<ApiMessageAndVersion> batch : batchAccumulator.drain()) {
+            Utils.writeFully(channel, batch.data.buffer());
+        }
+        channel.close();
+    }
+
+    public static SnapshotFileWriter open(Path snapshotPath) throws IOException {
+        BatchAccumulator<ApiMessageAndVersion> batchAccumulator = new BatchAccumulator<>(
+            0,
+            0,
+            Integer.MAX_VALUE,
+            MAX_BATCH_SIZE_BYTES,
+            new BatchMemoryPool(5, MAX_BATCH_SIZE_BYTES),
+            Time.SYSTEM,
+            CompressionType.NONE,
+            new MetadataRecordSerde());
+
+        FileChannel channel = FileChannel.open(snapshotPath, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE);
+
+        return new SnapshotFileWriter(channel, batchAccumulator);
+    }
+}
diff --git a/metadata/src/main/java/org/apache/kafka/timeline/SnapshottableHashTable.java b/metadata/src/main/java/org/apache/kafka/timeline/SnapshottableHashTable.java
index cbd0a280fc12e..299f65a6f7849 100644
--- a/metadata/src/main/java/org/apache/kafka/timeline/SnapshottableHashTable.java
+++ b/metadata/src/main/java/org/apache/kafka/timeline/SnapshottableHashTable.java
@@ -111,15 +111,19 @@ static class HashTier<T extends SnapshottableHashTable.ElementWithStartEpoch> im
         @Override
         public void mergeFrom(long epoch, Delta source) {
             HashTier<T> other = (HashTier<T>) source;
-            List<T> list = new ArrayList<>();
-            Object[] otherElements = other.deltaTable.baseElements();
-            for (int slot = 0; slot < otherElements.length; slot++) {
-                BaseHashTable.unpackSlot(list, otherElements, slot);
-                for (T element : list) {
-                    // When merging in a later hash tier, we want to keep only the elements
-                    // that were present at our epoch.
-                    if (element.startEpoch() <= epoch) {
-                        deltaTable.baseAddOrReplace(element);
+            // As an optimization, the deltaTable might not exist for a new key
+            // as there is no previous value
+            if (other.deltaTable != null) {
+                List<T> list = new ArrayList<>();
+                Object[] otherElements = other.deltaTable.baseElements();
+                for (int slot = 0; slot < otherElements.length; slot++) {
+                    BaseHashTable.unpackSlot(list, otherElements, slot);
+                    for (T element : list) {
+                        // When merging in a later hash tier, we want to keep only the elements
+                        // that were present at our epoch.
+                        if (element.startEpoch() <= epoch) {
+                            deltaTable.baseAddOrReplace(element);
+                        }
                     }
                 }
             }
diff --git a/metadata/src/main/resources/common/metadata/BrokerRegistrationChangeRecord.json b/metadata/src/main/resources/common/metadata/BrokerRegistrationChangeRecord.json
index 152508ce54f09..81bebaaff276c 100644
--- a/metadata/src/main/resources/common/metadata/BrokerRegistrationChangeRecord.json
+++ b/metadata/src/main/resources/common/metadata/BrokerRegistrationChangeRecord.json
@@ -17,7 +17,7 @@
   "apiKey": 17,
   "type": "metadata",
   "name": "BrokerRegistrationChangeRecord",
-  "validVersions": "0",
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
    { "name": "BrokerId", "type": "int32", "versions": "0+", "entityType": "brokerId",
@@ -25,6 +25,8 @@
    { "name": "BrokerEpoch", "type": "int64", "versions": "0+",
      "about": "The broker epoch assigned by the controller." },
    { "name": "Fenced", "type": "int8", "versions": "0+", "taggedVersions": "0+", "tag": 0,
-     "about": "-1 if the broker has been unfenced, 0 if no change, 1 if the broker has been fenced." }
+     "about": "-1 if the broker has been unfenced, 0 if no change, 1 if the broker has been fenced." },
+   { "name": "InControlledShutdown", "type": "int8", "versions": "1+", "taggedVersions": "1+", "tag": 1,
+     "about": "0 if no change, 1 if the broker is in controlled shutdown." }
   ]
 }
diff --git a/metadata/src/main/resources/common/metadata/FeatureLevelRecord.json b/metadata/src/main/resources/common/metadata/FeatureLevelRecord.json
index ac112f15c2fda..03ff347eb82da 100644
--- a/metadata/src/main/resources/common/metadata/FeatureLevelRecord.json
+++ b/metadata/src/main/resources/common/metadata/FeatureLevelRecord.json
@@ -22,9 +22,7 @@
   "fields": [
     { "name": "Name", "type": "string", "versions": "0+",
       "about": "The feature name." },
-    { "name": "MinFeatureLevel", "type": "int16", "versions": "0+",
-      "about": "The current finalized minimum feature level of this feature for the cluster." },
-    { "name": "MaxFeatureLevel", "type": "int16", "versions": "0+",
-      "about": "The current finalized maximum feature level of this feature for the cluster." }
+    { "name": "FeatureLevel", "type": "int16", "versions": "0+",
+      "about": "The current finalized feature level of this feature for the cluster." }
   ]
 }
diff --git a/metadata/src/main/resources/common/metadata/RemoveFeatureLevelRecord.json b/metadata/src/main/resources/common/metadata/NoOpRecord.json
similarity index 83%
rename from metadata/src/main/resources/common/metadata/RemoveFeatureLevelRecord.json
rename to metadata/src/main/resources/common/metadata/NoOpRecord.json
index 6ed716192e67a..88b907f8cc0a7 100644
--- a/metadata/src/main/resources/common/metadata/RemoveFeatureLevelRecord.json
+++ b/metadata/src/main/resources/common/metadata/NoOpRecord.json
@@ -14,13 +14,10 @@
 // limitations under the License.
 
 {
-  "apiKey": 16,
+  "apiKey": 20,
   "type": "metadata",
-  "name": "RemoveFeatureLevelRecord",
+  "name": "NoOpRecord",
   "validVersions": "0",
   "flexibleVersions": "0+",
-  "fields": [
-    { "name": "Name", "type": "string", "versions": "0+",
-      "about": "The feature name." }
-  ]
+  "fields": []
 }
diff --git a/metadata/src/main/resources/common/metadata/PartitionChangeRecord.json b/metadata/src/main/resources/common/metadata/PartitionChangeRecord.json
index 7afaa425b7395..587e512d575a0 100644
--- a/metadata/src/main/resources/common/metadata/PartitionChangeRecord.json
+++ b/metadata/src/main/resources/common/metadata/PartitionChangeRecord.json
@@ -38,6 +38,8 @@
       "about": "null if the removing replicas didn't change; the new removing replicas otherwise." },
     { "name": "AddingReplicas", "type": "[]int32", "default": "null", "entityType": "brokerId",
       "versions": "0+", "nullableVersions": "0+", "taggedVersions": "0+", "tag": 4,
-      "about": "null if the adding replicas didn't change; the new adding replicas otherwise." }
+      "about": "null if the adding replicas didn't change; the new adding replicas otherwise." },
+    { "name": "LeaderRecoveryState", "type": "int8", "default": "-1", "versions": "0+", "taggedVersions": "0+", "tag": 5,
+      "about": "-1 if it didn't change; 0 if the leader was elected from the ISR or recovered from an unclean election; 1 if the leader that was elected using unclean leader election and it is still recovering." }
   ]
 }
diff --git a/metadata/src/main/resources/common/metadata/PartitionRecord.json b/metadata/src/main/resources/common/metadata/PartitionRecord.json
index 66a13e2a0670c..fdd05f8a5ca88 100644
--- a/metadata/src/main/resources/common/metadata/PartitionRecord.json
+++ b/metadata/src/main/resources/common/metadata/PartitionRecord.json
@@ -34,6 +34,8 @@
       "about": "The replicas that we are in the process of adding." },
     { "name": "Leader", "type": "int32", "versions": "0+", "default": "-1", "entityType": "brokerId",
       "about": "The lead replica, or -1 if there is no leader." },
+    { "name": "LeaderRecoveryState", "type": "int8", "default": "0", "versions": "0+", "taggedVersions": "0+", "tag": 0,
+      "about": "1 if the partition is recovering from an unclean leader election; 0 otherwise." },
     { "name": "LeaderEpoch", "type": "int32", "versions": "0+", "default": "-1",
       "about": "The epoch of the partition leader." },
     { "name": "PartitionEpoch", "type": "int32", "versions": "0+", "default": "-1",
diff --git a/metadata/src/main/resources/common/metadata/RegisterBrokerRecord.json b/metadata/src/main/resources/common/metadata/RegisterBrokerRecord.json
index a0e7af2fbed8c..a32c16d8a607c 100644
--- a/metadata/src/main/resources/common/metadata/RegisterBrokerRecord.json
+++ b/metadata/src/main/resources/common/metadata/RegisterBrokerRecord.json
@@ -17,7 +17,7 @@
   "apiKey": 0,
   "type": "metadata",
   "name": "RegisterBrokerRecord",
-  "validVersions": "0",
+  "validVersions": "0-1",
   "flexibleVersions": "0+",
   "fields": [
     { "name": "BrokerId", "type": "int32", "versions": "0+", "entityType": "brokerId",
@@ -49,6 +49,8 @@
     { "name": "Rack", "type": "string", "versions": "0+", "nullableVersions": "0+",
       "about": "The broker rack." },
     { "name": "Fenced", "type": "bool", "versions": "0+", "default": "true",
-      "about": "True if the broker is fenced." }
+      "about": "True if the broker is fenced." },
+    { "name": "InControlledShutdown", "type": "bool", "versions": "1+", "default": "false",
+      "about": "True if the broker is in controlled shutdown." }
   ]
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/AclControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/AclControlManagerTest.java
index 368eaa14432c7..fdc0327645176 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/AclControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/AclControlManagerTest.java
@@ -148,6 +148,16 @@ public AclMutator aclMutatorOrException() {
             throw new NotControllerException("The current node is not the active controller.");
         }
 
+        @Override
+        public void completeInitialLoad() {
+            // do nothing
+        }
+
+        @Override
+        public void completeInitialLoad(Exception e) {
+            // do nothing
+        }
+
         @Override
         public void loadSnapshot(Map<Uuid, StandardAcl> acls) {
             this.acls = new HashMap<>(acls);
@@ -306,4 +316,34 @@ public void testCreateAclDeleteAcl() {
             (AccessControlEntryRecord) list.get(0).message()).toBinding());
         assertFalse(iterator.hasNext());
     }
+
+    @Test
+    public void testDeleteDedupe() {
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+        AclControlManager manager = new AclControlManager(snapshotRegistry, Optional.empty());
+        MockClusterMetadataAuthorizer authorizer = new MockClusterMetadataAuthorizer();
+        authorizer.loadSnapshot(manager.idToAcl());
+
+        AclBinding aclBinding = new AclBinding(new ResourcePattern(TOPIC, "topic-1", LITERAL),
+                new AccessControlEntry("User:user", "10.0.0.1", AclOperation.ALL, ALLOW));
+
+        ControllerResult<List<AclCreateResult>> createResult = manager.createAcls(Arrays.asList(aclBinding));
+        Uuid id = ((AccessControlEntryRecord) createResult.records().get(0).message()).id();
+        assertEquals(1, createResult.records().size());
+
+        ControllerResult<List<AclDeleteResult>> deleteAclResultsAnyFilter = manager.deleteAcls(Arrays.asList(AclBindingFilter.ANY));
+        assertEquals(1, deleteAclResultsAnyFilter.records().size());
+        assertEquals(id, ((RemoveAccessControlEntryRecord) deleteAclResultsAnyFilter.records().get(0).message()).id());
+        assertEquals(1, deleteAclResultsAnyFilter.response().size());
+
+        ControllerResult<List<AclDeleteResult>> deleteAclResultsSpecificFilter = manager.deleteAcls(Arrays.asList(aclBinding.toFilter()));
+        assertEquals(1, deleteAclResultsSpecificFilter.records().size());
+        assertEquals(id, ((RemoveAccessControlEntryRecord) deleteAclResultsSpecificFilter.records().get(0).message()).id());
+        assertEquals(1, deleteAclResultsSpecificFilter.response().size());
+
+        ControllerResult<List<AclDeleteResult>> deleteAclResultsBothFilters = manager.deleteAcls(Arrays.asList(AclBindingFilter.ANY, aclBinding.toFilter()));
+        assertEquals(1, deleteAclResultsBothFilters.records().size());
+        assertEquals(id, ((RemoveAccessControlEntryRecord) deleteAclResultsBothFilters.records().get(0).message()).id());
+        assertEquals(2, deleteAclResultsBothFilters.response().size());
+    }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/BootstrapMetadataTest.java b/metadata/src/test/java/org/apache/kafka/controller/BootstrapMetadataTest.java
new file mode 100644
index 0000000000000..f1577269196bc
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/controller/BootstrapMetadataTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Random;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class BootstrapMetadataTest {
+    private Path tmpDir;
+
+    @BeforeEach
+    public void createTestDir() {
+        tmpDir = TestUtils.tempDirectory("BootstrapMetadataTest").toPath();
+    }
+
+    @AfterEach
+    public void deleteTestDir() throws IOException {
+        if (tmpDir != null)
+            Utils.delete(tmpDir.toFile());
+    }
+
+    @Test
+    public void testWriteAndReadBootstrapFile() throws Exception {
+        BootstrapMetadata metadata = BootstrapMetadata.create(MetadataVersion.MINIMUM_KRAFT_VERSION);
+        BootstrapMetadata.write(metadata, tmpDir);
+
+        assertTrue(Files.exists(tmpDir.resolve(BootstrapMetadata.BOOTSTRAP_FILE)));
+
+        BootstrapMetadata newMetadata = BootstrapMetadata.load(tmpDir, () -> MetadataVersion.MINIMUM_KRAFT_VERSION);
+        assertEquals(metadata, newMetadata);
+    }
+
+    @Test
+    public void testNoBootstrapFile() throws Exception {
+        BootstrapMetadata metadata = BootstrapMetadata.load(tmpDir, () -> MetadataVersion.MINIMUM_KRAFT_VERSION);
+        assertEquals(MetadataVersion.MINIMUM_KRAFT_VERSION, metadata.metadataVersion());
+        metadata = BootstrapMetadata.load(tmpDir, () -> MetadataVersion.IBP_3_2_IV0);
+        assertEquals(MetadataVersion.IBP_3_2_IV0, metadata.metadataVersion());
+    }
+
+    @Test
+    public void testExistingBootstrapFile() throws Exception {
+        BootstrapMetadata.write(BootstrapMetadata.create(MetadataVersion.MINIMUM_KRAFT_VERSION), tmpDir);
+        assertThrows(IOException.class, () -> {
+            BootstrapMetadata.write(BootstrapMetadata.create(MetadataVersion.IBP_3_1_IV0), tmpDir);
+        });
+    }
+
+    @Test
+    public void testEmptyBootstrapFile() throws Exception {
+        Files.createFile(tmpDir.resolve(BootstrapMetadata.BOOTSTRAP_FILE));
+        assertThrows(Exception.class, () -> BootstrapMetadata.load(tmpDir, () -> MetadataVersion.MINIMUM_KRAFT_VERSION),
+            "Should fail to load if no metadata.version is set");
+    }
+
+    @Test
+    public void testGarbageBootstrapFile() throws Exception {
+        Files.createFile(tmpDir.resolve(BootstrapMetadata.BOOTSTRAP_FILE));
+        Random random = new Random(1);
+        byte[] data = new byte[100];
+        random.nextBytes(data);
+        Files.write(tmpDir.resolve(BootstrapMetadata.BOOTSTRAP_FILE), data, StandardOpenOption.CREATE, StandardOpenOption.WRITE);
+        assertThrows(Exception.class, () -> BootstrapMetadata.load(tmpDir, () -> MetadataVersion.MINIMUM_KRAFT_VERSION),
+            "Should fail on invalid data");
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/controller/BrokerHeartbeatManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/BrokerHeartbeatManagerTest.java
index c5c46abab33ff..28387b17a0fd6 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/BrokerHeartbeatManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/BrokerHeartbeatManagerTest.java
@@ -30,7 +30,7 @@
 import org.apache.kafka.controller.BrokerHeartbeatManager.BrokerHeartbeatStateIterator;
 import org.apache.kafka.controller.BrokerHeartbeatManager.BrokerHeartbeatStateList;
 import org.apache.kafka.controller.BrokerHeartbeatManager.UsableBrokerIterator;
-import org.apache.kafka.metadata.UsableBroker;
+import org.apache.kafka.metadata.placement.UsableBroker;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
diff --git a/metadata/src/test/java/org/apache/kafka/controller/BrokersToIsrsTest.java b/metadata/src/test/java/org/apache/kafka/controller/BrokersToIsrsTest.java
index 6510ee5d4b9eb..1258c9d5a2e7f 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/BrokersToIsrsTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/BrokersToIsrsTest.java
@@ -20,7 +20,6 @@
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.controller.BrokersToIsrs.PartitionsOnReplicaIterator;
-import org.apache.kafka.controller.BrokersToIsrs.TopicIdPartition;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
diff --git a/metadata/src/test/java/org/apache/kafka/controller/ClusterControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/ClusterControlManagerTest.java
index 124cb3d5f354d..e47def81e6d5d 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/ClusterControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/ClusterControlManagerTest.java
@@ -20,33 +20,45 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Optional;
-import java.util.Random;
+
+import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.common.Endpoint;
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.errors.InconsistentClusterIdException;
 import org.apache.kafka.common.errors.StaleBrokerEpochException;
 import org.apache.kafka.common.message.BrokerRegistrationRequestData;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
+import org.apache.kafka.common.metadata.FenceBrokerRecord;
+import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerEndpoint;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord.BrokerEndpointCollection;
-import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnregisterBrokerRecord;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.metadata.BrokerRegistration;
-import org.apache.kafka.metadata.FeatureMap;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.metadata.BrokerRegistrationFencingChange;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
+import org.apache.kafka.metadata.BrokerRegistrationReply;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.metadata.RecordTestUtils;
+import org.apache.kafka.metadata.placement.ClusterDescriber;
+import org.apache.kafka.metadata.placement.PlacementSpec;
+import org.apache.kafka.metadata.placement.UsableBroker;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.ValueSource;
 
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_3_IV2;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -55,14 +67,26 @@
 
 @Timeout(value = 40)
 public class ClusterControlManagerTest {
-    @Test
-    public void testReplay() {
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class, names = {"IBP_3_0_IV1", "IBP_3_3_IV2"})
+    public void testReplay(MetadataVersion metadataVersion) {
         MockTime time = new MockTime(0, 0, 0);
 
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ClusterControlManager clusterControl = new ClusterControlManager(
-            new LogContext(), Uuid.randomUuid().toString(), time, snapshotRegistry, 1000,
-                new StripedReplicaPlacer(new Random()), new MockControllerMetrics());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
         clusterControl.activate();
         assertFalse(clusterControl.unfenced(0));
 
@@ -72,7 +96,7 @@ public void testReplay() {
             setPort((short) 9092).
             setName("PLAINTEXT").
             setHost("example.com"));
-        clusterControl.replay(brokerRecord);
+        clusterControl.replay(brokerRecord, 100L);
         clusterControl.checkBrokerEpoch(1, 100);
         assertThrows(StaleBrokerEpochException.class,
             () -> clusterControl.checkBrokerEpoch(1, 101));
@@ -81,20 +105,161 @@ public void testReplay() {
         assertFalse(clusterControl.unfenced(0));
         assertFalse(clusterControl.unfenced(1));
 
-        UnfenceBrokerRecord unfenceBrokerRecord =
-            new UnfenceBrokerRecord().setId(1).setEpoch(100);
-        clusterControl.replay(unfenceBrokerRecord);
+        if (metadataVersion.isLessThan(IBP_3_3_IV2)) {
+            UnfenceBrokerRecord unfenceBrokerRecord =
+                    new UnfenceBrokerRecord().setId(1).setEpoch(100);
+            clusterControl.replay(unfenceBrokerRecord);
+        } else {
+            BrokerRegistrationChangeRecord changeRecord =
+                    new BrokerRegistrationChangeRecord().setBrokerId(1).setBrokerEpoch(100).setFenced(BrokerRegistrationFencingChange.UNFENCE.value());
+            clusterControl.replay(changeRecord);
+        }
         assertFalse(clusterControl.unfenced(0));
         assertTrue(clusterControl.unfenced(1));
+
+        if (metadataVersion.isLessThan(IBP_3_3_IV2)) {
+            FenceBrokerRecord fenceBrokerRecord =
+                    new FenceBrokerRecord().setId(1).setEpoch(100);
+            clusterControl.replay(fenceBrokerRecord);
+        } else {
+            BrokerRegistrationChangeRecord changeRecord =
+                    new BrokerRegistrationChangeRecord().setBrokerId(1).setBrokerEpoch(100).setFenced(BrokerRegistrationFencingChange.FENCE.value());
+            clusterControl.replay(changeRecord);
+        }
+        assertFalse(clusterControl.unfenced(0));
+        assertFalse(clusterControl.unfenced(1));
+    }
+
+    @Test
+    public void testReplayRegisterBrokerRecord() {
+        MockTime time = new MockTime(0, 0, 0);
+
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setClusterId("fPZv1VBsRFmnlRvmGcOW9w").
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
+
+        assertFalse(clusterControl.unfenced(0));
+        assertFalse(clusterControl.inControlledShutdown(0));
+
+        RegisterBrokerRecord brokerRecord = new RegisterBrokerRecord().
+            setBrokerEpoch(100).
+            setBrokerId(0).
+            setRack(null).
+            setFenced(true).
+            setInControlledShutdown(true);
+        brokerRecord.endPoints().add(new BrokerEndpoint().
+            setSecurityProtocol(SecurityProtocol.PLAINTEXT.id).
+            setPort((short) 9092).
+            setName("PLAINTEXT").
+            setHost("example.com"));
+        clusterControl.replay(brokerRecord, 100L);
+
+        assertFalse(clusterControl.unfenced(0));
+        assertTrue(clusterControl.inControlledShutdown(0));
+
+        brokerRecord.setInControlledShutdown(false);
+        clusterControl.replay(brokerRecord, 100L);
+
+        assertFalse(clusterControl.unfenced(0));
+        assertFalse(clusterControl.inControlledShutdown(0));
+        assertEquals(100L, clusterControl.registerBrokerRecordOffset(brokerRecord.brokerId()).getAsLong());
+
+        brokerRecord.setFenced(false);
+        clusterControl.replay(brokerRecord, 100L);
+
+        assertTrue(clusterControl.unfenced(0));
+        assertFalse(clusterControl.inControlledShutdown(0));
+    }
+
+    @Test
+    public void testReplayBrokerRegistrationChangeRecord() {
+        MockTime time = new MockTime(0, 0, 0);
+
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setClusterId("fPZv1VBsRFmnlRvmGcOW9w").
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
+
+        assertFalse(clusterControl.unfenced(0));
+        assertFalse(clusterControl.inControlledShutdown(0));
+
+        RegisterBrokerRecord brokerRecord = new RegisterBrokerRecord().
+            setBrokerEpoch(100).
+            setBrokerId(0).
+            setRack(null).
+            setFenced(false);
+        brokerRecord.endPoints().add(new BrokerEndpoint().
+            setSecurityProtocol(SecurityProtocol.PLAINTEXT.id).
+            setPort((short) 9092).
+            setName("PLAINTEXT").
+            setHost("example.com"));
+        clusterControl.replay(brokerRecord, 100L);
+
+        assertTrue(clusterControl.unfenced(0));
+        assertFalse(clusterControl.inControlledShutdown(0));
+
+        BrokerRegistrationChangeRecord registrationChangeRecord = new BrokerRegistrationChangeRecord()
+            .setBrokerId(0)
+            .setBrokerEpoch(100)
+            .setInControlledShutdown(BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.value());
+        clusterControl.replay(registrationChangeRecord);
+
+        assertTrue(clusterControl.unfenced(0));
+        assertTrue(clusterControl.inControlledShutdown(0));
+
+        registrationChangeRecord = new BrokerRegistrationChangeRecord()
+            .setBrokerId(0)
+            .setBrokerEpoch(100)
+            .setFenced(BrokerRegistrationFencingChange.UNFENCE.value());
+        clusterControl.replay(registrationChangeRecord);
+
+        assertTrue(clusterControl.unfenced(0));
+        assertTrue(clusterControl.inControlledShutdown(0));
     }
 
     @Test
     public void testRegistrationWithIncorrectClusterId() throws Exception {
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ClusterControlManager clusterControl = new ClusterControlManager(
-            new LogContext(), "fPZv1VBsRFmnlRvmGcOW9w", new MockTime(0, 0, 0),
-            snapshotRegistry, 1000,
-            new StripedReplicaPlacer(new Random()), new MockControllerMetrics());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setClusterId("fPZv1VBsRFmnlRvmGcOW9w").
+            setTime(new MockTime(0, 0, 0)).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
         clusterControl.activate();
         assertThrows(InconsistentClusterIdException.class, () ->
             clusterControl.registerBroker(new BrokerRegistrationRequestData().
@@ -103,7 +268,50 @@ public void testRegistrationWithIncorrectClusterId() throws Exception {
                     setRack(null).
                     setIncarnationId(Uuid.fromString("0H4fUu1xQEKXFYwB1aBjhg")),
                 123L,
-                new FeatureMapAndEpoch(new FeatureMap(Collections.emptyMap()), 456L)));
+                new FinalizedControllerFeatures(Collections.emptyMap(), 456L)));
+    }
+
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class, names = {"IBP_3_3_IV2", "IBP_3_3_IV3"})
+    public void testRegisterBrokerRecordVersion(MetadataVersion metadataVersion) {
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(metadataVersion).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setClusterId("fPZv1VBsRFmnlRvmGcOW9w").
+            setTime(new MockTime(0, 0, 0)).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
+        clusterControl.activate();
+
+        ControllerResult<BrokerRegistrationReply> result = clusterControl.registerBroker(
+            new BrokerRegistrationRequestData().
+                setClusterId("fPZv1VBsRFmnlRvmGcOW9w").
+                setBrokerId(0).
+                setRack(null).
+                setIncarnationId(Uuid.fromString("0H4fUu1xQEKXFYwB1aBjhg")),
+            123L,
+            new FinalizedControllerFeatures(Collections.emptyMap(), 456L));
+
+        short expectedVersion = metadataVersion.registerBrokerRecordVersion();
+
+        assertEquals(
+            Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
+                setBrokerEpoch(123L).
+                setBrokerId(0).
+                setRack(null).
+                setIncarnationId(Uuid.fromString("0H4fUu1xQEKXFYwB1aBjhg")).
+                setFenced(true).
+                setInControlledShutdown(false), expectedVersion)),
+            result.records());
     }
 
     @Test
@@ -119,22 +327,34 @@ public void testUnregister() throws Exception {
             setName("PLAINTEXT").
             setHost("example.com"));
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ClusterControlManager clusterControl = new ClusterControlManager(
-            new LogContext(), Uuid.randomUuid().toString(), new MockTime(0, 0, 0),
-            snapshotRegistry, 1000,
-            new StripedReplicaPlacer(new Random()), new MockControllerMetrics());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setTime(new MockTime(0, 0, 0)).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
         clusterControl.activate();
-        clusterControl.replay(brokerRecord);
+        clusterControl.replay(brokerRecord, 100L);
         assertEquals(new BrokerRegistration(1, 100,
-            Uuid.fromString("fPZv1VBsRFmnlRvmGcOW9w"), Collections.singletonMap("PLAINTEXT",
-            new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "example.com", 9092)),
-            Collections.emptyMap(), Optional.of("arack"), true),
-                clusterControl.brokerRegistrations().get(1));
+                Uuid.fromString("fPZv1VBsRFmnlRvmGcOW9w"), Collections.singletonMap("PLAINTEXT",
+                new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "example.com", 9092)),
+                Collections.emptyMap(), Optional.of("arack"), true, false),
+            clusterControl.brokerRegistrations().get(1));
+        assertEquals(100L, clusterControl.registerBrokerRecordOffset(brokerRecord.brokerId()).getAsLong());
         UnregisterBrokerRecord unregisterRecord = new UnregisterBrokerRecord().
             setBrokerId(1).
             setBrokerEpoch(100);
         clusterControl.replay(unregisterRecord);
         assertFalse(clusterControl.brokerRegistrations().containsKey(1));
+        assertFalse(clusterControl.registerBrokerRecordOffset(brokerRecord.brokerId()).isPresent());
     }
 
     @ParameterizedTest
@@ -142,10 +362,20 @@ public void testUnregister() throws Exception {
     public void testPlaceReplicas(int numUsableBrokers) throws Exception {
         MockTime time = new MockTime(0, 0, 0);
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        MockRandom random = new MockRandom();
-        ClusterControlManager clusterControl = new ClusterControlManager(
-            new LogContext(),  Uuid.randomUuid().toString(), time, snapshotRegistry, 1000,
-            new StripedReplicaPlacer(random), new MockControllerMetrics());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
         clusterControl.activate();
         for (int i = 0; i < numUsableBrokers; i++) {
             RegisterBrokerRecord brokerRecord =
@@ -155,7 +385,7 @@ public void testPlaceReplicas(int numUsableBrokers) throws Exception {
                 setPort((short) 9092).
                 setName("PLAINTEXT").
                 setHost("example.com"));
-            clusterControl.replay(brokerRecord);
+            clusterControl.replay(brokerRecord, 100L);
             UnfenceBrokerRecord unfenceRecord =
                 new UnfenceBrokerRecord().setId(i).setEpoch(100);
             clusterControl.replay(unfenceRecord);
@@ -166,7 +396,17 @@ public void testPlaceReplicas(int numUsableBrokers) throws Exception {
                 String.format("broker %d was not unfenced.", i));
         }
         for (int i = 0; i < 100; i++) {
-            List<List<Integer>> results = clusterControl.placeReplicas(0, 1, (short) 3);
+            List<List<Integer>> results = clusterControl.replicaPlacer().place(
+                new PlacementSpec(0,
+                    1,
+                    (short) 3),
+                new ClusterDescriber() {
+                    @Override
+                    public Iterator<UsableBroker> usableBrokers() {
+                        return clusterControl.usableBrokers();
+                    }
+                }
+            );
             HashSet<Integer> seen = new HashSet<>();
             for (Integer result : results.get(0)) {
                 assertTrue(result >= 0);
@@ -176,13 +416,25 @@ public void testPlaceReplicas(int numUsableBrokers) throws Exception {
         }
     }
 
-    @Test
-    public void testIterator() throws Exception {
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class, names = {"IBP_3_3_IV2", "IBP_3_3_IV3"})
+    public void testIterator(MetadataVersion metadataVersion) throws Exception {
         MockTime time = new MockTime(0, 0, 0);
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ClusterControlManager clusterControl = new ClusterControlManager(
-            new LogContext(), Uuid.randomUuid().toString(), time, snapshotRegistry, 1000,
-            new StripedReplicaPlacer(new Random()), new MockControllerMetrics());
+        FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(metadataVersion).
+            build();
+        ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
         clusterControl.activate();
         assertFalse(clusterControl.unfenced(0));
         for (int i = 0; i < 3; i++) {
@@ -193,13 +445,21 @@ public void testIterator() throws Exception {
                 setPort((short) 9092 + i).
                 setName("PLAINTEXT").
                 setHost("example.com"));
-            clusterControl.replay(brokerRecord);
+            clusterControl.replay(brokerRecord, 100L);
         }
         for (int i = 0; i < 2; i++) {
             UnfenceBrokerRecord unfenceBrokerRecord =
                 new UnfenceBrokerRecord().setId(i).setEpoch(100);
             clusterControl.replay(unfenceBrokerRecord);
         }
+        BrokerRegistrationChangeRecord registrationChangeRecord =
+            new BrokerRegistrationChangeRecord().
+                setBrokerId(0).
+                setBrokerEpoch(100).
+                setInControlledShutdown(BrokerRegistrationInControlledShutdownChange.
+                    IN_CONTROLLED_SHUTDOWN.value());
+        clusterControl.replay(registrationChangeRecord);
+        short expectedVersion = metadataVersion.registerBrokerRecordVersion();
         RecordTestUtils.assertBatchIteratorContains(Arrays.asList(
             Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerEpoch(100).setBrokerId(0).setRack(null).
@@ -208,7 +468,8 @@ public void testIterator() throws Exception {
                         setPort((short) 9092).
                         setName("PLAINTEXT").
                         setHost("example.com")).iterator())).
-                setFenced(false), (short) 0)),
+                setInControlledShutdown(metadataVersion.isInControlledShutdownStateSupported()).
+                setFenced(false), expectedVersion)),
             Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerEpoch(100).setBrokerId(1).setRack(null).
                 setEndPoints(new BrokerEndpointCollection(Collections.singleton(
@@ -216,7 +477,7 @@ public void testIterator() throws Exception {
                         setPort((short) 9093).
                         setName("PLAINTEXT").
                         setHost("example.com")).iterator())).
-                setFenced(false), (short) 0)),
+                setFenced(false), expectedVersion)),
             Arrays.asList(new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerEpoch(100).setBrokerId(2).setRack(null).
                 setEndPoints(new BrokerEndpointCollection(Collections.singleton(
@@ -224,7 +485,7 @@ public void testIterator() throws Exception {
                         setPort((short) 9094).
                         setName("PLAINTEXT").
                         setHost("example.com")).iterator())).
-                setFenced(true), (short) 0))),
+                setFenced(true), expectedVersion))),
                 clusterControl.iterator(Long.MAX_VALUE));
     }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/ConfigurationControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/ConfigurationControlManagerTest.java
index 60bcb8299ffc3..1c59892444844 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/ConfigurationControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/ConfigurationControlManagerTest.java
@@ -20,18 +20,19 @@
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigResource;
 import org.apache.kafka.common.errors.PolicyViolationException;
+import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
 import org.apache.kafka.common.metadata.ConfigRecord;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
-import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.metadata.ConfigSynonym;
 import org.apache.kafka.metadata.KafkaConfigSchema;
 import org.apache.kafka.metadata.RecordTestUtils;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.apache.kafka.server.policy.AlterConfigPolicy;
 import org.apache.kafka.server.policy.AlterConfigPolicy.RequestMetadata;
-import org.apache.kafka.timeline.SnapshotRegistry;
 
 import java.util.AbstractMap.SimpleImmutableEntry;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -40,6 +41,7 @@
 import java.util.Map.Entry;
 import java.util.Optional;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Consumer;
 
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
@@ -51,7 +53,8 @@
 import static org.apache.kafka.clients.admin.AlterConfigOp.OpType.SUBTRACT;
 import static org.apache.kafka.common.config.ConfigResource.Type.BROKER;
 import static org.apache.kafka.common.config.ConfigResource.Type.TOPIC;
-import static org.apache.kafka.controller.ConfigurationControlManager.NO_OP_EXISTENCE_CHECKER;
+import static org.apache.kafka.common.metadata.MetadataRecordType.CONFIG_RECORD;
+import static org.apache.kafka.metadata.ConfigSynonym.HOURS_TO_MILLISECONDS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
@@ -68,14 +71,34 @@ public class ConfigurationControlManagerTest {
         CONFIGS.put(TOPIC, new ConfigDef().
             define("abc", ConfigDef.Type.LIST, ConfigDef.Importance.HIGH, "abc").
             define("def", ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "def").
-            define("ghi", ConfigDef.Type.BOOLEAN, true, ConfigDef.Importance.HIGH, "ghi"));
+            define("ghi", ConfigDef.Type.BOOLEAN, true, ConfigDef.Importance.HIGH, "ghi").
+            define("quuux", ConfigDef.Type.LONG, ConfigDef.Importance.HIGH, "quux"));
     }
 
-    static final KafkaConfigSchema SCHEMA = new KafkaConfigSchema(CONFIGS);
+    public static final Map<String, List<ConfigSynonym>> SYNONYMS = new HashMap<>();
+
+    static {
+        SYNONYMS.put("abc", Arrays.asList(new ConfigSynonym("foo.bar")));
+        SYNONYMS.put("def", Arrays.asList(new ConfigSynonym("baz")));
+        SYNONYMS.put("quuux", Arrays.asList(new ConfigSynonym("quux", HOURS_TO_MILLISECONDS)));
+    }
+
+    static final KafkaConfigSchema SCHEMA = new KafkaConfigSchema(CONFIGS, SYNONYMS);
 
     static final ConfigResource BROKER0 = new ConfigResource(BROKER, "0");
     static final ConfigResource MYTOPIC = new ConfigResource(TOPIC, "mytopic");
 
+    static class TestExistenceChecker implements Consumer<ConfigResource> {
+        static final TestExistenceChecker INSTANCE = new TestExistenceChecker();
+
+        @Override
+        public void accept(ConfigResource resource) {
+            if (!resource.name().startsWith("Existing")) {
+                throw new UnknownTopicOrPartitionException("Unknown resource.");
+            }
+        }
+    }
+
     @SuppressWarnings("unchecked")
     private static <A, B> Map<A, B> toMap(Entry... entries) {
         Map<A, B> map = new LinkedHashMap<>();
@@ -91,10 +114,9 @@ static <A, B> Entry<A, B> entry(A a, B b) {
 
     @Test
     public void testReplay() throws Exception {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ConfigurationControlManager manager =
-            new ConfigurationControlManager(new LogContext(), snapshotRegistry, SCHEMA,
-                Optional.empty(), ConfigurationValidator.NO_OP);
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            build();
         assertEquals(Collections.emptyMap(), manager.getConfigs(BROKER0));
         manager.replay(new ConfigRecord().
             setResourceType(BROKER.id()).setResourceName("0").
@@ -116,30 +138,29 @@ public void testReplay() throws Exception {
         RecordTestUtils.assertBatchIteratorContains(asList(
             asList(new ApiMessageAndVersion(new ConfigRecord().
                     setResourceType(TOPIC.id()).setResourceName("mytopic").
-                    setName("abc").setValue("x,y,z"), (short) 0),
+                    setName("abc").setValue("x,y,z"), CONFIG_RECORD.highestSupportedVersion()),
                 new ApiMessageAndVersion(new ConfigRecord().
                     setResourceType(TOPIC.id()).setResourceName("mytopic").
-                    setName("def").setValue("blah"), (short) 0))),
+                    setName("def").setValue("blah"), CONFIG_RECORD.highestSupportedVersion()))),
             manager.iterator(Long.MAX_VALUE));
     }
 
     @Test
     public void testIncrementalAlterConfigs() {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ConfigurationControlManager manager =
-            new ConfigurationControlManager(new LogContext(), snapshotRegistry, SCHEMA,
-                Optional.empty(), ConfigurationValidator.NO_OP);
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            build();
 
         ControllerResult<Map<ConfigResource, ApiError>> result = manager.
             incrementalAlterConfigs(toMap(entry(BROKER0, toMap(
                 entry("baz", entry(SUBTRACT, "abc")),
                 entry("quux", entry(SET, "abc")))),
                 entry(MYTOPIC, toMap(entry("abc", entry(APPEND, "123"))))),
-                NO_OP_EXISTENCE_CHECKER);
+                true);
 
         assertEquals(ControllerResult.atomicOf(Collections.singletonList(new ApiMessageAndVersion(
                 new ConfigRecord().setResourceType(TOPIC.id()).setResourceName("mytopic").
-                    setName("abc").setValue("123"), (short) 0)),
+                    setName("abc").setValue("123"), CONFIG_RECORD.highestSupportedVersion())),
                 toMap(entry(BROKER0, new ApiError(Errors.INVALID_CONFIG,
                             "Can't SUBTRACT to key baz because its type is not LIST.")),
                     entry(MYTOPIC, ApiError.NONE))), result);
@@ -148,11 +169,79 @@ public void testIncrementalAlterConfigs() {
 
         assertEquals(ControllerResult.atomicOf(Collections.singletonList(new ApiMessageAndVersion(
                 new ConfigRecord().setResourceType(TOPIC.id()).setResourceName("mytopic").
-                    setName("abc").setValue(null), (short) 0)),
+                    setName("abc").setValue(null), CONFIG_RECORD.highestSupportedVersion())),
                 toMap(entry(MYTOPIC, ApiError.NONE))),
             manager.incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(
                 entry("abc", entry(DELETE, "xyz"))))),
-                NO_OP_EXISTENCE_CHECKER));
+                true));
+    }
+
+    @Test
+    public void testIncrementalAlterMultipleConfigValues() {
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            build();
+
+        ControllerResult<Map<ConfigResource, ApiError>> result = manager.
+            incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(entry("abc", entry(APPEND, "123,456,789"))))), true);
+
+        assertEquals(ControllerResult.atomicOf(Collections.singletonList(new ApiMessageAndVersion(
+                new ConfigRecord().setResourceType(TOPIC.id()).setResourceName("mytopic").
+                    setName("abc").setValue("123,456,789"), CONFIG_RECORD.highestSupportedVersion())),
+                toMap(entry(MYTOPIC, ApiError.NONE))), result);
+
+        RecordTestUtils.replayAll(manager, result.records());
+
+        // It's ok for the appended value to be already present
+        result = manager
+            .incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(entry("abc", entry(APPEND, "123,456"))))), true);
+        assertEquals(
+            ControllerResult.atomicOf(Collections.emptyList(), toMap(entry(MYTOPIC, ApiError.NONE))),
+            result
+        );
+        RecordTestUtils.replayAll(manager, result.records());
+
+        result = manager
+            .incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(entry("abc", entry(SUBTRACT, "123,456"))))), true);
+        assertEquals(ControllerResult.atomicOf(Collections.singletonList(new ApiMessageAndVersion(
+                new ConfigRecord().setResourceType(TOPIC.id()).setResourceName("mytopic").
+                    setName("abc").setValue("789"), CONFIG_RECORD.highestSupportedVersion())),
+                toMap(entry(MYTOPIC, ApiError.NONE))),
+                result);
+        RecordTestUtils.replayAll(manager, result.records());
+
+        // It's ok for the deleted value not to be present
+        result = manager
+            .incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(entry("abc", entry(SUBTRACT, "123456"))))), true);
+        assertEquals(
+            ControllerResult.atomicOf(Collections.emptyList(), toMap(entry(MYTOPIC, ApiError.NONE))),
+            result
+        );
+        RecordTestUtils.replayAll(manager, result.records());
+
+        assertEquals("789", manager.getConfigs(MYTOPIC).get("abc"));
+    }
+
+    @Test
+    public void testIncrementalAlterConfigsWithoutExistence() {
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            setExistenceChecker(TestExistenceChecker.INSTANCE).
+            build();
+        ConfigResource existingTopic = new ConfigResource(TOPIC, "ExistingTopic");
+
+        ControllerResult<Map<ConfigResource, ApiError>> result = manager.
+            incrementalAlterConfigs(toMap(entry(BROKER0, toMap(
+                entry("quux", entry(SET, "1")))),
+                entry(existingTopic, toMap(entry("def", entry(SET, "newVal"))))),
+                false);
+
+        assertEquals(ControllerResult.atomicOf(Collections.singletonList(new ApiMessageAndVersion(
+                new ConfigRecord().setResourceType(TOPIC.id()).setResourceName("ExistingTopic").
+                    setName("def").setValue("newVal"), CONFIG_RECORD.highestSupportedVersion())),
+            toMap(entry(BROKER0, new ApiError(Errors.UNKNOWN_TOPIC_OR_PARTITION,
+                    "Unknown resource.")),
+                entry(existingTopic, ApiError.NONE))), result);
     }
 
     private static class MockAlterConfigsPolicy implements AlterConfigPolicy {
@@ -190,21 +279,32 @@ public void configure(Map<String, ?> configs) {
 
     @Test
     public void testIncrementalAlterConfigsWithPolicy() {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
         MockAlterConfigsPolicy policy = new MockAlterConfigsPolicy(asList(
             new RequestMetadata(MYTOPIC, Collections.emptyMap()),
-            new RequestMetadata(BROKER0, toMap(entry("foo.bar", "123"),
-                entry("quux", "456")))));
-        ConfigurationControlManager manager = new ConfigurationControlManager(
-            new LogContext(), snapshotRegistry, SCHEMA, Optional.of(policy),
-            ConfigurationValidator.NO_OP);
-
+            new RequestMetadata(BROKER0, toMap(
+                entry("foo.bar", "123"),
+                entry("quux", "456"),
+                entry("broker.config.to.remove", null)))));
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            setAlterConfigPolicy(Optional.of(policy)).
+            build();
+        // Existing configs should not be passed to the policy
+        manager.replay(new ConfigRecord().setResourceType(BROKER.id()).setResourceName("0").
+                setName("broker.config").setValue("123"));
+        manager.replay(new ConfigRecord().setResourceType(TOPIC.id()).setResourceName(MYTOPIC.name()).
+                setName("topic.config").setValue("123"));
+        manager.replay(new ConfigRecord().setResourceType(BROKER.id()).setResourceName("0").
+                setName("broker.config.to.remove").setValue("123"));
         assertEquals(ControllerResult.atomicOf(asList(new ApiMessageAndVersion(
                 new ConfigRecord().setResourceType(BROKER.id()).setResourceName("0").
-                    setName("foo.bar").setValue("123"), (short) 0), new ApiMessageAndVersion(
+                    setName("foo.bar").setValue("123"), CONFIG_RECORD.highestSupportedVersion()), new ApiMessageAndVersion(
+                new ConfigRecord().setResourceType(BROKER.id()).setResourceName("0").
+                    setName("quux").setValue("456"), CONFIG_RECORD.highestSupportedVersion()), new ApiMessageAndVersion(
                 new ConfigRecord().setResourceType(BROKER.id()).setResourceName("0").
-                    setName("quux").setValue("456"), (short) 0)),
-            toMap(entry(MYTOPIC, new ApiError(Errors.POLICY_VIOLATION,
+                    setName("broker.config.to.remove").setValue(null), CONFIG_RECORD.highestSupportedVersion())
+                ),
+                toMap(entry(MYTOPIC, new ApiError(Errors.POLICY_VIOLATION,
                     "Expected: AlterConfigPolicy.RequestMetadata(resource=ConfigResource(" +
                     "type=TOPIC, name='mytopic'), configs={}). Got: " +
                     "AlterConfigPolicy.RequestMetadata(resource=ConfigResource(" +
@@ -213,29 +313,30 @@ public void testIncrementalAlterConfigsWithPolicy() {
             manager.incrementalAlterConfigs(toMap(entry(MYTOPIC, toMap(
                 entry("foo.bar", entry(SET, "123")))),
                 entry(BROKER0, toMap(
-                entry("foo.bar", entry(SET, "123")),
-                entry("quux", entry(SET, "456"))))),
-                NO_OP_EXISTENCE_CHECKER));
+                        entry("foo.bar", entry(SET, "123")),
+                        entry("quux", entry(SET, "456")),
+                        entry("broker.config.to.remove", entry(DELETE, null))
+                ))),
+                true));
     }
 
     @Test
     public void testLegacyAlterConfigs() {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        ConfigurationControlManager manager =
-            new ConfigurationControlManager(new LogContext(), snapshotRegistry, SCHEMA,
-                Optional.empty(), ConfigurationValidator.NO_OP);
+        ConfigurationControlManager manager = new ConfigurationControlManager.Builder().
+            setKafkaConfigSchema(SCHEMA).
+            build();
         List<ApiMessageAndVersion> expectedRecords1 = asList(
             new ApiMessageAndVersion(new ConfigRecord().
                 setResourceType(TOPIC.id()).setResourceName("mytopic").
-                setName("abc").setValue("456"), (short) 0),
+                setName("abc").setValue("456"), CONFIG_RECORD.highestSupportedVersion()),
             new ApiMessageAndVersion(new ConfigRecord().
                 setResourceType(TOPIC.id()).setResourceName("mytopic").
-                setName("def").setValue("901"), (short) 0));
+                setName("def").setValue("901"), CONFIG_RECORD.highestSupportedVersion()));
         assertEquals(ControllerResult.atomicOf(
                 expectedRecords1, toMap(entry(MYTOPIC, ApiError.NONE))),
             manager.legacyAlterConfigs(
                 toMap(entry(MYTOPIC, toMap(entry("abc", "456"), entry("def", "901")))),
-                NO_OP_EXISTENCE_CHECKER));
+                true));
         for (ApiMessageAndVersion message : expectedRecords1) {
             manager.replay((ConfigRecord) message.message());
         }
@@ -246,9 +347,9 @@ expectedRecords1, toMap(entry(MYTOPIC, ApiError.NONE))),
                     .setResourceName("mytopic")
                     .setName("abc")
                     .setValue(null),
-                (short) 0)),
+                CONFIG_RECORD.highestSupportedVersion())),
             toMap(entry(MYTOPIC, ApiError.NONE))),
             manager.legacyAlterConfigs(toMap(entry(MYTOPIC, toMap(entry("def", "901")))),
-                NO_OP_EXISTENCE_CHECKER));
+                true));
     }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/ControllerRequestContextUtil.java b/metadata/src/test/java/org/apache/kafka/controller/ControllerRequestContextUtil.java
new file mode 100644
index 0000000000000..8d70a2d82f537
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/controller/ControllerRequestContextUtil.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import java.util.OptionalLong;
+import org.apache.kafka.common.message.RequestHeaderData;
+import org.apache.kafka.common.protocol.ApiKeys;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
+
+public class ControllerRequestContextUtil {
+    public static final ControllerRequestContext ANONYMOUS_CONTEXT =
+        new ControllerRequestContext(
+            new RequestHeaderData(),
+            KafkaPrincipal.ANONYMOUS,
+            OptionalLong.empty());
+
+    public static ControllerRequestContext anonymousContextFor(ApiKeys apiKeys) {
+        return anonymousContextFor(apiKeys, apiKeys.latestVersion());
+    }
+
+    public static ControllerRequestContext anonymousContextFor(
+        ApiKeys apiKeys,
+        short version
+    ) {
+        return new ControllerRequestContext(
+            new RequestHeaderData()
+                .setRequestApiKey(apiKeys.id)
+                .setRequestApiVersion(version),
+            KafkaPrincipal.ANONYMOUS,
+            OptionalLong.empty()
+        );
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/controller/FeatureControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/FeatureControlManagerTest.java
index 680253c712547..4d4c4719945fe 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/FeatureControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/FeatureControlManagerTest.java
@@ -23,32 +23,64 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
+
+import org.apache.kafka.clients.ApiVersions;
+import org.apache.kafka.clients.admin.FeatureUpdate;
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.common.utils.LogContext;
-import org.apache.kafka.metadata.FeatureMap;
-import org.apache.kafka.metadata.FeatureMapAndEpoch;
+import org.apache.kafka.metadata.FinalizedControllerFeatures;
 import org.apache.kafka.metadata.RecordTestUtils;
 import org.apache.kafka.metadata.VersionRange;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
+import static java.util.Collections.emptyList;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
 @Timeout(value = 40)
 public class FeatureControlManagerTest {
+
     @SuppressWarnings("unchecked")
     private static Map<String, VersionRange> rangeMap(Object... args) {
         Map<String, VersionRange> result = new HashMap<>();
         for (int i = 0; i < args.length; i += 3) {
             String feature = (String) args[i];
-            Integer low = (Integer) args[i + 1];
-            Integer high = (Integer) args[i + 2];
-            result.put(feature, new VersionRange(low.shortValue(), high.shortValue()));
+            Number low = (Number) args[i + 1];
+            Number high = (Number) args[i + 2];
+            result.put(feature, VersionRange.of(low.shortValue(), high.shortValue()));
+        }
+        return result;
+    }
+
+    private static Map<String, Short> versionMap(Object... args) {
+        Map<String, Short> result = new HashMap<>();
+        for (int i = 0; i < args.length; i += 2) {
+            String feature = (String) args[i];
+            Number ver = (Number) args[i + 1];
+            result.put(feature, ver.shortValue());
+        }
+        return result;
+    }
+
+    public static QuorumFeatures features(Object... args) {
+        Map<String, VersionRange> features = QuorumFeatures.defaultFeatureMap();
+        features.putAll(rangeMap(args));
+        return new QuorumFeatures(0, new ApiVersions(), features, emptyList());
+    }
+
+    private static Map<String, Short> updateMap(Object... args) {
+        Map<String, Short> result = new HashMap<>();
+        for (int i = 0; i < args.length; i += 2) {
+            String feature = (String) args[i];
+            Number ver = (Number) args[i + 1];
+            result.put(feature, ver.shortValue());
         }
         return result;
     }
@@ -56,82 +88,93 @@ private static Map<String, VersionRange> rangeMap(Object... args) {
     @Test
     public void testUpdateFeatures() {
         SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+            setQuorumFeatures(features("foo", 1, 2)).
+            setSnapshotRegistry(snapshotRegistry).
+            build();
         snapshotRegistry.getOrCreateSnapshot(-1);
-        FeatureControlManager manager = new FeatureControlManager(
-            rangeMap("foo", 1, 2), snapshotRegistry);
-        assertEquals(new FeatureMapAndEpoch(new FeatureMap(Collections.emptyMap()), -1),
+        assertEquals(new FinalizedControllerFeatures(Collections.singletonMap("metadata.version", (short) 1), -1),
             manager.finalizedFeatures(-1));
-        assertEquals(ControllerResult.atomicOf(Collections.emptyList(), Collections.
+        assertEquals(ControllerResult.atomicOf(emptyList(), Collections.
                 singletonMap("foo", new ApiError(Errors.INVALID_UPDATE_VERSION,
-                    "The controller does not support the given feature range."))),
-            manager.updateFeatures(rangeMap("foo", 1, 3),
-                Collections.singleton("foo"),
-                Collections.emptyMap()));
+                    "Invalid update version 3 for feature foo. Local controller 0 only supports versions 1-2"))),
+            manager.updateFeatures(updateMap("foo", 3),
+                Collections.singletonMap("foo", FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+                Collections.emptyMap(), false));
         ControllerResult<Map<String, ApiError>> result = manager.updateFeatures(
-            rangeMap("foo", 1, 2, "bar", 1, 1), Collections.emptySet(),
-                Collections.emptyMap());
+                updateMap("foo", 2, "bar", 1), Collections.emptyMap(),
+                Collections.emptyMap(), false);
         Map<String, ApiError> expectedMap = new HashMap<>();
         expectedMap.put("foo", ApiError.NONE);
         expectedMap.put("bar", new ApiError(Errors.INVALID_UPDATE_VERSION,
-                "The controller does not support the given feature range."));
+                "Invalid update version 1 for feature bar. Local controller 0 does not support this feature."));
         assertEquals(expectedMap, result.response());
         List<ApiMessageAndVersion> expectedMessages = new ArrayList<>();
         expectedMessages.add(new ApiMessageAndVersion(new FeatureLevelRecord().
-            setName("foo").setMinFeatureLevel((short) 1).setMaxFeatureLevel((short) 2),
+            setName("foo").setFeatureLevel((short) 2),
             (short) 0));
         assertEquals(expectedMessages, result.records());
     }
 
     @Test
     public void testReplay() {
+        LogContext logContext = new LogContext();
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(logContext);
         FeatureLevelRecord record = new FeatureLevelRecord().
-            setName("foo").setMinFeatureLevel((short) 1).setMaxFeatureLevel((short) 2);
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
+            setName("foo").setFeatureLevel((short) 2);
+
         snapshotRegistry.getOrCreateSnapshot(-1);
-        FeatureControlManager manager = new FeatureControlManager(
-            rangeMap("foo", 1, 2), snapshotRegistry);
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+                setLogContext(logContext).
+                setQuorumFeatures(features("foo", 1, 2)).
+                setSnapshotRegistry(snapshotRegistry).
+                build();
         manager.replay(record);
         snapshotRegistry.getOrCreateSnapshot(123);
-        assertEquals(new FeatureMapAndEpoch(new FeatureMap(rangeMap("foo", 1, 2)), 123),
+        assertEquals(new FinalizedControllerFeatures(versionMap("metadata.version", 1, "foo", 2), 123),
             manager.finalizedFeatures(123));
     }
 
     @Test
     public void testUpdateFeaturesErrorCases() {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        FeatureControlManager manager = new FeatureControlManager(
-            rangeMap("foo", 1, 5, "bar", 1, 2), snapshotRegistry);
+        LogContext logContext = new LogContext();
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(logContext);
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+            setLogContext(logContext).
+            setQuorumFeatures(features("foo", 1, 5, "bar", 1, 2)).
+            setSnapshotRegistry(snapshotRegistry).
+            build();
 
         assertEquals(
             ControllerResult.atomicOf(
-                Collections.emptyList(),
+                emptyList(),
                 Collections.singletonMap(
                     "foo",
                     new ApiError(
                         Errors.INVALID_UPDATE_VERSION,
-                        "Broker 5 does not support the given feature range."
+                        "Invalid update version 3 for feature foo. Broker 5 does not support this feature."
                     )
                 )
             ),
             manager.updateFeatures(
-                rangeMap("foo", 1, 3),
-                Collections.singleton("foo"),
-                Collections.singletonMap(5, rangeMap())
-            )
+                updateMap("foo", 3),
+                Collections.singletonMap("foo", FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+                Collections.singletonMap(5, rangeMap()),
+                false)
         );
 
         ControllerResult<Map<String, ApiError>> result = manager.updateFeatures(
-            rangeMap("foo", 1, 3), Collections.emptySet(), Collections.emptyMap());
+            updateMap("foo", 3), Collections.emptyMap(), Collections.emptyMap(), false);
         assertEquals(Collections.singletonMap("foo", ApiError.NONE), result.response());
         manager.replay((FeatureLevelRecord) result.records().get(0).message());
         snapshotRegistry.getOrCreateSnapshot(3);
 
-        assertEquals(ControllerResult.atomicOf(Collections.emptyList(), Collections.
+        assertEquals(ControllerResult.atomicOf(emptyList(), Collections.
                 singletonMap("foo", new ApiError(Errors.INVALID_UPDATE_VERSION,
-                    "Can't downgrade the maximum version of this feature without " +
-                    "setting downgradable to true."))),
-            manager.updateFeatures(rangeMap("foo", 1, 2),
-                Collections.emptySet(), Collections.emptyMap()));
+                    "Invalid update version 2 for feature foo. Can't downgrade the version of this feature " +
+                    "without setting the upgrade type to either safe or unsafe downgrade."))),
+            manager.updateFeatures(updateMap("foo", 2),
+                Collections.emptyMap(), Collections.emptyMap(), false));
 
         assertEquals(
             ControllerResult.atomicOf(
@@ -139,39 +182,123 @@ public void testUpdateFeaturesErrorCases() {
                     new ApiMessageAndVersion(
                         new FeatureLevelRecord()
                             .setName("foo")
-                            .setMinFeatureLevel((short) 1)
-                            .setMaxFeatureLevel((short) 2),
+                            .setFeatureLevel((short) 2),
                         (short) 0
                     )
                 ),
                 Collections.singletonMap("foo", ApiError.NONE)
             ),
             manager.updateFeatures(
-                rangeMap("foo", 1, 2),
-                Collections.singleton("foo"),
-                Collections.emptyMap()
-            )
+                updateMap("foo", 2),
+                Collections.singletonMap("foo", FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+                Collections.emptyMap(),
+                false)
         );
     }
 
     @Test
     public void testFeatureControlIterator() throws Exception {
-        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new LogContext());
-        FeatureControlManager manager = new FeatureControlManager(
-            rangeMap("foo", 1, 5, "bar", 1, 2), snapshotRegistry);
+        LogContext logContext = new LogContext();
+        SnapshotRegistry snapshotRegistry = new SnapshotRegistry(logContext);
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+            setLogContext(logContext).
+            setQuorumFeatures(features("foo", 1, 5, "bar", 1, 2)).
+            setSnapshotRegistry(snapshotRegistry).
+            build();
         ControllerResult<Map<String, ApiError>> result = manager.
-            updateFeatures(rangeMap("foo", 1, 5, "bar", 1, 1),
-                Collections.emptySet(), Collections.emptyMap());
+            updateFeatures(updateMap("foo", 5, "bar", 1),
+                Collections.emptyMap(), Collections.emptyMap(), false);
         RecordTestUtils.replayAll(manager, result.records());
         RecordTestUtils.assertBatchIteratorContains(Arrays.asList(
+            Arrays.asList(new ApiMessageAndVersion(new FeatureLevelRecord().
+                    setName("metadata.version").
+                    setFeatureLevel((short) 1), (short) 0)),
             Arrays.asList(new ApiMessageAndVersion(new FeatureLevelRecord().
                 setName("foo").
-                setMinFeatureLevel((short) 1).
-                setMaxFeatureLevel((short) 5), (short) 0)),
+                setFeatureLevel((short) 5), (short) 0)),
             Arrays.asList(new ApiMessageAndVersion(new FeatureLevelRecord().
                 setName("bar").
-                setMinFeatureLevel((short) 1).
-                setMaxFeatureLevel((short) 1), (short) 0))),
+                setFeatureLevel((short) 1), (short) 0))),
             manager.iterator(Long.MAX_VALUE));
     }
+
+    @Test
+    public void testApplyMetadataVersionChangeRecord() {
+        QuorumFeatures features = features(MetadataVersion.FEATURE_NAME,
+                MetadataVersion.IBP_3_0_IV1.featureLevel(), MetadataVersion.IBP_3_3_IV0.featureLevel());
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+            setQuorumFeatures(features).build();
+        manager.replay(new FeatureLevelRecord().
+            setName(MetadataVersion.FEATURE_NAME).
+            setFeatureLevel(MetadataVersion.IBP_3_0_IV1.featureLevel()));
+        assertEquals(MetadataVersion.IBP_3_0_IV1, manager.metadataVersion());
+    }
+
+    @Test
+    public void testDowngradeMetadataVersion() {
+        QuorumFeatures features = features(MetadataVersion.FEATURE_NAME,
+                MetadataVersion.IBP_3_2_IV0.featureLevel(), MetadataVersion.IBP_3_3_IV0.featureLevel());
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+            setQuorumFeatures(features).
+            setMetadataVersion(MetadataVersion.IBP_3_3_IV0).
+            build();
+        assertEquals(manager.metadataVersion(), MetadataVersion.IBP_3_3_IV0);
+
+        ControllerResult<Map<String, ApiError>> result;
+        result = manager.updateFeatures(
+            Collections.singletonMap(MetadataVersion.FEATURE_NAME, MetadataVersion.IBP_3_2_IV0.featureLevel()),
+            Collections.singletonMap(MetadataVersion.FEATURE_NAME, FeatureUpdate.UpgradeType.UPGRADE),
+            Collections.emptyMap(),
+            true);
+        assertEquals(Errors.INVALID_UPDATE_VERSION, result.response().get(MetadataVersion.FEATURE_NAME).error());
+
+
+        result = manager.updateFeatures(
+            Collections.singletonMap(MetadataVersion.FEATURE_NAME, MetadataVersion.IBP_3_1_IV0.featureLevel()),
+            Collections.singletonMap(MetadataVersion.FEATURE_NAME, FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+            Collections.emptyMap(),
+            true);
+        assertEquals(Errors.INVALID_UPDATE_VERSION, result.response().get(MetadataVersion.FEATURE_NAME).error());
+
+        result = manager.updateFeatures(
+                Collections.singletonMap(MetadataVersion.FEATURE_NAME, MetadataVersion.MINIMUM_KRAFT_VERSION.featureLevel()),
+                Collections.singletonMap(MetadataVersion.FEATURE_NAME, FeatureUpdate.UpgradeType.SAFE_DOWNGRADE),
+                Collections.emptyMap(),
+                true);
+        assertEquals(Errors.INVALID_UPDATE_VERSION, result.response().get(MetadataVersion.FEATURE_NAME).error());
+        assertEquals("Invalid update version 1 for feature metadata.version. Local controller 0 only supports versions 3-4",
+            result.response().get(MetadataVersion.FEATURE_NAME).message());
+    }
+
+    @Test
+    public void testCreateFeatureLevelRecords() {
+        Map<String, VersionRange> localSupportedFeatures = new HashMap<>();
+        localSupportedFeatures.put(MetadataVersion.FEATURE_NAME, VersionRange.of(
+                MetadataVersion.IBP_3_0_IV1.featureLevel(), MetadataVersion.latest().featureLevel()));
+        localSupportedFeatures.put("foo", VersionRange.of(0, 2));
+        FeatureControlManager manager = new FeatureControlManager.Builder().
+                setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(), localSupportedFeatures, emptyList())).
+                build();
+        ControllerResult<Map<String, ApiError>> result  = manager.updateFeatures(
+                Collections.singletonMap("foo", (short) 1),
+                Collections.singletonMap("foo", FeatureUpdate.UpgradeType.UPGRADE),
+                Collections.singletonMap(1, Collections.singletonMap("foo", VersionRange.of(0, 3))),
+                false);
+        assertEquals(ControllerResult.atomicOf(Arrays.asList(new ApiMessageAndVersion(
+                new FeatureLevelRecord().setName("foo").setFeatureLevel((short) 1), (short) 0)),
+                        Collections.singletonMap("foo", ApiError.NONE)), result);
+        RecordTestUtils.replayAll(manager, result.records());
+        assertEquals(Optional.of((short) 1), manager.finalizedFeatures(Long.MAX_VALUE).get("foo"));
+
+        ControllerResult<Map<String, ApiError>> result2  = manager.updateFeatures(
+                Collections.singletonMap("foo", (short) 0),
+                Collections.singletonMap("foo", FeatureUpdate.UpgradeType.UNSAFE_DOWNGRADE),
+                Collections.singletonMap(1, Collections.singletonMap("foo", VersionRange.of(0, 3))),
+                false);
+        assertEquals(ControllerResult.atomicOf(Arrays.asList(new ApiMessageAndVersion(
+                        new FeatureLevelRecord().setName("foo").setFeatureLevel((short) 0), (short) 0)),
+                Collections.singletonMap("foo", ApiError.NONE)), result2);
+        RecordTestUtils.replayAll(manager, result2.records());
+        assertEquals(Optional.empty(), manager.finalizedFeatures(Long.MAX_VALUE).get("foo"));
+    }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/MockControllerMetrics.java b/metadata/src/test/java/org/apache/kafka/controller/MockControllerMetrics.java
index 0120f15295140..ca13d90ddeae1 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/MockControllerMetrics.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/MockControllerMetrics.java
@@ -17,25 +17,22 @@
 
 package org.apache.kafka.controller;
 
+import java.util.concurrent.atomic.AtomicInteger;
+
 public final class MockControllerMetrics implements ControllerMetrics {
-    private volatile boolean active;
-    private volatile int fencedBrokers;
-    private volatile int activeBrokers;
-    private volatile int topics;
-    private volatile int partitions;
-    private volatile int offlinePartitions;
-    private volatile int preferredReplicaImbalances;
-    private volatile boolean closed = false;
+    private volatile boolean active = false;
+    private volatile int fencedBrokers = 0;
+    private volatile int activeBrokers = 0;
+    private volatile int topics = 0;
+    private volatile int partitions = 0;
+    private volatile int offlinePartitions = 0;
+    private volatile int preferredReplicaImbalances = 0;
+    private volatile AtomicInteger metadataErrors = new AtomicInteger(0);
+    private volatile long lastAppliedRecordOffset = 0;
+    private volatile long lastCommittedRecordOffset = 0;
+    private volatile long lastAppliedRecordTimestamp = 0;
 
-    public MockControllerMetrics() {
-        this.active = false;
-        this.fencedBrokers = 0;
-        this.activeBrokers = 0;
-        this.topics = 0;
-        this.partitions = 0;
-        this.offlinePartitions = 0;
-        this.preferredReplicaImbalances = 0;
-    }
+    private volatile boolean closed = false;
 
     @Override
     public void setActive(boolean active) {
@@ -117,6 +114,46 @@ public int preferredReplicaImbalanceCount() {
         return this.preferredReplicaImbalances;
     }
 
+    @Override
+    public void incrementMetadataErrorCount() {
+        this.metadataErrors.getAndIncrement();
+    }
+
+    @Override
+    public int metadataErrorCount() {
+        return this.metadataErrors.get();
+    }
+
+    @Override
+    public void setLastAppliedRecordOffset(long offset) {
+        lastAppliedRecordOffset = offset;
+    }
+
+    @Override
+    public long lastAppliedRecordOffset() {
+        return lastAppliedRecordOffset;
+    }
+
+    @Override
+    public void setLastCommittedRecordOffset(long offset) {
+        lastCommittedRecordOffset = offset;
+    }
+
+    @Override
+    public long lastCommittedRecordOffset() {
+        return lastCommittedRecordOffset;
+    }
+
+    @Override
+    public void setLastAppliedRecordTimestamp(long timestamp) {
+        lastAppliedRecordTimestamp = timestamp;
+    }
+
+    @Override
+    public long lastAppliedRecordTimestamp() {
+        return lastAppliedRecordTimestamp;
+    }
+
     @Override
     public void close() {
         closed = true;
diff --git a/metadata/src/test/java/org/apache/kafka/controller/PartitionChangeBuilderTest.java b/metadata/src/test/java/org/apache/kafka/controller/PartitionChangeBuilderTest.java
index f935a808f0b69..fedfa8c0a5023 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/PartitionChangeBuilderTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/PartitionChangeBuilderTest.java
@@ -19,18 +19,23 @@
 
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.metadata.PartitionChangeRecord;
-import org.apache.kafka.controller.PartitionChangeBuilder.BestLeader;
+import org.apache.kafka.common.protocol.types.TaggedFields;
+import org.apache.kafka.controller.PartitionChangeBuilder.ElectionResult;
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.Replicas;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Optional;
 
 import static org.apache.kafka.common.metadata.MetadataRecordType.PARTITION_CHANGE_RECORD;
+import static org.apache.kafka.controller.PartitionChangeBuilder.Election;
 import static org.apache.kafka.controller.PartitionChangeBuilder.changeRecordIsNoOp;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER_CHANGE;
@@ -43,6 +48,16 @@
 public class PartitionChangeBuilderTest {
     @Test
     public void testChangeRecordIsNoOp() {
+        /* If the next few checks fail please update them based on the latest schema and make sure
+         * to update changeRecordIsNoOp to take into account the new schema or tagged fields.
+         */
+        // Check that the supported versions haven't changed
+        assertEquals(0, PartitionChangeRecord.HIGHEST_SUPPORTED_VERSION);
+        assertEquals(0, PartitionChangeRecord.LOWEST_SUPPORTED_VERSION);
+        // For the latest version check that the number of tagged fields hasn't changed
+        TaggedFields taggedFields = (TaggedFields) PartitionChangeRecord.SCHEMA_0.get(2).def.type;
+        assertEquals(6, taggedFields.numFields());
+
         assertTrue(changeRecordIsNoOp(new PartitionChangeRecord()));
         assertFalse(changeRecordIsNoOp(new PartitionChangeRecord().setLeader(1)));
         assertFalse(changeRecordIsNoOp(new PartitionChangeRecord().
@@ -51,70 +66,80 @@ public void testChangeRecordIsNoOp() {
             setRemovingReplicas(Arrays.asList(1))));
         assertFalse(changeRecordIsNoOp(new PartitionChangeRecord().
             setAddingReplicas(Arrays.asList(4))));
+        assertFalse(
+            changeRecordIsNoOp(
+                new PartitionChangeRecord()
+                  .setLeaderRecoveryState(LeaderRecoveryState.RECOVERED.value())
+            )
+        );
     }
 
     private final static PartitionRegistration FOO = new PartitionRegistration(
         new int[] {2, 1, 3}, new int[] {2, 1, 3}, Replicas.NONE, Replicas.NONE,
-        1, 100, 200);
+        1, LeaderRecoveryState.RECOVERED, 100, 200);
 
     private final static Uuid FOO_ID = Uuid.fromString("FbrrdcfiR-KC2CPSTHaJrg");
 
-    private static PartitionChangeBuilder createFooBuilder(boolean allowUnclean) {
-        return new PartitionChangeBuilder(FOO, FOO_ID, 0, r -> r != 3, () -> allowUnclean);
+    private static PartitionChangeBuilder createFooBuilder() {
+        return new PartitionChangeBuilder(FOO, FOO_ID, 0, r -> r != 3, true);
     }
 
     private final static PartitionRegistration BAR = new PartitionRegistration(
         new int[] {1, 2, 3, 4}, new int[] {1, 2, 3}, new int[] {1}, new int[] {4},
-        1, 100, 200);
+        1, LeaderRecoveryState.RECOVERED, 100, 200);
 
     private final static Uuid BAR_ID = Uuid.fromString("LKfUsCBnQKekvL9O5dY9nw");
 
-    private static PartitionChangeBuilder createBarBuilder(boolean allowUnclean) {
-        return new PartitionChangeBuilder(BAR, BAR_ID, 0, r -> r != 3, () -> allowUnclean);
+    private static PartitionChangeBuilder createBarBuilder() {
+        return new PartitionChangeBuilder(BAR, BAR_ID, 0, r -> r != 3, true);
+    }
+
+    private final static PartitionRegistration BAZ = new PartitionRegistration(
+        new int[] {2, 1, 3}, new int[] {1, 3}, Replicas.NONE, Replicas.NONE,
+        3, LeaderRecoveryState.RECOVERED, 100, 200);
+
+    private final static Uuid BAZ_ID = Uuid.fromString("wQzt5gkSTwuQNXZF5gIw7A");
+
+    private static PartitionChangeBuilder createBazBuilder() {
+        return new PartitionChangeBuilder(BAZ, BAZ_ID, 0, __ -> true, true);
+    }
+
+    private final static PartitionRegistration OFFLINE = new PartitionRegistration(
+        new int[] {2, 1, 3}, new int[] {3}, Replicas.NONE, Replicas.NONE,
+        -1, LeaderRecoveryState.RECOVERED, 100, 200);
+
+    private final static Uuid OFFLINE_ID = Uuid.fromString("LKfUsCBnQKekvL9O5dY9nw");
+
+    private static PartitionChangeBuilder createOfflineBuilder() {
+        return new PartitionChangeBuilder(OFFLINE, OFFLINE_ID, 0, r -> r == 1, true);
     }
 
-    private static void assertBestLeaderEquals(PartitionChangeBuilder builder,
+    private static void assertElectLeaderEquals(PartitionChangeBuilder builder,
                                                int expectedNode,
                                                boolean expectedUnclean) {
-        BestLeader bestLeader = builder.new BestLeader();
-        assertEquals(expectedNode, bestLeader.node);
-        assertEquals(expectedUnclean, bestLeader.unclean);
+        ElectionResult electionResult = builder.electLeader();
+        assertEquals(expectedNode, electionResult.node);
+        assertEquals(expectedUnclean, electionResult.unclean);
     }
 
     @Test
-    public void testBestLeader() {
-        assertBestLeaderEquals(createFooBuilder(false), 2, false);
-        assertBestLeaderEquals(createFooBuilder(true), 2, false);
-        assertBestLeaderEquals(createFooBuilder(false).
-                setTargetIsr(Arrays.asList(1, 3)), 1, false);
-        assertBestLeaderEquals(createFooBuilder(true).
-            setTargetIsr(Arrays.asList(1, 3)), 1, false);
-        assertBestLeaderEquals(createFooBuilder(false).
-            setTargetIsr(Arrays.asList(3)), NO_LEADER, false);
-        assertBestLeaderEquals(createFooBuilder(true).
-            setTargetIsr(Arrays.asList(3)), 2, true);
-        assertBestLeaderEquals(createFooBuilder(true).
-                setTargetIsr(Arrays.asList(4)).setTargetReplicas(Arrays.asList(2, 1, 3, 4)),
-            4, false);
-    }
+    public void testElectLeader() {
+        assertElectLeaderEquals(createFooBuilder().setElection(Election.PREFERRED), 2, false);
+        assertElectLeaderEquals(createFooBuilder(), 1, false);
+        assertElectLeaderEquals(createFooBuilder().setElection(Election.UNCLEAN), 1, false);
+        assertElectLeaderEquals(createFooBuilder().setTargetIsr(Arrays.asList(1, 3)), 1, false);
+        assertElectLeaderEquals(createFooBuilder().setElection(Election.UNCLEAN).setTargetIsr(Arrays.asList(1, 3)), 1, false);
+        assertElectLeaderEquals(createFooBuilder().setTargetIsr(Arrays.asList(3)), NO_LEADER, false);
+        assertElectLeaderEquals(createFooBuilder().setElection(Election.UNCLEAN).setTargetIsr(Arrays.asList(3)), 2, true);
+        assertElectLeaderEquals(
+            createFooBuilder().setElection(Election.UNCLEAN).setTargetIsr(Arrays.asList(4)).setTargetReplicas(Arrays.asList(2, 1, 3, 4)),
+            4,
+            false
+        );
 
-    @Test
-    public void testShouldTryElection() {
-        assertFalse(createFooBuilder(false).shouldTryElection());
-        assertTrue(createFooBuilder(false).setAlwaysElectPreferredIfPossible(true).
-            shouldTryElection());
-        assertTrue(createFooBuilder(false).setTargetIsr(Arrays.asList(2, 3)).
-            shouldTryElection());
-        assertFalse(createFooBuilder(false).setTargetIsr(Arrays.asList(2, 1)).
-            shouldTryElection());
-
-        assertTrue(createFooBuilder(true)
-            .setTargetIsr(Arrays.asList(3))
-            .shouldTryElection());
-        assertTrue(createFooBuilder(true)
-            .setTargetIsr(Arrays.asList(4))
-            .setTargetReplicas(Arrays.asList(2, 1, 3, 4))
-            .shouldTryElection());
+        assertElectLeaderEquals(createBazBuilder().setElection(Election.PREFERRED), 3, false);
+        assertElectLeaderEquals(createBazBuilder(), 3, false);
+        assertElectLeaderEquals(createBazBuilder().setElection(Election.UNCLEAN), 3, false);
     }
 
     private static void testTriggerLeaderEpochBumpIfNeededLeader(PartitionChangeBuilder builder,
@@ -126,27 +151,28 @@ private static void testTriggerLeaderEpochBumpIfNeededLeader(PartitionChangeBuil
 
     @Test
     public void testTriggerLeaderEpochBumpIfNeeded() {
-        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(false),
+        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(),
             new PartitionChangeRecord(), NO_LEADER_CHANGE);
-        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(false).
+        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder().
             setTargetIsr(Arrays.asList(2, 1)), new PartitionChangeRecord(), 1);
-        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(false).
+        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder().
             setTargetIsr(Arrays.asList(2, 1, 3, 4)), new PartitionChangeRecord(),
             NO_LEADER_CHANGE);
-        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(false).
+        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder().
             setTargetReplicas(Arrays.asList(2, 1, 3, 4)), new PartitionChangeRecord(),
             NO_LEADER_CHANGE);
-        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder(false).
+        testTriggerLeaderEpochBumpIfNeededLeader(createFooBuilder().
             setTargetReplicas(Arrays.asList(2, 1, 3, 4)),
             new PartitionChangeRecord().setLeader(2), 2);
     }
 
     @Test
     public void testNoChange() {
-        assertEquals(Optional.empty(), createFooBuilder(false).build());
-        assertEquals(Optional.empty(), createFooBuilder(true).build());
-        assertEquals(Optional.empty(), createBarBuilder(false).build());
-        assertEquals(Optional.empty(), createBarBuilder(true).build());
+        assertEquals(Optional.empty(), createFooBuilder().build());
+        assertEquals(Optional.empty(), createFooBuilder().setElection(Election.UNCLEAN).build());
+        assertEquals(Optional.empty(), createBarBuilder().build());
+        assertEquals(Optional.empty(), createBarBuilder().setElection(Election.UNCLEAN).build());
+        assertEquals(Optional.empty(), createBazBuilder().setElection(Election.PREFERRED).build());
     }
 
     @Test
@@ -156,7 +182,7 @@ public void testIsrChangeAndLeaderBump() {
             setPartitionId(0).
             setIsr(Arrays.asList(2, 1)).
             setLeader(1), PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createFooBuilder(false).setTargetIsr(Arrays.asList(2, 1)).build());
+            createFooBuilder().setTargetIsr(Arrays.asList(2, 1)).build());
     }
 
     @Test
@@ -166,7 +192,7 @@ public void testIsrChangeAndLeaderChange() {
                 setPartitionId(0).
                 setIsr(Arrays.asList(2, 3)).
                 setLeader(2), PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createFooBuilder(false).setTargetIsr(Arrays.asList(2, 3)).build());
+            createFooBuilder().setTargetIsr(Arrays.asList(2, 3)).build());
     }
 
     @Test
@@ -176,7 +202,7 @@ public void testReassignmentRearrangesReplicas() {
                 setPartitionId(0).
                 setReplicas(Arrays.asList(3, 2, 1)),
                 PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createFooBuilder(false).setTargetReplicas(Arrays.asList(3, 2, 1)).build());
+            createFooBuilder().setTargetReplicas(Arrays.asList(3, 2, 1)).build());
     }
 
     @Test
@@ -190,7 +216,7 @@ public void testIsrEnlargementCompletesReassignment() {
                 setRemovingReplicas(Collections.emptyList()).
                 setAddingReplicas(Collections.emptyList()),
                 PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createBarBuilder(false).setTargetIsr(Arrays.asList(1, 2, 3, 4)).build());
+            createBarBuilder().setTargetIsr(Arrays.asList(1, 2, 3, 4)).build());
     }
 
     @Test
@@ -206,7 +232,7 @@ public void testRevertReassignment() {
                 setRemovingReplicas(Collections.emptyList()).
                 setAddingReplicas(Collections.emptyList()),
                 PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createBarBuilder(false).
+            createBarBuilder().
                 setTargetReplicas(revert.replicas()).
                 setTargetIsr(revert.isr()).
                 setTargetRemoving(Collections.emptyList()).
@@ -228,7 +254,7 @@ public void testRemovingReplicaReassignment() {
                 setIsr(Arrays.asList(2, 1)).
                 setLeader(1),
                 PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createFooBuilder(false).
+            createFooBuilder().
                 setTargetReplicas(replicas.merged()).
                 setTargetRemoving(replicas.removing()).
                 build());
@@ -247,9 +273,160 @@ public void testAddingReplicaReassignment() {
                 setReplicas(Arrays.asList(1, 2, 3, 4)).
                 setAddingReplicas(Collections.singletonList(4)),
                 PARTITION_CHANGE_RECORD.highestSupportedVersion())),
-            createFooBuilder(false).
+            createFooBuilder().
                 setTargetReplicas(replicas.merged()).
                 setTargetAdding(replicas.adding()).
                 build());
     }
+
+    @Test
+    public void testUncleanLeaderElection() {
+        ApiMessageAndVersion expectedRecord = new ApiMessageAndVersion(
+            new PartitionChangeRecord()
+                .setTopicId(FOO_ID)
+                .setPartitionId(0)
+                .setIsr(Arrays.asList(2))
+                .setLeader(2)
+                .setLeaderRecoveryState(LeaderRecoveryState.RECOVERING.value()),
+            PARTITION_CHANGE_RECORD.highestSupportedVersion()
+        );
+        assertEquals(
+            Optional.of(expectedRecord),
+            createFooBuilder().setElection(Election.UNCLEAN).setTargetIsr(Arrays.asList(3)).build()
+        );
+
+        expectedRecord = new ApiMessageAndVersion(
+            new PartitionChangeRecord()
+                .setTopicId(OFFLINE_ID)
+                .setPartitionId(0)
+                .setIsr(Arrays.asList(1))
+                .setLeader(1)
+                .setLeaderRecoveryState(LeaderRecoveryState.RECOVERING.value()),
+            PARTITION_CHANGE_RECORD.highestSupportedVersion()
+        );
+        assertEquals(
+            Optional.of(expectedRecord),
+            createOfflineBuilder().setElection(Election.UNCLEAN).build()
+        );
+        assertEquals(
+            Optional.of(expectedRecord),
+            createOfflineBuilder().setElection(Election.UNCLEAN).setTargetIsr(Arrays.asList(2)).build()
+        );
+    }
+
+    @ParameterizedTest
+    @ValueSource(booleans = {true, false})
+    public void testChangeInLeadershipDoesNotChangeRecoveryState(boolean isLeaderRecoverySupported) {
+        final byte noChange = (byte) -1;
+        int leaderId = 1;
+        LeaderRecoveryState recoveryState = LeaderRecoveryState.RECOVERING;
+        PartitionRegistration registration = new PartitionRegistration(
+            new int[] {leaderId, leaderId + 1, leaderId + 2},
+            new int[] {leaderId},
+            Replicas.NONE,
+            Replicas.NONE,
+            leaderId,
+            recoveryState,
+            100,
+            200
+        );
+
+        // Change the partition so that there is no leader
+        PartitionChangeBuilder offlineBuilder = new PartitionChangeBuilder(
+            registration,
+            FOO_ID,
+            0,
+            brokerId -> false,
+            isLeaderRecoverySupported
+        );
+        // Set the target ISR to empty to indicate that the last leader is offline
+        offlineBuilder.setTargetIsr(Collections.emptyList());
+
+        // The partition should stay as recovering
+        PartitionChangeRecord changeRecord = (PartitionChangeRecord) offlineBuilder
+            .build()
+            .get()
+            .message();
+        assertEquals(noChange, changeRecord.leaderRecoveryState());
+        assertEquals(NO_LEADER, changeRecord.leader());
+
+        registration = registration.merge(changeRecord);
+
+        assertEquals(NO_LEADER, registration.leader);
+        assertEquals(leaderId, registration.isr[0]);
+        assertEquals(recoveryState, registration.leaderRecoveryState);
+
+        // Bring the leader back online
+        PartitionChangeBuilder onlineBuilder = new PartitionChangeBuilder(
+            registration,
+            FOO_ID,
+            0,
+            brokerId -> true,
+            isLeaderRecoverySupported
+        );
+
+        // The only broker in the ISR is elected leader and stays in the recovering
+        changeRecord = (PartitionChangeRecord) onlineBuilder.build().get().message();
+        assertEquals(noChange, changeRecord.leaderRecoveryState());
+
+        registration = registration.merge(changeRecord);
+
+        assertEquals(leaderId, registration.leader);
+        assertEquals(leaderId, registration.isr[0]);
+        assertEquals(recoveryState, registration.leaderRecoveryState);
+    }
+
+    @ParameterizedTest
+    @ValueSource(booleans = {true, false})
+    void testUncleanSetsLeaderRecoveringState(boolean isLeaderRecoverySupported) {
+        final byte noChange = (byte) -1;
+        int leaderId = 1;
+        PartitionRegistration registration = new PartitionRegistration(
+            new int[] {leaderId, leaderId + 1, leaderId + 2},
+            new int[] {leaderId + 1, leaderId + 2},
+            Replicas.NONE,
+            Replicas.NONE,
+            NO_LEADER,
+            LeaderRecoveryState.RECOVERED,
+            100,
+            200
+        );
+
+        // Change the partition using unclean leader election
+        PartitionChangeBuilder onlineBuilder = new PartitionChangeBuilder(
+            registration,
+            FOO_ID,
+            0,
+            brokerId -> brokerId == leaderId,
+            isLeaderRecoverySupported
+        ).setElection(Election.UNCLEAN);
+        
+
+        // The partition should stay as recovering
+        PartitionChangeRecord changeRecord = (PartitionChangeRecord) onlineBuilder
+            .build()
+            .get()
+            .message();
+
+        byte expectedRecoveryChange = noChange;
+        if (isLeaderRecoverySupported) {
+            expectedRecoveryChange = LeaderRecoveryState.RECOVERING.value();
+        }
+
+        assertEquals(expectedRecoveryChange, changeRecord.leaderRecoveryState());
+        assertEquals(leaderId, changeRecord.leader());
+        assertEquals(1, changeRecord.isr().size());
+        assertEquals(leaderId, changeRecord.isr().get(0));
+
+        registration = registration.merge(changeRecord);
+
+        LeaderRecoveryState expectedRecovery = LeaderRecoveryState.RECOVERED;
+        if (isLeaderRecoverySupported) {
+            expectedRecovery = LeaderRecoveryState.RECOVERING;
+        }
+
+        assertEquals(leaderId, registration.leader);
+        assertEquals(leaderId, registration.isr[0]);
+        assertEquals(expectedRecovery, registration.leaderRecoveryState);
+    }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/PartitionReassignmentRevertTest.java b/metadata/src/test/java/org/apache/kafka/controller/PartitionReassignmentRevertTest.java
index 26120be8fb820..6d32b2eb7dbdf 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/PartitionReassignmentRevertTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/PartitionReassignmentRevertTest.java
@@ -19,6 +19,7 @@
 
 import java.util.Arrays;
 
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.Replicas;
 import org.junit.jupiter.api.Test;
@@ -35,7 +36,7 @@ public class PartitionReassignmentRevertTest {
     public void testNoneAddedOrRemoved() {
         PartitionRegistration registration = new PartitionRegistration(
             new int[] {3, 2, 1}, new int[] {3, 2},
-                Replicas.NONE, Replicas.NONE, 3, 100, 200);
+                Replicas.NONE, Replicas.NONE, 3, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionReassignmentRevert revert = new PartitionReassignmentRevert(registration);
         assertEquals(Arrays.asList(3, 2, 1), revert.replicas());
         assertEquals(Arrays.asList(3, 2), revert.isr());
@@ -46,7 +47,7 @@ public void testNoneAddedOrRemoved() {
     public void testSomeRemoving() {
         PartitionRegistration registration = new PartitionRegistration(
             new int[] {3, 2, 1}, new int[] {3, 2},
-            new int[] {2, 1}, Replicas.NONE, 3, 100, 200);
+            new int[] {2, 1}, Replicas.NONE, 3, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionReassignmentRevert revert = new PartitionReassignmentRevert(registration);
         assertEquals(Arrays.asList(3, 2, 1), revert.replicas());
         assertEquals(Arrays.asList(3, 2), revert.isr());
@@ -57,7 +58,7 @@ public void testSomeRemoving() {
     public void testSomeAdding() {
         PartitionRegistration registration = new PartitionRegistration(
             new int[] {4, 5, 3, 2, 1}, new int[] {4, 5, 2},
-            Replicas.NONE, new int[] {4, 5}, 3, 100, 200);
+            Replicas.NONE, new int[] {4, 5}, 3, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionReassignmentRevert revert = new PartitionReassignmentRevert(registration);
         assertEquals(Arrays.asList(3, 2, 1), revert.replicas());
         assertEquals(Arrays.asList(2), revert.isr());
@@ -68,7 +69,7 @@ public void testSomeAdding() {
     public void testSomeRemovingAndAdding() {
         PartitionRegistration registration = new PartitionRegistration(
             new int[] {4, 5, 3, 2, 1}, new int[] {4, 5, 2},
-            new int[] {2}, new int[] {4, 5}, 3, 100, 200);
+            new int[] {2}, new int[] {4, 5}, 3, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionReassignmentRevert revert = new PartitionReassignmentRevert(registration);
         assertEquals(Arrays.asList(3, 2, 1), revert.replicas());
         assertEquals(Arrays.asList(2), revert.isr());
@@ -79,7 +80,7 @@ public void testSomeRemovingAndAdding() {
     public void testIsrSpecialCase() {
         PartitionRegistration registration = new PartitionRegistration(
             new int[] {4, 5, 3, 2, 1}, new int[] {4, 5},
-            new int[] {2}, new int[] {4, 5}, 3, 100, 200);
+            new int[] {2}, new int[] {4, 5}, 3, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionReassignmentRevert revert = new PartitionReassignmentRevert(registration);
         assertEquals(Arrays.asList(3, 2, 1), revert.replicas());
         assertEquals(Arrays.asList(3), revert.isr());
diff --git a/metadata/src/test/java/org/apache/kafka/controller/ProducerIdControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/ProducerIdControlManagerTest.java
index 21613607dc06c..80c5c505ae0eb 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/ProducerIdControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/ProducerIdControlManagerTest.java
@@ -17,7 +17,8 @@
 
 package org.apache.kafka.controller;
 
-import org.apache.kafka.common.Uuid;
+import java.util.Collections;
+import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.common.errors.StaleBrokerEpochException;
 import org.apache.kafka.common.errors.UnknownServerException;
 import org.apache.kafka.common.metadata.ProducerIdsRecord;
@@ -26,6 +27,7 @@
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.server.common.ProducerIdsBlock;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.junit.jupiter.api.BeforeEach;
@@ -33,7 +35,6 @@
 
 import java.util.Iterator;
 import java.util.List;
-import java.util.Random;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -44,19 +45,28 @@
 public class ProducerIdControlManagerTest {
 
     private SnapshotRegistry snapshotRegistry;
+    private FeatureControlManager featureControl;
     private ClusterControlManager clusterControl;
     private ProducerIdControlManager producerIdControlManager;
 
     @BeforeEach
     public void setUp() {
-        final LogContext logContext = new LogContext();
-        String clusterId = Uuid.randomUuid().toString();
         final MockTime time = new MockTime();
-        final Random random = new Random();
-        snapshotRegistry = new SnapshotRegistry(logContext);
-        clusterControl = new ClusterControlManager(
-            logContext, clusterId, time, snapshotRegistry, 1000,
-            new StripedReplicaPlacer(random), new MockControllerMetrics());
+        snapshotRegistry = new SnapshotRegistry(new LogContext());
+        featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        clusterControl = new ClusterControlManager.Builder().
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(1000).
+            setControllerMetrics(new MockControllerMetrics()).
+            setFeatureControlManager(featureControl).
+            build();
 
         clusterControl.activate();
         for (int i = 0; i < 4; i++) {
@@ -66,7 +76,7 @@ public void setUp() {
                     setPort((short) 9092).
                     setName("PLAINTEXT").
                     setHost(String.format("broker-%02d.example.org", i)));
-            clusterControl.replay(brokerRecord);
+            clusterControl.replay(brokerRecord, 100L);
         }
 
         this.producerIdControlManager = new ProducerIdControlManager(clusterControl, snapshotRegistry);
diff --git a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerMetricsTest.java b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerMetricsTest.java
index 4b0afb52c65a2..400b860197e53 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerMetricsTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerMetricsTest.java
@@ -17,10 +17,12 @@
 
 package org.apache.kafka.controller;
 
+import com.yammer.metrics.core.Gauge;
 import com.yammer.metrics.core.Histogram;
 import com.yammer.metrics.core.MetricName;
 import com.yammer.metrics.core.MetricsRegistry;
 import java.util.Set;
+import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Utils;
 import org.junit.jupiter.api.Test;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -34,10 +36,18 @@ public void testKafkaControllerMetricNames() {
         String expectedType = "KafkaController";
         Set<String> expectedMetricNames = Utils.mkSet(
             "ActiveControllerCount",
+            "FencedBrokerCount",
+            "ActiveBrokerCount",
             "GlobalTopicCount",
             "GlobalPartitionCount",
             "OfflinePartitionsCount",
-            "PreferredReplicaImbalanceCount");
+            "PreferredReplicaImbalanceCount",
+            "MetadataErrorCount",
+            "LastAppliedRecordLagMs",
+            "LastAppliedRecordOffset",
+            "LastAppliedRecordTimestamp",
+            "LastCommittedRecordOffset"
+        );
         assertMetricsCreatedAndRemovedUponClose(expectedType, expectedMetricNames);
     }
 
@@ -53,8 +63,9 @@ public void testControllerEventManagerMetricNames() {
     @Test
     public void testUpdateEventQueueTime() {
         MetricsRegistry registry = new MetricsRegistry();
+        MockTime time = new MockTime();
         try {
-            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry)) {
+            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry, time)) {
                 quorumControllerMetrics.updateEventQueueTime(1000);
                 assertMetricHistogram(registry, metricName("ControllerEventManager", "EventQueueTimeMs"), 1, 1000);
             }
@@ -66,8 +77,9 @@ public void testUpdateEventQueueTime() {
     @Test
     public void testUpdateEventQueueProcessingTime() {
         MetricsRegistry registry = new MetricsRegistry();
+        MockTime time = new MockTime();
         try {
-            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry)) {
+            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry, time)) {
                 quorumControllerMetrics.updateEventQueueProcessingTime(1000);
                 assertMetricHistogram(registry, metricName("ControllerEventManager", "EventQueueProcessingTimeMs"), 1, 1000);
             }
@@ -76,10 +88,70 @@ public void testUpdateEventQueueProcessingTime() {
         }
     }
 
+    @Test
+    public void testLastAppliedRecordMetrics() {
+        MetricsRegistry registry = new MetricsRegistry();
+        MockTime time = new MockTime();
+        time.sleep(1000);
+        try {
+            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry, time)) {
+                quorumControllerMetrics.setLastAppliedRecordOffset(100);
+                quorumControllerMetrics.setLastAppliedRecordTimestamp(500);
+                quorumControllerMetrics.setLastCommittedRecordOffset(50);
+
+                @SuppressWarnings("unchecked")
+                Gauge<Long> lastAppliedRecordOffset = (Gauge<Long>) registry
+                    .allMetrics()
+                    .get(metricName("KafkaController", "LastAppliedRecordOffset"));
+                assertEquals(100, lastAppliedRecordOffset.value());
+
+                @SuppressWarnings("unchecked")
+                Gauge<Long> lastAppliedRecordTimestamp = (Gauge<Long>) registry
+                    .allMetrics()
+                    .get(metricName("KafkaController", "LastAppliedRecordTimestamp"));
+                assertEquals(500, lastAppliedRecordTimestamp.value());
+
+                @SuppressWarnings("unchecked")
+                Gauge<Long> lastAppliedRecordLagMs = (Gauge<Long>) registry
+                    .allMetrics()
+                    .get(metricName("KafkaController", "LastAppliedRecordLagMs"));
+                assertEquals(time.milliseconds() - 500, lastAppliedRecordLagMs.value());
+
+                @SuppressWarnings("unchecked")
+                Gauge<Long> lastCommittedRecordOffset = (Gauge<Long>) registry
+                    .allMetrics()
+                    .get(metricName("KafkaController", "LastCommittedRecordOffset"));
+                assertEquals(50, lastCommittedRecordOffset.value());
+            }
+        } finally {
+            registry.shutdown();
+        }
+    }
+
+    @Test
+    public void testMetadataErrorCount() {
+        MetricsRegistry registry = new MetricsRegistry();
+        MockTime time = new MockTime();
+        try {
+            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry, time)) {
+                @SuppressWarnings("unchecked")
+                Gauge<Integer> metadataErrorCount = (Gauge<Integer>) registry
+                        .allMetrics()
+                        .get(metricName("KafkaController", "MetadataErrorCount"));
+                assertEquals(0, metadataErrorCount.value());
+                quorumControllerMetrics.incrementMetadataErrorCount();
+                assertEquals(1, metadataErrorCount.value());
+            }
+        } finally {
+            registry.shutdown();
+        }
+    }
+
     private static void assertMetricsCreatedAndRemovedUponClose(String expectedType, Set<String> expectedMetricNames) {
         MetricsRegistry registry = new MetricsRegistry();
+        MockTime time = new MockTime();
         try {
-            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry)) {
+            try (QuorumControllerMetrics quorumControllerMetrics = new QuorumControllerMetrics(registry, time)) {
                 assertMetricsCreated(registry, expectedMetricNames, expectedType);
             }
             assertMetricsRemoved(registry, expectedMetricNames, expectedType);
@@ -101,10 +173,18 @@ private static MetricName metricName(String type, String name) {
     }
 
     private static void assertMetricsCreated(MetricsRegistry registry, Set<String> expectedMetricNames, String expectedType) {
+        assertEquals(registry.allMetrics().keySet().stream()
+                .filter(k -> k.getType() == expectedType).count(),
+                expectedMetricNames.size());
         expectedMetricNames.forEach(expectedName -> {
             MetricName expectMetricName = metricName(expectedType, expectedName);
             assertTrue(registry.allMetrics().containsKey(expectMetricName), "Missing metric: " + expectMetricName);
         });
+        registry.allMetrics().forEach((actualMetricName, actualMetric) -> {
+            if (actualMetricName.getType() == expectedType) {
+                assertTrue(expectedMetricNames.contains(actualMetricName.getName()), "Unexpected metric: " + actualMetricName);
+            }
+        });
     }
 
     private static void assertMetricsRemoved(MetricsRegistry registry, Set<String> expectedMetricNames, String expectedType) {
diff --git a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
index 26300d5a06375..e839289562682 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.kafka.controller;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -24,24 +25,33 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.OptionalLong;
+import java.util.Set;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
-import java.util.stream.StreamSupport;
 import java.util.stream.IntStream;
+import java.util.stream.StreamSupport;
 
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.errors.BrokerIdNotRegisteredException;
 import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
+import org.apache.kafka.common.message.RequestHeaderData;
+import org.apache.kafka.common.metadata.ConfigRecord;
+import org.apache.kafka.common.security.auth.KafkaPrincipal;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.common.config.ConfigResource;
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.message.AllocateProducerIdsRequestData;
-import org.apache.kafka.common.message.AlterIsrRequestData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignableTopic;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData;
@@ -61,6 +71,7 @@
 import org.apache.kafka.common.message.ElectLeadersResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
+import org.apache.kafka.common.metadata.FeatureLevelRecord;
 import org.apache.kafka.common.metadata.PartitionRecord;
 import org.apache.kafka.common.metadata.ProducerIdsRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
@@ -70,30 +81,34 @@
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.common.utils.BufferSupplier;
-import org.apache.kafka.controller.BrokersToIsrs.TopicIdPartition;
 import org.apache.kafka.controller.QuorumController.ConfigResourceExistenceChecker;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistrationReply;
 import org.apache.kafka.metadata.MetadataRecordSerde;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.RecordTestUtils;
+import org.apache.kafka.metadata.authorizer.StandardAuthorizer;
+import org.apache.kafka.metalog.LocalLogManager;
 import org.apache.kafka.metalog.LocalLogManagerTestEnv;
 import org.apache.kafka.raft.Batch;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.snapshot.SnapshotReader;
 import org.apache.kafka.snapshot.RawSnapshotReader;
 import org.apache.kafka.snapshot.RecordsSnapshotReader;
 import org.apache.kafka.test.TestUtils;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
+import org.mockito.Mockito;
 
-import static java.util.concurrent.TimeUnit.HOURS;
+import static java.util.function.Function.identity;
 import static org.apache.kafka.clients.admin.AlterConfigOp.OpType.SET;
 import static org.apache.kafka.common.config.ConfigResource.Type.BROKER;
 import static org.apache.kafka.common.config.ConfigResource.Type.TOPIC;
 import static org.apache.kafka.controller.ConfigurationControlManagerTest.BROKER0;
 import static org.apache.kafka.controller.ConfigurationControlManagerTest.SCHEMA;
 import static org.apache.kafka.controller.ConfigurationControlManagerTest.entry;
+import static org.apache.kafka.controller.ControllerRequestContextUtil.ANONYMOUS_CONTEXT;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
@@ -131,7 +146,8 @@ public void testConfigurationOperations() throws Throwable {
                 b.setConfigSchema(SCHEMA);
             })
         ) {
-            controlEnv.activeController().registerBroker(new BrokerRegistrationRequestData().
+            controlEnv.activeController().registerBroker(ANONYMOUS_CONTEXT,
+                new BrokerRegistrationRequestData().
                 setBrokerId(0).setClusterId(logEnv.clusterId())).get();
             testConfigurationOperations(controlEnv.activeController());
         }
@@ -139,18 +155,18 @@ public void testConfigurationOperations() throws Throwable {
 
     private void testConfigurationOperations(QuorumController controller) throws Throwable {
         assertEquals(Collections.singletonMap(BROKER0, ApiError.NONE),
-            controller.incrementalAlterConfigs(Collections.singletonMap(
+            controller.incrementalAlterConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.singletonMap("baz", entry(SET, "123"))), true).get());
         assertEquals(Collections.singletonMap(BROKER0,
             new ResultOrError<>(Collections.emptyMap())),
-            controller.describeConfigs(Collections.singletonMap(
+            controller.describeConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.emptyList())).get());
         assertEquals(Collections.singletonMap(BROKER0, ApiError.NONE),
-            controller.incrementalAlterConfigs(Collections.singletonMap(
+            controller.incrementalAlterConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.singletonMap("baz", entry(SET, "123"))), false).get());
         assertEquals(Collections.singletonMap(BROKER0, new ResultOrError<>(Collections.
                 singletonMap("baz", "123"))),
-            controller.describeConfigs(Collections.singletonMap(
+            controller.describeConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.emptyList())).get());
     }
 
@@ -166,8 +182,9 @@ public void testDelayedConfigurationOperations() throws Throwable {
                 b.setConfigSchema(SCHEMA);
             })
         ) {
-            controlEnv.activeController().registerBroker(new BrokerRegistrationRequestData().
-                setBrokerId(0).setClusterId(logEnv.clusterId())).get();
+            controlEnv.activeController().registerBroker(ANONYMOUS_CONTEXT,
+                new BrokerRegistrationRequestData().
+                    setBrokerId(0).setClusterId(logEnv.clusterId())).get();
             testDelayedConfigurationOperations(logEnv, controlEnv.activeController());
         }
     }
@@ -175,16 +192,16 @@ public void testDelayedConfigurationOperations() throws Throwable {
     private void testDelayedConfigurationOperations(LocalLogManagerTestEnv logEnv,
                                                     QuorumController controller)
                                                     throws Throwable {
-        logEnv.logManagers().forEach(m -> m.setMaxReadOffset(0L));
+        logEnv.logManagers().forEach(m -> m.setMaxReadOffset(1L));
         CompletableFuture<Map<ConfigResource, ApiError>> future1 =
-            controller.incrementalAlterConfigs(Collections.singletonMap(
+            controller.incrementalAlterConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.singletonMap("baz", entry(SET, "123"))), false);
         assertFalse(future1.isDone());
         assertEquals(Collections.singletonMap(BROKER0,
             new ResultOrError<>(Collections.emptyMap())),
-            controller.describeConfigs(Collections.singletonMap(
+            controller.describeConfigs(ANONYMOUS_CONTEXT, Collections.singletonMap(
                 BROKER0, Collections.emptyList())).get());
-        logEnv.logManagers().forEach(m -> m.setMaxReadOffset(2L));
+        logEnv.logManagers().forEach(m -> m.setMaxReadOffset(3L));
         assertEquals(Collections.singletonMap(BROKER0, ApiError.NONE), future1.get());
     }
 
@@ -193,14 +210,15 @@ public void testFenceMultipleBrokers() throws Throwable {
         List<Integer> allBrokers = Arrays.asList(1, 2, 3, 4, 5);
         List<Integer> brokersToKeepUnfenced = Arrays.asList(1);
         List<Integer> brokersToFence = Arrays.asList(2, 3, 4, 5);
-        short replicationFactor = 5;
+        short replicationFactor = (short) allBrokers.size();
+        short numberOfPartitions = (short) allBrokers.size();
         long sessionTimeoutMillis = 1000;
 
         try (
             LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(1, Optional.empty());
             QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
                 b.setConfigSchema(SCHEMA);
-            }, Optional.of(sessionTimeoutMillis));
+            }, OptionalLong.of(sessionTimeoutMillis), OptionalLong.empty(), BootstrapMetadata.create(MetadataVersion.latest()));
         ) {
             ListenerCollection listeners = new ListenerCollection();
             listeners.add(new Listener().setName("PLAINTEXT").setHost("localhost").setPort(9092));
@@ -209,6 +227,7 @@ public void testFenceMultipleBrokers() throws Throwable {
 
             for (Integer brokerId : allBrokers) {
                 CompletableFuture<BrokerRegistrationReply> reply = active.registerBroker(
+                    ANONYMOUS_CONTEXT,
                     new BrokerRegistrationRequestData().
                         setBrokerId(brokerId).
                         setClusterId(active.clusterId()).
@@ -219,7 +238,7 @@ public void testFenceMultipleBrokers() throws Throwable {
 
             // Brokers are only registered and should still be fenced
             allBrokers.forEach(brokerId -> {
-                assertFalse(active.replicationControl().isBrokerUnfenced(brokerId),
+                assertFalse(active.clusterControl().unfenced(brokerId),
                     "Broker " + brokerId + " should have been fenced");
             });
 
@@ -227,9 +246,11 @@ public void testFenceMultipleBrokers() throws Throwable {
             sendBrokerheartbeat(active, allBrokers, brokerEpochs);
             CreateTopicsRequestData createTopicsRequestData = new CreateTopicsRequestData().setTopics(
                 new CreatableTopicCollection(Collections.singleton(
-                    new CreatableTopic().setName("foo").setNumPartitions(1).
+                    new CreatableTopic().setName("foo").setNumPartitions(numberOfPartitions).
                         setReplicationFactor(replicationFactor)).iterator()));
-            CreateTopicsResponseData createTopicsResponseData = active.createTopics(createTopicsRequestData).get();
+            CreateTopicsResponseData createTopicsResponseData = active.createTopics(
+                ANONYMOUS_CONTEXT, createTopicsRequestData,
+                Collections.singleton("foo")).get();
             assertEquals(Errors.NONE, Errors.forCode(createTopicsResponseData.topics().find("foo").errorCode()));
             Uuid topicIdFoo = createTopicsResponseData.topics().find("foo").topicId();
 
@@ -237,7 +258,7 @@ public void testFenceMultipleBrokers() throws Throwable {
             TestUtils.waitForCondition(() -> {
                     sendBrokerheartbeat(active, brokersToKeepUnfenced, brokerEpochs);
                     for (Integer brokerId : brokersToFence) {
-                        if (active.replicationControl().isBrokerUnfenced(brokerId)) {
+                        if (active.clusterControl().unfenced(brokerId)) {
                             return false;
                         }
                     }
@@ -251,11 +272,11 @@ public void testFenceMultipleBrokers() throws Throwable {
 
             // At this point only the brokers we want fenced should be fenced.
             brokersToKeepUnfenced.forEach(brokerId -> {
-                assertTrue(active.replicationControl().isBrokerUnfenced(brokerId),
+                assertTrue(active.clusterControl().unfenced(brokerId),
                     "Broker " + brokerId + " should have been unfenced");
             });
             brokersToFence.forEach(brokerId -> {
-                assertFalse(active.replicationControl().isBrokerUnfenced(brokerId),
+                assertFalse(active.clusterControl().unfenced(brokerId),
                     "Broker " + brokerId + " should have been fenced");
             });
 
@@ -269,6 +290,190 @@ public void testFenceMultipleBrokers() throws Throwable {
 
             int fooLeader = active.replicationControl().getPartition(topicIdFoo, 0).leader;
             assertEquals(expectedIsr[0], fooLeader);
+
+            // Check that there are imbalaned partitions
+            assertTrue(active.replicationControl().arePartitionLeadersImbalanced());
+        }
+    }
+
+    @Test
+    public void testBalancePartitionLeaders() throws Throwable {
+        List<Integer> allBrokers = Arrays.asList(1, 2, 3);
+        List<Integer> brokersToKeepUnfenced = Arrays.asList(1, 2);
+        List<Integer> brokersToFence = Arrays.asList(3);
+        short replicationFactor = (short) allBrokers.size();
+        short numberOfPartitions = (short) allBrokers.size();
+        long sessionTimeoutMillis = 1000;
+        long leaderImbalanceCheckIntervalNs = 1_000_000_000;
+
+        try (
+            LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(1, Optional.empty());
+            QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
+                b.setConfigSchema(SCHEMA);
+            }, OptionalLong.of(sessionTimeoutMillis), OptionalLong.of(leaderImbalanceCheckIntervalNs), BootstrapMetadata.create(MetadataVersion.latest()));
+        ) {
+            ListenerCollection listeners = new ListenerCollection();
+            listeners.add(new Listener().setName("PLAINTEXT").setHost("localhost").setPort(9092));
+            QuorumController active = controlEnv.activeController();
+            Map<Integer, Long> brokerEpochs = new HashMap<>();
+
+            for (Integer brokerId : allBrokers) {
+                CompletableFuture<BrokerRegistrationReply> reply = active.registerBroker(
+                    ANONYMOUS_CONTEXT,
+                    new BrokerRegistrationRequestData().
+                        setBrokerId(brokerId).
+                        setClusterId(active.clusterId()).
+                        setIncarnationId(Uuid.randomUuid()).
+                        setListeners(listeners));
+                brokerEpochs.put(brokerId, reply.get().epoch());
+            }
+
+            // Brokers are only registered and should still be fenced
+            allBrokers.forEach(brokerId -> {
+                assertFalse(active.clusterControl().unfenced(brokerId),
+                    "Broker " + brokerId + " should have been fenced");
+            });
+
+            // Unfence all brokers and create a topic foo
+            sendBrokerheartbeat(active, allBrokers, brokerEpochs);
+            CreateTopicsRequestData createTopicsRequestData = new CreateTopicsRequestData().setTopics(
+                new CreatableTopicCollection(Collections.singleton(
+                    new CreatableTopic().setName("foo").setNumPartitions(numberOfPartitions).
+                        setReplicationFactor(replicationFactor)).iterator()));
+            CreateTopicsResponseData createTopicsResponseData = active.createTopics(
+                ANONYMOUS_CONTEXT, createTopicsRequestData, Collections.singleton("foo")).get();
+            assertEquals(Errors.NONE, Errors.forCode(createTopicsResponseData.topics().find("foo").errorCode()));
+            Uuid topicIdFoo = createTopicsResponseData.topics().find("foo").topicId();
+
+            // Fence some of the brokers
+            TestUtils.waitForCondition(
+                () -> {
+                    sendBrokerheartbeat(active, brokersToKeepUnfenced, brokerEpochs);
+                    for (Integer brokerId : brokersToFence) {
+                        if (active.clusterControl().unfenced(brokerId)) {
+                            return false;
+                        }
+                    }
+                    return true;
+                },
+                sessionTimeoutMillis * 3,
+                "Fencing of brokers did not process within expected time"
+            );
+
+            // Send another heartbeat to the brokers we want to keep alive
+            sendBrokerheartbeat(active, brokersToKeepUnfenced, brokerEpochs);
+
+            // At this point only the brokers we want fenced should be fenced.
+            brokersToKeepUnfenced.forEach(brokerId -> {
+                assertTrue(active.clusterControl().unfenced(brokerId),
+                    "Broker " + brokerId + " should have been unfenced");
+            });
+            brokersToFence.forEach(brokerId -> {
+                assertFalse(active.clusterControl().unfenced(brokerId),
+                    "Broker " + brokerId + " should have been fenced");
+            });
+
+            // Check that there are imbalaned partitions
+            assertTrue(active.replicationControl().arePartitionLeadersImbalanced());
+
+            // Re-register all fenced brokers
+            for (Integer brokerId : brokersToFence) {
+                CompletableFuture<BrokerRegistrationReply> reply = active.registerBroker(
+                    ANONYMOUS_CONTEXT,
+                    new BrokerRegistrationRequestData().
+                        setBrokerId(brokerId).
+                        setClusterId(active.clusterId()).
+                        setIncarnationId(Uuid.randomUuid()).
+                        setListeners(listeners));
+                brokerEpochs.put(brokerId, reply.get().epoch());
+            }
+
+            // Unfence all brokers
+            sendBrokerheartbeat(active, allBrokers, brokerEpochs);
+
+            // Let the unfenced broker, 3, join the ISR partition 2
+            Set<TopicIdPartition> imbalancedPartitions = active.replicationControl().imbalancedPartitions();
+            assertEquals(1, imbalancedPartitions.size());
+            int imbalancedPartitionId = imbalancedPartitions.iterator().next().partitionId();
+            PartitionRegistration partitionRegistration = active.replicationControl().getPartition(topicIdFoo, imbalancedPartitionId);
+            AlterPartitionRequestData.PartitionData partitionData = new AlterPartitionRequestData.PartitionData()
+                .setPartitionIndex(imbalancedPartitionId)
+                .setLeaderEpoch(partitionRegistration.leaderEpoch)
+                .setPartitionEpoch(partitionRegistration.partitionEpoch)
+                .setNewIsr(Arrays.asList(1, 2, 3));
+
+            AlterPartitionRequestData.TopicData topicData = new AlterPartitionRequestData.TopicData()
+                .setTopicName("foo");
+            topicData.partitions().add(partitionData);
+
+            AlterPartitionRequestData alterPartitionRequest = new AlterPartitionRequestData()
+                .setBrokerId(partitionRegistration.leader)
+                .setBrokerEpoch(brokerEpochs.get(partitionRegistration.leader));
+            alterPartitionRequest.topics().add(topicData);
+
+            active.alterPartition(ANONYMOUS_CONTEXT, alterPartitionRequest).get();
+
+            // Check that partitions are balanced
+            AtomicLong lastHeartbeat = new AtomicLong(active.time().milliseconds());
+            TestUtils.waitForCondition(
+                () -> {
+                    if (active.time().milliseconds() > lastHeartbeat.get() + (sessionTimeoutMillis / 2)) {
+                        lastHeartbeat.set(active.time().milliseconds());
+                        sendBrokerheartbeat(active, allBrokers, brokerEpochs);
+                    }
+                    return !active.replicationControl().arePartitionLeadersImbalanced();
+                },
+                TimeUnit.MILLISECONDS.convert(leaderImbalanceCheckIntervalNs * 10, TimeUnit.NANOSECONDS),
+                "Leaders where not balanced after unfencing all of the brokers"
+            );
+        }
+    }
+
+    @Test
+    public void testNoOpRecordWriteAfterTimeout() throws Throwable {
+        long maxIdleIntervalNs = 1_000;
+        long maxReplicationDelayMs = 60_000;
+        try (
+            LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(3, Optional.empty());
+            QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(
+                logEnv,
+                builder -> {
+                    builder.setConfigSchema(SCHEMA)
+                        .setMaxIdleIntervalNs(OptionalLong.of(maxIdleIntervalNs));
+                }
+            );
+        ) {
+            ListenerCollection listeners = new ListenerCollection();
+            listeners.add(new Listener().setName("PLAINTEXT").setHost("localhost").setPort(9092));
+            QuorumController active = controlEnv.activeController();
+
+            LocalLogManager localLogManager = logEnv
+                .logManagers()
+                .stream()
+                .filter(logManager -> logManager.nodeId().equals(OptionalInt.of(active.nodeId())))
+                .findAny()
+                .get();
+            TestUtils.waitForCondition(
+                () -> localLogManager.highWatermark().isPresent(),
+                maxReplicationDelayMs,
+                "High watermark was not established"
+            );
+
+
+            final long firstHighWatermark = localLogManager.highWatermark().getAsLong();
+            TestUtils.waitForCondition(
+                () -> localLogManager.highWatermark().getAsLong() > firstHighWatermark,
+                maxReplicationDelayMs,
+                "Active controller didn't write NoOpRecord the first time"
+            );
+
+            // Do it again to make sure that we are not counting the leader change record
+            final long secondHighWatermark = localLogManager.highWatermark().getAsLong();
+            TestUtils.waitForCondition(
+                () -> localLogManager.highWatermark().getAsLong() > secondHighWatermark,
+                maxReplicationDelayMs,
+                "Active controller didn't write NoOpRecord the second time"
+            );
         }
     }
 
@@ -283,39 +488,45 @@ public void testUnregisterBroker() throws Throwable {
                     setHost("localhost").setPort(9092));
                 QuorumController active = controlEnv.activeController();
                 CompletableFuture<BrokerRegistrationReply> reply = active.registerBroker(
+                    ANONYMOUS_CONTEXT,
                     new BrokerRegistrationRequestData().
                         setBrokerId(0).
                         setClusterId(active.clusterId()).
                         setIncarnationId(Uuid.fromString("kxAT73dKQsitIedpiPtwBA")).
+                        setFeatures(brokerFeatures()).
                         setListeners(listeners));
-                assertEquals(0L, reply.get().epoch());
+                assertEquals(2L, reply.get().epoch());
                 CreateTopicsRequestData createTopicsRequestData =
                     new CreateTopicsRequestData().setTopics(
                         new CreatableTopicCollection(Collections.singleton(
                             new CreatableTopic().setName("foo").setNumPartitions(1).
                                 setReplicationFactor((short) 1)).iterator()));
                 assertEquals(Errors.INVALID_REPLICATION_FACTOR.code(), active.createTopics(
-                    createTopicsRequestData).get().topics().find("foo").errorCode());
+                    ANONYMOUS_CONTEXT,
+                    createTopicsRequestData, Collections.singleton("foo")).get().
+                        topics().find("foo").errorCode());
                 assertEquals("Unable to replicate the partition 1 time(s): All brokers " +
-                    "are currently fenced.", active.createTopics(
-                    createTopicsRequestData).get().topics().find("foo").errorMessage());
+                    "are currently fenced.", active.createTopics(ANONYMOUS_CONTEXT,
+                        createTopicsRequestData, Collections.singleton("foo")).
+                            get().topics().find("foo").errorMessage());
                 assertEquals(new BrokerHeartbeatReply(true, false, false, false),
-                    active.processBrokerHeartbeat(new BrokerHeartbeatRequestData().
-                            setWantFence(false).setBrokerEpoch(0L).setBrokerId(0).
+                    active.processBrokerHeartbeat(ANONYMOUS_CONTEXT, new BrokerHeartbeatRequestData().
+                            setWantFence(false).setBrokerEpoch(2L).setBrokerId(0).
                             setCurrentMetadataOffset(100000L)).get());
-                assertEquals(Errors.NONE.code(), active.createTopics(
-                    createTopicsRequestData).get().topics().find("foo").errorCode());
+                assertEquals(Errors.NONE.code(), active.createTopics(ANONYMOUS_CONTEXT,
+                    createTopicsRequestData, Collections.singleton("foo")).
+                        get().topics().find("foo").errorCode());
                 CompletableFuture<TopicIdPartition> topicPartitionFuture = active.appendReadEvent(
-                    "debugGetPartition", () -> {
+                    "debugGetPartition", OptionalLong.empty(), () -> {
                         Iterator<TopicIdPartition> iterator = active.
                             replicationControl().brokersToIsrs().iterator(0, true);
                         assertTrue(iterator.hasNext());
                         return iterator.next();
                     });
                 assertEquals(0, topicPartitionFuture.get().partitionId());
-                active.unregisterBroker(0).get();
+                active.unregisterBroker(ANONYMOUS_CONTEXT, 0).get();
                 topicPartitionFuture = active.appendReadEvent(
-                    "debugGetPartition", () -> {
+                    "debugGetPartition", OptionalLong.empty(), () -> {
                         Iterator<TopicIdPartition> iterator = active.
                             replicationControl().brokersToIsrs().partitionsWithNoLeader();
                         assertTrue(iterator.hasNext());
@@ -326,6 +537,20 @@ public void testUnregisterBroker() throws Throwable {
         }
     }
 
+    private BrokerRegistrationRequestData.FeatureCollection brokerFeatures() {
+        return brokerFeatures(MetadataVersion.MINIMUM_KRAFT_VERSION, MetadataVersion.latest());
+    }
+
+    private BrokerRegistrationRequestData.FeatureCollection brokerFeatures(
+            MetadataVersion minVersion, MetadataVersion maxVersion) {
+        BrokerRegistrationRequestData.FeatureCollection features = new BrokerRegistrationRequestData.FeatureCollection();
+        features.add(new BrokerRegistrationRequestData.Feature()
+            .setName(MetadataVersion.FEATURE_NAME)
+            .setMinSupportedVersion(minVersion.featureLevel())
+            .setMaxSupportedVersion(maxVersion.featureLevel()));
+        return features;
+    }
+
     @Test
     public void testSnapshotSaveAndLoad() throws Throwable {
         final int numBrokers = 4;
@@ -338,7 +563,7 @@ public void testSnapshotSaveAndLoad() throws Throwable {
             })) {
                 QuorumController active = controlEnv.activeController();
                 for (int i = 0; i < numBrokers; i++) {
-                    BrokerRegistrationReply reply = active.registerBroker(
+                    BrokerRegistrationReply reply = active.registerBroker(ANONYMOUS_CONTEXT,
                         new BrokerRegistrationRequestData().
                             setBrokerId(i).
                             setRack(null).
@@ -351,11 +576,11 @@ public void testSnapshotSaveAndLoad() throws Throwable {
                 }
                 for (int i = 0; i < numBrokers - 1; i++) {
                     assertEquals(new BrokerHeartbeatReply(true, false, false, false),
-                        active.processBrokerHeartbeat(new BrokerHeartbeatRequestData().
+                        active.processBrokerHeartbeat(ANONYMOUS_CONTEXT, new BrokerHeartbeatRequestData().
                             setWantFence(false).setBrokerEpoch(brokerEpochs.get(i)).
                             setBrokerId(i).setCurrentMetadataOffset(100000L)).get());
                 }
-                CreateTopicsResponseData fooData = active.createTopics(
+                CreateTopicsResponseData fooData = active.createTopics(ANONYMOUS_CONTEXT,
                     new CreateTopicsRequestData().setTopics(
                         new CreatableTopicCollection(Collections.singleton(
                             new CreatableTopic().setName("foo").setNumPartitions(-1).
@@ -367,9 +592,10 @@ public void testSnapshotSaveAndLoad() throws Throwable {
                                     new CreatableReplicaAssignment().
                                         setPartitionIndex(1).
                                         setBrokerIds(Arrays.asList(1, 2, 0))).
-                                            iterator()))).iterator()))).get();
+                                            iterator()))).iterator())),
+                    Collections.singleton("foo")).get();
                 fooId = fooData.topics().find("foo").topicId();
-                active.allocateProducerIds(
+                active.allocateProducerIds(ANONYMOUS_CONTEXT,
                     new AllocateProducerIdsRequestData().setBrokerId(0).setBrokerEpoch(brokerEpochs.get(0))).get();
                 long snapshotLogOffset = active.beginWritingSnapshot().get();
                 reader = logEnv.waitForSnapshot(snapshotLogOffset);
@@ -407,7 +633,7 @@ public void testSnapshotConfiguration() throws Throwable {
             })) {
                 QuorumController active = controlEnv.activeController();
                 for (int i = 0; i < numBrokers; i++) {
-                    BrokerRegistrationReply reply = active.registerBroker(
+                    BrokerRegistrationReply reply = active.registerBroker(ANONYMOUS_CONTEXT,
                         new BrokerRegistrationRequestData().
                             setBrokerId(i).
                             setRack(null).
@@ -420,11 +646,11 @@ public void testSnapshotConfiguration() throws Throwable {
                 }
                 for (int i = 0; i < numBrokers - 1; i++) {
                     assertEquals(new BrokerHeartbeatReply(true, false, false, false),
-                        active.processBrokerHeartbeat(new BrokerHeartbeatRequestData().
+                        active.processBrokerHeartbeat(ANONYMOUS_CONTEXT, new BrokerHeartbeatRequestData().
                             setWantFence(false).setBrokerEpoch(brokerEpochs.get(i)).
                             setBrokerId(i).setCurrentMetadataOffset(100000L)).get());
                 }
-                CreateTopicsResponseData fooData = active.createTopics(
+                CreateTopicsResponseData fooData = active.createTopics(ANONYMOUS_CONTEXT,
                     new CreateTopicsRequestData().setTopics(
                         new CreatableTopicCollection(Collections.singleton(
                             new CreatableTopic().setName("foo").setNumPartitions(-1).
@@ -436,9 +662,10 @@ public void testSnapshotConfiguration() throws Throwable {
                                     new CreatableReplicaAssignment().
                                         setPartitionIndex(1).
                                         setBrokerIds(Arrays.asList(1, 2, 0))).
-                                            iterator()))).iterator()))).get();
+                                            iterator()))).iterator())),
+                    Collections.singleton("foo")).get();
                 fooId = fooData.topics().find("foo").topicId();
-                active.allocateProducerIds(
+                active.allocateProducerIds(ANONYMOUS_CONTEXT,
                     new AllocateProducerIdsRequestData().setBrokerId(0).setBrokerEpoch(brokerEpochs.get(0))).get();
 
                 SnapshotReader<ApiMessageAndVersion> snapshot = createSnapshotReader(logEnv.waitForLatestSnapshot());
@@ -462,7 +689,7 @@ public void testSnapshotOnlyAfterConfiguredMinBytes() throws Throwable {
             })) {
                 QuorumController active = controlEnv.activeController();
                 for (int i = 0; i < numBrokers; i++) {
-                    BrokerRegistrationReply reply = active.registerBroker(
+                    BrokerRegistrationReply reply = active.registerBroker(ANONYMOUS_CONTEXT,
                         new BrokerRegistrationRequestData().
                             setBrokerId(i).
                             setRack(null).
@@ -473,7 +700,7 @@ public void testSnapshotOnlyAfterConfiguredMinBytes() throws Throwable {
                                 setPort(9092 + i)).iterator()))).get();
                     brokerEpochs.put(i, reply.epoch());
                     assertEquals(new BrokerHeartbeatReply(true, false, false, false),
-                        active.processBrokerHeartbeat(new BrokerHeartbeatRequestData().
+                        active.processBrokerHeartbeat(ANONYMOUS_CONTEXT, new BrokerHeartbeatRequestData().
                             setWantFence(false).setBrokerEpoch(brokerEpochs.get(i)).
                             setBrokerId(i).setCurrentMetadataOffset(100000L)).get());
                 }
@@ -488,7 +715,7 @@ public void testSnapshotOnlyAfterConfiguredMinBytes() throws Throwable {
                 while (logEnv.appendedBytes() < maxNewRecordBytes) {
                     counter += 1;
                     String topicName = String.format("foo-%s", counter);
-                    active.createTopics(new CreateTopicsRequestData().setTopics(
+                    active.createTopics(ANONYMOUS_CONTEXT, new CreateTopicsRequestData().setTopics(
                             new CreatableTopicCollection(Collections.singleton(
                                 new CreatableTopic().setName(topicName).setNumPartitions(-1).
                                     setReplicationFactor((short) -1).
@@ -499,7 +726,8 @@ public void testSnapshotOnlyAfterConfiguredMinBytes() throws Throwable {
                                         new CreatableReplicaAssignment().
                                             setPartitionIndex(1).
                                             setBrokerIds(Arrays.asList(1, 2, 0))).
-                                                iterator()))).iterator()))).get();
+                                                iterator()))).iterator())),
+                        Collections.singleton(topicName)).get(60, TimeUnit.SECONDS);
                 }
                 logEnv.waitForLatestSnapshot();
             }
@@ -511,12 +739,16 @@ private SnapshotReader<ApiMessageAndVersion> createSnapshotReader(RawSnapshotRea
             reader,
             new MetadataRecordSerde(),
             BufferSupplier.create(),
-            Integer.MAX_VALUE
+            Integer.MAX_VALUE,
+            true
         );
     }
 
     private List<ApiMessageAndVersion> expectedSnapshotContent(Uuid fooId, Map<Integer, Long> brokerEpochs) {
         return Arrays.asList(
+            new ApiMessageAndVersion(new FeatureLevelRecord().
+                setName(MetadataVersion.FEATURE_NAME).
+                setFeatureLevel(MetadataVersion.latest().featureLevel()), (short) 0),
             new ApiMessageAndVersion(new TopicRecord().
                 setName("foo").setTopicId(fooId), (short) 0),
             new ApiMessageAndVersion(new PartitionRecord().setPartitionId(0).
@@ -538,7 +770,7 @@ private List<ApiMessageAndVersion> expectedSnapshotContent(Uuid fooId, Map<Integ
                             new BrokerEndpoint().setName("PLAINTEXT").setHost("localhost").
                             setPort(9092).setSecurityProtocol((short) 0)).iterator())).
                 setRack(null).
-                setFenced(false), (short) 0),
+                setFenced(false), (short) 1),
             new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerId(1).setBrokerEpoch(brokerEpochs.get(1)).
                 setIncarnationId(Uuid.fromString("kxAT73dKQsitIedpiPtwB1")).
@@ -548,7 +780,7 @@ private List<ApiMessageAndVersion> expectedSnapshotContent(Uuid fooId, Map<Integ
                             new BrokerEndpoint().setName("PLAINTEXT").setHost("localhost").
                             setPort(9093).setSecurityProtocol((short) 0)).iterator())).
                 setRack(null).
-                setFenced(false), (short) 0),
+                setFenced(false), (short) 1),
             new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerId(2).setBrokerEpoch(brokerEpochs.get(2)).
                 setIncarnationId(Uuid.fromString("kxAT73dKQsitIedpiPtwB2")).
@@ -558,14 +790,14 @@ private List<ApiMessageAndVersion> expectedSnapshotContent(Uuid fooId, Map<Integ
                             new BrokerEndpoint().setName("PLAINTEXT").setHost("localhost").
                             setPort(9094).setSecurityProtocol((short) 0)).iterator())).
                 setRack(null).
-                setFenced(false), (short) 0),
+                setFenced(false), (short) 1),
             new ApiMessageAndVersion(new RegisterBrokerRecord().
                 setBrokerId(3).setBrokerEpoch(brokerEpochs.get(3)).
                 setIncarnationId(Uuid.fromString("kxAT73dKQsitIedpiPtwB3")).
                 setEndPoints(new BrokerEndpointCollection(Arrays.asList(
                     new BrokerEndpoint().setName("PLAINTEXT").setHost("localhost").
                         setPort(9095).setSecurityProtocol((short) 0)).iterator())).
-                setRack(null), (short) 0),
+                setRack(null), (short) 1),
             new ApiMessageAndVersion(new ProducerIdsRecord().
                 setBrokerId(0).
                 setBrokerEpoch(brokerEpochs.get(0)).
@@ -632,29 +864,32 @@ public void testTimeouts() throws Throwable {
             })) {
                 QuorumController controller = controlEnv.activeController();
                 CountDownLatch countDownLatch = controller.pause();
+                long now = controller.time().nanoseconds();
+                ControllerRequestContext context0 = new ControllerRequestContext(
+                    new RequestHeaderData(), KafkaPrincipal.ANONYMOUS, OptionalLong.of(now));
                 CompletableFuture<CreateTopicsResponseData> createFuture =
-                    controller.createTopics(new CreateTopicsRequestData().setTimeoutMs(0).
+                    controller.createTopics(context0, new CreateTopicsRequestData().setTimeoutMs(0).
                         setTopics(new CreatableTopicCollection(Collections.singleton(
-                            new CreatableTopic().setName("foo")).iterator())));
-                long now = controller.time().nanoseconds();
+                            new CreatableTopic().setName("foo")).iterator())),
+                        Collections.emptySet());
                 CompletableFuture<Map<Uuid, ApiError>> deleteFuture =
-                    controller.deleteTopics(now, Collections.singletonList(Uuid.ZERO_UUID));
+                    controller.deleteTopics(context0, Collections.singletonList(Uuid.ZERO_UUID));
                 CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIdsFuture =
-                    controller.findTopicIds(now, Collections.singletonList("foo"));
+                    controller.findTopicIds(context0, Collections.singletonList("foo"));
                 CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNamesFuture =
-                    controller.findTopicNames(now, Collections.singletonList(Uuid.ZERO_UUID));
+                    controller.findTopicNames(context0, Collections.singletonList(Uuid.ZERO_UUID));
                 CompletableFuture<List<CreatePartitionsTopicResult>> createPartitionsFuture =
-                    controller.createPartitions(now, Collections.singletonList(
-                        new CreatePartitionsTopic()));
+                    controller.createPartitions(context0, Collections.singletonList(
+                        new CreatePartitionsTopic()), false);
                 CompletableFuture<ElectLeadersResponseData> electLeadersFuture =
-                    controller.electLeaders(new ElectLeadersRequestData().setTimeoutMs(0).
+                    controller.electLeaders(context0, new ElectLeadersRequestData().setTimeoutMs(0).
                         setTopicPartitions(null));
                 CompletableFuture<AlterPartitionReassignmentsResponseData> alterReassignmentsFuture =
-                    controller.alterPartitionReassignments(
+                    controller.alterPartitionReassignments(context0,
                         new AlterPartitionReassignmentsRequestData().setTimeoutMs(0).
                             setTopics(Collections.singletonList(new ReassignableTopic())));
                 CompletableFuture<ListPartitionReassignmentsResponseData> listReassignmentsFuture =
-                    controller.listPartitionReassignments(
+                    controller.listPartitionReassignments(context0,
                         new ListPartitionReassignmentsRequestData().setTopics(null).setTimeoutMs(0));
                 while (controller.time().nanoseconds() == now) {
                     Thread.sleep(0, 10);
@@ -690,21 +925,21 @@ public void testEarlyControllerResults() throws Throwable {
                 QuorumController controller = controlEnv.activeController();
                 CountDownLatch countDownLatch = controller.pause();
                 CompletableFuture<CreateTopicsResponseData> createFuture =
-                    controller.createTopics(new CreateTopicsRequestData().setTimeoutMs(120000));
-                long deadlineMs = controller.time().nanoseconds() + HOURS.toNanos(1);
+                    controller.createTopics(ANONYMOUS_CONTEXT, new CreateTopicsRequestData().
+                        setTimeoutMs(120000), Collections.emptySet());
                 CompletableFuture<Map<Uuid, ApiError>> deleteFuture =
-                    controller.deleteTopics(deadlineMs, Collections.emptyList());
+                    controller.deleteTopics(ANONYMOUS_CONTEXT, Collections.emptyList());
                 CompletableFuture<Map<String, ResultOrError<Uuid>>> findTopicIdsFuture =
-                    controller.findTopicIds(deadlineMs, Collections.emptyList());
+                    controller.findTopicIds(ANONYMOUS_CONTEXT, Collections.emptyList());
                 CompletableFuture<Map<Uuid, ResultOrError<String>>> findTopicNamesFuture =
-                    controller.findTopicNames(deadlineMs, Collections.emptyList());
+                    controller.findTopicNames(ANONYMOUS_CONTEXT, Collections.emptyList());
                 CompletableFuture<List<CreatePartitionsTopicResult>> createPartitionsFuture =
-                    controller.createPartitions(deadlineMs, Collections.emptyList());
+                    controller.createPartitions(ANONYMOUS_CONTEXT, Collections.emptyList(), false);
                 CompletableFuture<ElectLeadersResponseData> electLeadersFuture =
-                    controller.electLeaders(new ElectLeadersRequestData().setTimeoutMs(120000));
+                    controller.electLeaders(ANONYMOUS_CONTEXT, new ElectLeadersRequestData());
                 CompletableFuture<AlterPartitionReassignmentsResponseData> alterReassignmentsFuture =
-                    controller.alterPartitionReassignments(
-                        new AlterPartitionReassignmentsRequestData().setTimeoutMs(12000));
+                    controller.alterPartitionReassignments(ANONYMOUS_CONTEXT,
+                        new AlterPartitionReassignmentsRequestData());
                 createFuture.get();
                 deleteFuture.get();
                 findTopicIdsFuture.get();
@@ -740,23 +975,17 @@ public void testMissingInMemorySnapshot() throws Exception {
                 )
                 .collect(Collectors.toList());
 
-            Uuid topicId = controller.createTopics(
-                new CreateTopicsRequestData()
-                    .setTopics(
-                        new CreatableTopicCollection(
-                            Collections.singleton(
-                                new CreatableTopic()
-                                    .setName(topicName)
-                                    .setNumPartitions(-1)
-                                    .setReplicationFactor((short) -1)
-                                    .setAssignments(new CreatableReplicaAssignmentCollection(partitions.iterator()))
-                            ).iterator()
-                        )
-                    )
-            ).get().topics().find(topicName).topicId();
+            Uuid topicId = controller.createTopics(ANONYMOUS_CONTEXT, new CreateTopicsRequestData()
+                    .setTopics(new CreatableTopicCollection(Collections.singleton(new CreatableTopic()
+                        .setName(topicName)
+                        .setNumPartitions(-1)
+                        .setReplicationFactor((short) -1)
+                        .setAssignments(new CreatableReplicaAssignmentCollection(partitions.iterator()))
+                    ).iterator())),
+                Collections.singleton("foo")).get().topics().find(topicName).topicId();
 
             // Create a lot of alter isr
-            List<AlterIsrRequestData.PartitionData> alterIsrs = IntStream
+            List<AlterPartitionRequestData.PartitionData> alterPartitions = IntStream
                 .range(0, numPartitions)
                 .mapToObj(partitionIndex -> {
                     PartitionRegistration partitionRegistration = controller.replicationControl().getPartition(
@@ -764,31 +993,29 @@ public void testMissingInMemorySnapshot() throws Exception {
                         partitionIndex
                     );
 
-                    return new AlterIsrRequestData.PartitionData()
+                    return new AlterPartitionRequestData.PartitionData()
                         .setPartitionIndex(partitionIndex)
                         .setLeaderEpoch(partitionRegistration.leaderEpoch)
-                        .setCurrentIsrVersion(partitionRegistration.partitionEpoch)
+                        .setPartitionEpoch(partitionRegistration.partitionEpoch)
                         .setNewIsr(Arrays.asList(0, 1));
                 })
                 .collect(Collectors.toList());
 
-            AlterIsrRequestData.TopicData topicData = new AlterIsrRequestData.TopicData()
-                .setName(topicName);
-            topicData.partitions().addAll(alterIsrs);
+            AlterPartitionRequestData.TopicData topicData = new AlterPartitionRequestData.TopicData()
+                .setTopicName(topicName);
+            topicData.partitions().addAll(alterPartitions);
 
             int leaderId = 0;
-            AlterIsrRequestData alterIsrRequest = new AlterIsrRequestData()
+            AlterPartitionRequestData alterPartitionRequest = new AlterPartitionRequestData()
                 .setBrokerId(leaderId)
                 .setBrokerEpoch(brokerEpochs.get(leaderId));
-            alterIsrRequest.topics().add(topicData);
+            alterPartitionRequest.topics().add(topicData);
 
             logEnv.logManagers().get(0).resignAfterNonAtomicCommit();
 
             int oldClaimEpoch = controller.curClaimEpoch();
-            assertThrows(
-                ExecutionException.class,
-                () -> controller.alterIsr(alterIsrRequest).get()
-            );
+            assertThrows(ExecutionException.class,
+                () -> controller.alterPartition(ANONYMOUS_CONTEXT, alterPartitionRequest).get());
 
             // Wait for the controller to become active again
             assertSame(controller, controlEnv.activeController());
@@ -797,7 +1024,7 @@ public void testMissingInMemorySnapshot() throws Exception {
                 String.format("oldClaimEpoch = %s, newClaimEpoch = %s", oldClaimEpoch, controller.curClaimEpoch())
             );
 
-            // Since the alterIsr partially failed we expect to see
+            // Since the alterPartition partially failed we expect to see
             // some partitions to still have 2 in the ISR.
             int partitionsWithReplica2 = Utils.toList(
                 controller
@@ -828,7 +1055,7 @@ public void testMissingInMemorySnapshot() throws Exception {
     private Map<Integer, Long> registerBrokers(QuorumController controller, int numBrokers) throws Exception {
         Map<Integer, Long> brokerEpochs = new HashMap<>();
         for (int brokerId = 0; brokerId < numBrokers; brokerId++) {
-            BrokerRegistrationReply reply = controller.registerBroker(
+            BrokerRegistrationReply reply = controller.registerBroker(ANONYMOUS_CONTEXT,
                 new BrokerRegistrationRequestData()
                     .setBrokerId(brokerId)
                     .setRack(null)
@@ -848,7 +1075,7 @@ private Map<Integer, Long> registerBrokers(QuorumController controller, int numB
             brokerEpochs.put(brokerId, reply.epoch());
 
             // Send heartbeat to unfence
-            controller.processBrokerHeartbeat(
+            controller.processBrokerHeartbeat(ANONYMOUS_CONTEXT,
                 new BrokerHeartbeatRequestData()
                     .setWantFence(false)
                     .setBrokerEpoch(brokerEpochs.get(brokerId))
@@ -869,7 +1096,7 @@ private void sendBrokerheartbeat(
             return;
         }
         for (Integer brokerId : brokers) {
-            BrokerHeartbeatReply reply = controller.processBrokerHeartbeat(
+            BrokerHeartbeatReply reply = controller.processBrokerHeartbeat(ANONYMOUS_CONTEXT,
                 new BrokerHeartbeatRequestData()
                     .setWantFence(false)
                     .setBrokerEpoch(brokerEpochs.get(brokerId))
@@ -888,11 +1115,12 @@ public void testConfigResourceExistenceChecker() throws Throwable {
             })) {
                 QuorumController active = controlEnv.activeController();
                 registerBrokers(active, 5);
-                active.createTopics(new CreateTopicsRequestData().
+                active.createTopics(ANONYMOUS_CONTEXT, new CreateTopicsRequestData().
                     setTopics(new CreatableTopicCollection(Collections.singleton(
                         new CreatableTopic().setName("foo").
                             setReplicationFactor((short) 3).
-                            setNumPartitions(1)).iterator()))).get();
+                            setNumPartitions(1)).iterator())),
+                    Collections.singleton("foo")).get();
                 ConfigResourceExistenceChecker checker =
                     active.new ConfigResourceExistenceChecker();
                 // A ConfigResource with type=BROKER and name=(empty string) represents
@@ -915,4 +1143,159 @@ public void testConfigResourceExistenceChecker() throws Throwable {
             }
         }
     }
+
+    private static final Uuid FOO_ID = Uuid.fromString("igRktLOnR8ektWHr79F8mw");
+
+    private static final Map<Integer, Long> ALL_ZERO_BROKER_EPOCHS =
+        IntStream.of(0, 1, 2, 3).boxed().collect(Collectors.toMap(identity(), __ -> 0L));
+
+    @Test
+    public void testQuorumControllerCompletesAuthorizerInitialLoad() throws Throwable {
+        final int numControllers = 3;
+        List<StandardAuthorizer> authorizers = new ArrayList<>(numControllers);
+        for (int i = 0; i < numControllers; i++) {
+            StandardAuthorizer authorizer = new StandardAuthorizer();
+            authorizer.configure(Collections.emptyMap());
+            authorizers.add(authorizer);
+        }
+        try (LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(
+            numControllers,
+            Optional.empty(),
+            shared -> {
+                shared.setInitialMaxReadOffset(2);
+            }
+        )) {
+            logEnv.appendInitialRecords(expectedSnapshotContent(FOO_ID, ALL_ZERO_BROKER_EPOCHS));
+            logEnv.logManagers().forEach(m -> m.setMaxReadOffset(2));
+            try (QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
+                b.setAuthorizer(authorizers.get(b.nodeId()));
+            })) {
+                assertInitialLoadFuturesNotComplete(authorizers);
+                logEnv.logManagers().get(0).setMaxReadOffset(Long.MAX_VALUE);
+                QuorumController active = controlEnv.activeController();
+                active.unregisterBroker(ANONYMOUS_CONTEXT, 3).get();
+                assertInitialLoadFuturesNotComplete(authorizers.stream().skip(1).collect(Collectors.toList()));
+                logEnv.logManagers().forEach(m -> m.setMaxReadOffset(Long.MAX_VALUE));
+                TestUtils.waitForCondition(() -> {
+                    return authorizers.stream().allMatch(a -> a.initialLoadFuture().isDone());
+                }, "Failed to complete initial authorizer load for all controllers.");
+            }
+        }
+    }
+
+    @Test
+    public void testFatalMetadataReplayErrorOnActive() throws Throwable {
+        try (LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(3, Optional.empty())) {
+            try (QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
+            })) {
+                QuorumController active = controlEnv.activeController();
+                CompletableFuture<Void> future = active.appendWriteEvent("errorEvent",
+                        OptionalLong.empty(), () -> {
+                            return ControllerResult.of(Collections.singletonList(new ApiMessageAndVersion(
+                                    new ConfigRecord().
+                                            setName(null).
+                                            setResourceName(null).
+                                            setResourceType((byte) 255).
+                                            setValue(null), (short) 0)), null);
+                        });
+                assertThrows(ExecutionException.class, () -> future.get());
+                assertEquals(NullPointerException.class,
+                        controlEnv.fatalFaultHandler().firstException().getCause().getClass());
+                controlEnv.fatalFaultHandler().setIgnore(true);
+                controlEnv.metadataFaultHandler().setIgnore(true);
+            }
+        }
+    }
+
+    private static void assertInitialLoadFuturesNotComplete(List<StandardAuthorizer> authorizers) {
+        for (int i = 0; i < authorizers.size(); i++) {
+            assertFalse(authorizers.get(i).initialLoadFuture().isDone(),
+                "authorizer " + i + " should not have completed loading.");
+        }
+    }
+
+    @Test
+    public void testInvalidBootstrapMetadata() throws Exception {
+        // We can't actually create a BootstrapMetadata with an invalid version, so we have to mock it
+        BootstrapMetadata bootstrapMetadata = Mockito.mock(BootstrapMetadata.class);
+        CyclicBarrier barrier = new CyclicBarrier(2);
+        Mockito.when(bootstrapMetadata.metadataVersion()).thenAnswer(__ -> {
+            // This barrier allows us to catch the controller after it becomes leader, but before the bootstrapping fails
+            barrier.await(10, TimeUnit.SECONDS);
+            return MetadataVersion.IBP_2_8_IV0;
+        });
+        try (
+                LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(1, Optional.empty());
+                QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
+                    b.setConfigSchema(SCHEMA);
+                }, OptionalLong.empty(), OptionalLong.empty(), bootstrapMetadata);
+        ) {
+            QuorumController controller = controlEnv.activeController();
+            assertTrue(controller.isActive());
+            // Unblock the first call to BootstrapMetadata#metadataVersion
+            barrier.await(10, TimeUnit.SECONDS);
+            // Unblock the second call to BootstrapMetadata#metadataVersion
+            barrier.await(10, TimeUnit.SECONDS);
+            TestUtils.waitForCondition(() -> !controller.isActive(),
+                "Timed out waiting for controller to renounce itself after bad bootstrap metadata version.");
+        }
+    }
+
+    @Test
+    public void testBootstrapMetadataStartupRace() throws Throwable {
+        // KAFKA-13966: This tests a race condition between external RPC calls being handled before the bootstrap
+        // metadata is written. We instrument this by forcing the BootstrapMetadata#records method to block until a
+        // latch has been completed. This allows an asynchronous broker registration call to be handled before the
+        // handleLeaderChange callback completes. In this case, the registration should fail because the bootstrap
+        // metadata includes an unsupported metadata.version.
+        BootstrapMetadata bootstrapMetadata = BootstrapMetadata.create(MetadataVersion.latest());
+        BootstrapMetadata mockedMetadata = Mockito.mock(BootstrapMetadata.class);
+        CountDownLatch latch = new CountDownLatch(1);
+        Mockito.when(mockedMetadata.metadataVersion()).thenReturn(bootstrapMetadata.metadataVersion());
+        Mockito.when(mockedMetadata.records()).then(__ -> {
+            if (latch.await(30, TimeUnit.SECONDS)) {
+                return bootstrapMetadata.records();
+            } else {
+                throw new RuntimeException("Latch never completed");
+            }
+        });
+
+        try (LocalLogManagerTestEnv logEnv = new LocalLogManagerTestEnv(1, Optional.empty())) {
+            try (QuorumControllerTestEnv controlEnv = new QuorumControllerTestEnv(logEnv, b -> {
+                b.setConfigSchema(SCHEMA);
+            }, OptionalLong.empty(), OptionalLong.empty(), mockedMetadata)) {
+                ListenerCollection listeners = new ListenerCollection();
+                listeners.add(new Listener().setName("PLAINTEXT").
+                    setHost("localhost").setPort(9092));
+                QuorumController active = controlEnv.activeController();
+
+                // Issue a register broker request concurrently as the controller is initializing
+                assertEquals(1, latch.getCount(), "Latch should not have been completed yet");
+                CompletableFuture<Void> registrationFuture = new CompletableFuture<>();
+                Thread registerThread = new Thread(() -> {
+                    try {
+                        CompletableFuture<BrokerRegistrationReply> reply = active.registerBroker(
+                            ANONYMOUS_CONTEXT,
+                            new BrokerRegistrationRequestData().
+                                setBrokerId(0).
+                                setClusterId(active.clusterId()).
+                                setIncarnationId(Uuid.fromString("kxAT73dKQsitIedpiPtwBA")).
+                                setFeatures(brokerFeatures(MetadataVersion.IBP_3_0_IV0, MetadataVersion.IBP_3_3_IV0)).
+                                setListeners(listeners));
+                        // Once we have the future, the register broker event has been enqueued
+                        latch.countDown();
+                        reply.get();
+                        registrationFuture.complete(null);
+                    } catch (Throwable t) {
+                        registrationFuture.completeExceptionally(t);
+                    }
+                });
+                registerThread.start();
+                registerThread.join(30_000);
+                assertTrue(registrationFuture.isCompletedExceptionally(),
+                    "Should not be able to register broker since the bootstrap metadata specified an incompatible metadata.version");
+                assertEquals(0, active.clusterControl().brokerRegistrations().size());
+            }
+        }
+    }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTestEnv.java b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTestEnv.java
index f905621f4e69f..40dd21c88d330 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTestEnv.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/QuorumControllerTestEnv.java
@@ -19,51 +19,76 @@
 
 import static java.util.concurrent.TimeUnit.NANOSECONDS;
 
-import java.util.Optional;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.OptionalInt;
+import java.util.OptionalLong;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.controller.QuorumController.Builder;
 import org.apache.kafka.metalog.LocalLogManagerTestEnv;
 import org.apache.kafka.raft.LeaderAndEpoch;
+import org.apache.kafka.server.common.MetadataVersion;
+import org.apache.kafka.server.fault.MockFaultHandler;
 import org.apache.kafka.test.TestUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.OptionalInt;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Consumer;
-
 public class QuorumControllerTestEnv implements AutoCloseable {
     private static final Logger log =
         LoggerFactory.getLogger(QuorumControllerTestEnv.class);
 
     private final List<QuorumController> controllers;
     private final LocalLogManagerTestEnv logEnv;
+    private final MockFaultHandler fatalFaultHandler = new MockFaultHandler("fatalFaultHandler");
+    private final MockFaultHandler metadataFaultHandler = new MockFaultHandler("metadataFaultHandler");
 
     public QuorumControllerTestEnv(
         LocalLogManagerTestEnv logEnv,
         Consumer<QuorumController.Builder> builderConsumer
     ) throws Exception {
-        this(logEnv, builderConsumer, Optional.empty());
+        this(logEnv, builderConsumer, OptionalLong.empty(), OptionalLong.empty(), BootstrapMetadata.create(MetadataVersion.latest()));
+    }
+
+    public QuorumControllerTestEnv(
+            LocalLogManagerTestEnv logEnv,
+            Consumer<Builder> builderConsumer,
+            OptionalLong sessionTimeoutMillis,
+            OptionalLong leaderImbalanceCheckIntervalNs,
+            MetadataVersion metadataVersion
+    ) throws Exception {
+        this(logEnv, builderConsumer, sessionTimeoutMillis, leaderImbalanceCheckIntervalNs, BootstrapMetadata.create(metadataVersion));
     }
 
     public QuorumControllerTestEnv(
         LocalLogManagerTestEnv logEnv,
         Consumer<Builder> builderConsumer,
-        Optional<Long> sessionTimeoutMillis
+        OptionalLong sessionTimeoutMillis,
+        OptionalLong leaderImbalanceCheckIntervalNs,
+        BootstrapMetadata bootstrapMetadata
     ) throws Exception {
         this.logEnv = logEnv;
         int numControllers = logEnv.logManagers().size();
         this.controllers = new ArrayList<>(numControllers);
         try {
+            ApiVersions apiVersions = new ApiVersions();
+            List<Integer> nodeIds = IntStream.range(0, numControllers).boxed().collect(Collectors.toList());
             for (int i = 0; i < numControllers; i++) {
                 QuorumController.Builder builder = new QuorumController.Builder(i, logEnv.clusterId());
                 builder.setRaftClient(logEnv.logManagers().get(i));
-                if (sessionTimeoutMillis.isPresent()) {
-                    builder.setSessionTimeoutNs(NANOSECONDS.convert(
-                        sessionTimeoutMillis.get(), TimeUnit.MILLISECONDS));
-                }
+                builder.setBootstrapMetadata(bootstrapMetadata);
+                builder.setLeaderImbalanceCheckIntervalNs(leaderImbalanceCheckIntervalNs);
+                builder.setQuorumFeatures(new QuorumFeatures(i, apiVersions, QuorumFeatures.defaultFeatureMap(), nodeIds));
+                sessionTimeoutMillis.ifPresent(timeout -> {
+                    builder.setSessionTimeoutNs(NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS));
+                });
+                builder.setFatalFaultHandler(fatalFaultHandler);
+                builder.setMetadataFaultHandler(metadataFaultHandler);
                 builderConsumer.accept(builder);
                 this.controllers.add(builder.build());
             }
@@ -97,6 +122,14 @@ public List<QuorumController> controllers() {
         return controllers;
     }
 
+    public MockFaultHandler fatalFaultHandler() {
+        return fatalFaultHandler;
+    }
+
+    public MockFaultHandler metadataFaultHandler() {
+        return metadataFaultHandler;
+    }
+
     @Override
     public void close() throws InterruptedException {
         for (QuorumController controller : controllers) {
@@ -105,5 +138,7 @@ public void close() throws InterruptedException {
         for (QuorumController controller : controllers) {
             controller.close();
         }
+        fatalFaultHandler.maybeRethrowFirstException();
+        metadataFaultHandler.maybeRethrowFirstException();
     }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/QuorumFeaturesTest.java b/metadata/src/test/java/org/apache/kafka/controller/QuorumFeaturesTest.java
new file mode 100644
index 0000000000000..7d8ba5bfec2aa
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/controller/QuorumFeaturesTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.controller;
+
+import org.apache.kafka.clients.ApiVersions;
+import org.apache.kafka.clients.NodeApiVersions;
+import org.apache.kafka.common.message.ApiVersionsResponseData.SupportedFeatureKey;
+import org.apache.kafka.metadata.VersionRange;
+import org.junit.jupiter.api.Test;
+
+import java.util.AbstractMap.SimpleImmutableEntry;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+
+import static java.util.Collections.emptyMap;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class QuorumFeaturesTest {
+    private final static Map<String, VersionRange> LOCAL;
+
+    static {
+        Map<String, VersionRange> local = new HashMap<>();
+        local.put("foo", VersionRange.of(0, 3));
+        local.put("bar", VersionRange.of(0, 4));
+        local.put("baz", VersionRange.of(2, 2));
+        LOCAL = Collections.unmodifiableMap(local);
+    }
+
+    @Test
+    public void testDefaultSupportedLevels() {
+        QuorumFeatures quorumFeatures = new QuorumFeatures(0, new ApiVersions(), emptyMap(), Arrays.asList(0, 1, 2));
+        assertEquals(Optional.empty(), quorumFeatures.reasonNotSupported("foo", (short) 0));
+        assertEquals(Optional.of("Local controller 0 does not support this feature."),
+            quorumFeatures.reasonNotSupported("foo", (short) 1));
+    }
+
+    @Test
+    public void testLocalSupportedFeature() {
+        QuorumFeatures quorumFeatures = new QuorumFeatures(0, new ApiVersions(), LOCAL, Arrays.asList(0, 1, 2));
+        assertEquals(VersionRange.of(0, 3), quorumFeatures.localSupportedFeature("foo"));
+        assertEquals(VersionRange.of(0, 4), quorumFeatures.localSupportedFeature("bar"));
+        assertEquals(VersionRange.of(2, 2), quorumFeatures.localSupportedFeature("baz"));
+        assertEquals(VersionRange.of(0, 0), quorumFeatures.localSupportedFeature("quux"));
+    }
+
+    @Test
+    public void testReasonNotSupported() {
+        ApiVersions apiVersions = new ApiVersions();
+        QuorumFeatures quorumFeatures = new QuorumFeatures(0, apiVersions, LOCAL, Arrays.asList(0, 1, 2));
+        assertEquals(Optional.of("Local controller 0 only supports versions 0-3"),
+                quorumFeatures.reasonNotSupported("foo", (short) 10));
+        apiVersions.update("1", nodeApiVersions(Arrays.asList(
+                new SimpleImmutableEntry<>("foo", VersionRange.of(1, 3)),
+                new SimpleImmutableEntry<>("bar", VersionRange.of(1, 3)),
+                new SimpleImmutableEntry<>("baz", VersionRange.of(1, 2)))));
+        assertEquals(Optional.empty(), quorumFeatures.reasonNotSupported("bar", (short) 3));
+        assertEquals(Optional.of("Controller 1 only supports versions 1-3"),
+                quorumFeatures.reasonNotSupported("bar", (short) 4));
+    }
+
+    private static NodeApiVersions nodeApiVersions(List<Entry<String, VersionRange>> entries) {
+        List<SupportedFeatureKey> features = new ArrayList<>();
+        entries.forEach(entry -> {
+            features.add(new SupportedFeatureKey().
+                    setName(entry.getKey()).
+                    setMinVersion(entry.getValue().min()).
+                    setMaxVersion(entry.getValue().max()));
+        });
+        return new NodeApiVersions(Collections.emptyList(), features);
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/controller/ReplicationControlManagerTest.java b/metadata/src/test/java/org/apache/kafka/controller/ReplicationControlManagerTest.java
index 4a08b237b0c22..d33776ca10e55 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/ReplicationControlManagerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/ReplicationControlManagerTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.kafka.controller;
 
+import org.apache.kafka.clients.ApiVersions;
 import org.apache.kafka.common.ElectionType;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.Uuid;
@@ -24,10 +25,10 @@
 import org.apache.kafka.common.errors.InvalidReplicaAssignmentException;
 import org.apache.kafka.common.errors.PolicyViolationException;
 import org.apache.kafka.common.errors.StaleBrokerEpochException;
-import org.apache.kafka.common.message.AlterIsrRequestData;
-import org.apache.kafka.common.message.AlterIsrRequestData.PartitionData;
-import org.apache.kafka.common.message.AlterIsrRequestData.TopicData;
-import org.apache.kafka.common.message.AlterIsrResponseData;
+import org.apache.kafka.common.message.AlterPartitionRequestData;
+import org.apache.kafka.common.message.AlterPartitionRequestData.PartitionData;
+import org.apache.kafka.common.message.AlterPartitionRequestData.TopicData;
+import org.apache.kafka.common.message.AlterPartitionResponseData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignablePartition;
 import org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignableTopic;
@@ -54,33 +55,45 @@
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData.OngoingPartitionReassignment;
 import org.apache.kafka.common.message.ListPartitionReassignmentsResponseData.OngoingTopicReassignment;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
 import org.apache.kafka.common.metadata.ConfigRecord;
 import org.apache.kafka.common.metadata.PartitionChangeRecord;
 import org.apache.kafka.common.metadata.PartitionRecord;
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.metadata.TopicRecord;
+import org.apache.kafka.common.protocol.ApiKeys;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.kafka.common.requests.ApiError;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.common.utils.annotation.ApiKeyVersionsSource;
+import org.apache.kafka.controller.ReplicationControlManager.KRaftClusterDescriber;
 import org.apache.kafka.metadata.BrokerHeartbeatReply;
 import org.apache.kafka.metadata.BrokerRegistration;
-import org.apache.kafka.metadata.KafkaConfigSchema;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
+import org.apache.kafka.metadata.LeaderRecoveryState;
+import org.apache.kafka.metadata.MockRandom;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.RecordTestUtils;
 import org.apache.kafka.metadata.Replicas;
+import org.apache.kafka.metadata.placement.StripedReplicaPlacer;
+import org.apache.kafka.metadata.placement.UsableBroker;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.apache.kafka.server.policy.CreateTopicPolicy;
 import org.apache.kafka.timeline.SnapshotRegistry;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.ValueSource;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Arrays;
+import java.util.TreeSet;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -102,16 +115,20 @@
 import static org.apache.kafka.common.protocol.Errors.ELECTION_NOT_NEEDED;
 import static org.apache.kafka.common.protocol.Errors.ELIGIBLE_LEADERS_NOT_AVAILABLE;
 import static org.apache.kafka.common.protocol.Errors.FENCED_LEADER_EPOCH;
+import static org.apache.kafka.common.protocol.Errors.INELIGIBLE_REPLICA;
 import static org.apache.kafka.common.protocol.Errors.INVALID_PARTITIONS;
+import static org.apache.kafka.common.protocol.Errors.INVALID_REPLICATION_FACTOR;
 import static org.apache.kafka.common.protocol.Errors.INVALID_REPLICA_ASSIGNMENT;
 import static org.apache.kafka.common.protocol.Errors.INVALID_TOPIC_EXCEPTION;
+import static org.apache.kafka.common.protocol.Errors.NEW_LEADER_ELECTED;
 import static org.apache.kafka.common.protocol.Errors.NONE;
 import static org.apache.kafka.common.protocol.Errors.NO_REASSIGNMENT_IN_PROGRESS;
+import static org.apache.kafka.common.protocol.Errors.OPERATION_NOT_ATTEMPTED;
 import static org.apache.kafka.common.protocol.Errors.POLICY_VIOLATION;
 import static org.apache.kafka.common.protocol.Errors.PREFERRED_LEADER_NOT_AVAILABLE;
 import static org.apache.kafka.common.protocol.Errors.UNKNOWN_TOPIC_ID;
 import static org.apache.kafka.common.protocol.Errors.UNKNOWN_TOPIC_OR_PARTITION;
-import static org.apache.kafka.controller.BrokersToIsrs.TopicIdPartition;
+import static org.apache.kafka.controller.ControllerRequestContextUtil.anonymousContextFor;
 import static org.apache.kafka.metadata.LeaderConstants.NO_LEADER;
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -134,17 +151,25 @@ private static class ReplicationControlTestContext {
         final MockTime time = new MockTime();
         final MockRandom random = new MockRandom();
         final ControllerMetrics metrics = new MockControllerMetrics();
-        final String clusterId = Uuid.randomUuid().toString();
-        final ClusterControlManager clusterControl = new ClusterControlManager(logContext,
-            clusterId,
-            time,
-            snapshotRegistry,
-            TimeUnit.MILLISECONDS.convert(BROKER_SESSION_TIMEOUT_MS, TimeUnit.NANOSECONDS),
-            new StripedReplicaPlacer(random),
-            metrics);
-        final ConfigurationControlManager configurationControl = new ConfigurationControlManager(
-            new LogContext(), snapshotRegistry, KafkaConfigSchema.EMPTY, Optional.empty(),
-                (__, ___) -> { });
+        final FeatureControlManager featureControl = new FeatureControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                QuorumFeatures.defaultFeatureMap(),
+                Collections.singletonList(0))).
+            setMetadataVersion(MetadataVersion.latest()).
+            build();
+        final ClusterControlManager clusterControl = new ClusterControlManager.Builder().
+            setLogContext(logContext).
+            setTime(time).
+            setSnapshotRegistry(snapshotRegistry).
+            setSessionTimeoutNs(TimeUnit.MILLISECONDS.convert(BROKER_SESSION_TIMEOUT_MS, TimeUnit.NANOSECONDS)).
+            setReplicaPlacer(new StripedReplicaPlacer(random)).
+            setControllerMetrics(metrics).
+            setFeatureControlManager(featureControl).
+            build();
+        final ConfigurationControlManager configurationControl = new ConfigurationControlManager.Builder().
+            setSnapshotRegistry(snapshotRegistry).
+            build();
         final ReplicationControlManager replicationControl;
 
         void replay(List<ApiMessageAndVersion> records) throws Exception {
@@ -157,15 +182,33 @@ void replay(List<ApiMessageAndVersion> records) throws Exception {
             this(Optional.empty());
         }
 
+        ReplicationControlTestContext(MetadataVersion metadataVersion) {
+            this(metadataVersion, Optional.empty());
+        }
+
         ReplicationControlTestContext(Optional<CreateTopicPolicy> createTopicPolicy) {
-            this.replicationControl = new ReplicationControlManager(snapshotRegistry,
-                new LogContext(),
-                (short) 3,
-                1,
-                configurationControl,
-                clusterControl,
-                metrics,
-                createTopicPolicy);
+            this(MetadataVersion.latest(), createTopicPolicy);
+        }
+
+        ReplicationControlTestContext(MetadataVersion metadataVersion, Optional<CreateTopicPolicy> createTopicPolicy) {
+            FeatureControlManager featureControl = new FeatureControlManager.Builder().
+                setSnapshotRegistry(snapshotRegistry).
+                setQuorumFeatures(new QuorumFeatures(0, new ApiVersions(),
+                    QuorumFeatures.defaultFeatureMap(),
+                    Collections.singletonList(0))).
+                setMetadataVersion(metadataVersion).
+                build();
+
+            this.replicationControl = new ReplicationControlManager.Builder().
+                setSnapshotRegistry(snapshotRegistry).
+                setLogContext(logContext).
+                setMaxElectionsPerImbalance(Integer.MAX_VALUE).
+                setConfigurationControl(configurationControl).
+                setClusterControl(clusterControl).
+                setControllerMetrics(metrics).
+                setCreateTopicPolicy(createTopicPolicy).
+                setFeatureControl(featureControl).
+                build();
             clusterControl.activate();
         }
 
@@ -178,7 +221,7 @@ CreatableTopicResult createTestTopic(String name,
             topic.setNumPartitions(numPartitions).setReplicationFactor(replicationFactor);
             request.topics().add(topic);
             ControllerResult<CreateTopicsResponseData> result =
-                replicationControl.createTopics(request);
+                replicationControl.createTopics(request, Collections.singleton(name));
             CreatableTopicResult topicResult = result.response().topics().find(name);
             assertNotNull(topicResult);
             assertEquals(expectedErrorCode, topicResult.errorCode());
@@ -213,7 +256,7 @@ CreatableTopicResult createTestTopic(String name, int[][] replicas,
                     setValue(e.getValue())));
             request.topics().add(topic);
             ControllerResult<CreateTopicsResponseData> result =
-                replicationControl.createTopics(request);
+                replicationControl.createTopics(request, Collections.singleton(name));
             CreatableTopicResult topicResult = result.response().topics().find(name);
             assertNotNull(topicResult);
             assertEquals(expectedErrorCode, topicResult.errorCode());
@@ -247,7 +290,7 @@ void createPartitions(int count, String name,
         void registerBrokers(Integer... brokerIds) throws Exception {
             for (int brokerId : brokerIds) {
                 RegisterBrokerRecord brokerRecord = new RegisterBrokerRecord().
-                    setBrokerEpoch(brokerId + 100).setBrokerId(brokerId);
+                    setBrokerEpoch(brokerId + 100).setBrokerId(brokerId).setRack(null);
                 brokerRecord.endPoints().add(new RegisterBrokerRecord.BrokerEndpoint().
                     setSecurityProtocol(SecurityProtocol.PLAINTEXT.id).
                     setPort((short) 9092 + brokerId).
@@ -257,10 +300,11 @@ void registerBrokers(Integer... brokerIds) throws Exception {
             }
         }
 
-        void alterIsr(
+        void alterPartition(
             TopicIdPartition topicIdPartition,
             int leaderId,
-            List<Integer> isr
+            List<Integer> isr,
+            LeaderRecoveryState leaderRecoveryState
         ) throws Exception {
             BrokerRegistration registration = clusterControl.brokerRegistrations().get(leaderId);
             assertFalse(registration.fenced());
@@ -274,24 +318,29 @@ void alterIsr(
 
             PartitionData partitionData = new PartitionData()
                 .setPartitionIndex(topicIdPartition.partitionId())
-                .setCurrentIsrVersion(partition.partitionEpoch)
+                .setPartitionEpoch(partition.partitionEpoch)
                 .setLeaderEpoch(partition.leaderEpoch)
+                .setLeaderRecoveryState(leaderRecoveryState.value())
                 .setNewIsr(isr);
 
             String topicName = replicationControl.getTopic(topicIdPartition.topicId()).name();
             TopicData topicData = new TopicData()
-                .setName(topicName)
+                .setTopicName(topicName)
+                .setTopicId(topicIdPartition.topicId())
                 .setPartitions(singletonList(partitionData));
 
-            ControllerResult<AlterIsrResponseData> alterIsr = replicationControl.alterIsr(
-                new AlterIsrRequestData()
+            ControllerRequestContext requestContext =
+                anonymousContextFor(ApiKeys.ALTER_PARTITION);
+            ControllerResult<AlterPartitionResponseData> alterPartition = replicationControl.alterPartition(
+                requestContext,
+                new AlterPartitionRequestData()
                     .setBrokerId(leaderId)
                     .setBrokerEpoch(registration.epoch())
                     .setTopics(singletonList(topicData)));
-            replay(alterIsr.records());
+            replay(alterPartition.records());
         }
 
-        void unfenceBrokers(Integer... brokerIds)  throws Exception {
+        void unfenceBrokers(Integer... brokerIds) throws Exception {
             unfenceBrokers(Utils.mkSet(brokerIds));
         }
 
@@ -308,6 +357,20 @@ void unfenceBrokers(Set<Integer> brokerIds) throws Exception {
             }
         }
 
+        void inControlledShutdownBrokers(Integer... brokerIds) throws Exception {
+            inControlledShutdownBrokers(Utils.mkSet(brokerIds));
+        }
+
+        void inControlledShutdownBrokers(Set<Integer> brokerIds) throws Exception {
+            for (int brokerId : brokerIds) {
+                BrokerRegistrationChangeRecord record = new BrokerRegistrationChangeRecord()
+                    .setBrokerId(brokerId)
+                    .setBrokerEpoch(brokerId + 100)
+                    .setInControlledShutdown(BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.value());
+                replay(singletonList(new ApiMessageAndVersion(record, (short) 1)));
+            }
+        }
+
         void alterTopicConfig(
             String topic,
             String configKey,
@@ -393,38 +456,53 @@ public void testCreateTopics() throws Exception {
         CreateTopicsRequestData request = new CreateTopicsRequestData();
         request.topics().add(new CreatableTopic().setName("foo").
             setNumPartitions(-1).setReplicationFactor((short) -1));
+
         ControllerResult<CreateTopicsResponseData> result =
-            replicationControl.createTopics(request);
+            replicationControl.createTopics(request, Collections.singleton("foo"));
         CreateTopicsResponseData expectedResponse = new CreateTopicsResponseData();
         expectedResponse.topics().add(new CreatableTopicResult().setName("foo").
-            setErrorCode(Errors.INVALID_REPLICATION_FACTOR.code()).
+            setErrorCode(INVALID_REPLICATION_FACTOR.code()).
                 setErrorMessage("Unable to replicate the partition 3 time(s): All " +
                     "brokers are currently fenced."));
         assertEquals(expectedResponse, result.response());
 
         ctx.registerBrokers(0, 1, 2);
-        ctx.unfenceBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0);
+        ctx.inControlledShutdownBrokers(0);
+
         ControllerResult<CreateTopicsResponseData> result2 =
-            replicationControl.createTopics(request);
+            replicationControl.createTopics(request, Collections.singleton("foo"));
         CreateTopicsResponseData expectedResponse2 = new CreateTopicsResponseData();
         expectedResponse2.topics().add(new CreatableTopicResult().setName("foo").
-            setNumPartitions(1).setReplicationFactor((short) 3).
-            setErrorMessage(null).setErrorCode((short) 0).
-            setTopicId(result2.response().topics().find("foo").topicId()));
+            setErrorCode(INVALID_REPLICATION_FACTOR.code()).
+            setErrorMessage("Unable to replicate the partition 3 time(s): All " +
+                "brokers are currently fenced or in controlled shutdown."));
         assertEquals(expectedResponse2, result2.response());
-        ctx.replay(result2.records());
-        assertEquals(new PartitionRegistration(new int[] {1, 2, 0},
-            new int[] {1, 2, 0}, Replicas.NONE, Replicas.NONE, 1, 0, 0),
-            replicationControl.getPartition(
-                ((TopicRecord) result2.records().get(0).message()).topicId(), 0));
+
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1, 2);
+
         ControllerResult<CreateTopicsResponseData> result3 =
-                replicationControl.createTopics(request);
+            replicationControl.createTopics(request, Collections.singleton("foo"));
         CreateTopicsResponseData expectedResponse3 = new CreateTopicsResponseData();
         expectedResponse3.topics().add(new CreatableTopicResult().setName("foo").
+            setNumPartitions(1).setReplicationFactor((short) 3).
+            setErrorMessage(null).setErrorCode((short) 0).
+            setTopicId(result3.response().topics().find("foo").topicId()));
+        assertEquals(expectedResponse3, result3.response());
+        ctx.replay(result3.records());
+        assertEquals(new PartitionRegistration(new int[] {1, 2, 0},
+            new int[] {1, 2, 0}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 0, 0),
+            replicationControl.getPartition(
+                ((TopicRecord) result3.records().get(0).message()).topicId(), 0));
+        ControllerResult<CreateTopicsResponseData> result4 =
+                replicationControl.createTopics(request, Collections.singleton("foo"));
+        CreateTopicsResponseData expectedResponse4 = new CreateTopicsResponseData();
+        expectedResponse4.topics().add(new CreatableTopicResult().setName("foo").
                 setErrorCode(Errors.TOPIC_ALREADY_EXISTS.code()).
                 setErrorMessage("Topic 'foo' already exists."));
-        assertEquals(expectedResponse3, result3.response());
-        Uuid fooId = result2.response().topics().find("foo").topicId();
+        assertEquals(expectedResponse4, result4.response());
+        Uuid fooId = result3.response().topics().find("foo").topicId();
         RecordTestUtils.assertBatchIteratorContains(asList(
             asList(new ApiMessageAndVersion(new PartitionRecord().
                     setPartitionId(0).setTopicId(fooId).
@@ -436,6 +514,96 @@ public void testCreateTopics() throws Exception {
             ctx.replicationControl.iterator(Long.MAX_VALUE));
     }
 
+    @Test
+    public void testCreateTopicsISRInvariants() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replicationControl = ctx.replicationControl;
+
+        CreateTopicsRequestData request = new CreateTopicsRequestData();
+        request.topics().add(new CreatableTopic().setName("foo").
+            setNumPartitions(-1).setReplicationFactor((short) -1));
+
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1);
+        ctx.inControlledShutdownBrokers(1);
+
+        ControllerResult<CreateTopicsResponseData> result =
+            replicationControl.createTopics(request, Collections.singleton("foo"));
+
+        CreateTopicsResponseData expectedResponse = new CreateTopicsResponseData();
+        expectedResponse.topics().add(new CreatableTopicResult().setName("foo").
+            setNumPartitions(1).setReplicationFactor((short) 3).
+            setErrorMessage(null).setErrorCode((short) 0).
+            setTopicId(result.response().topics().find("foo").topicId()));
+        assertEquals(expectedResponse, result.response());
+
+        ctx.replay(result.records());
+
+        // Broker 2 cannot be in the ISR because it is fenced and broker 1
+        // cannot be in the ISR because it is in controlled shutdown.
+        assertEquals(
+            new PartitionRegistration(new int[]{1, 0, 2},
+                new int[]{0},
+                Replicas.NONE,
+                Replicas.NONE,
+                0,
+                LeaderRecoveryState.RECOVERED,
+                0,
+                0),
+            replicationControl.getPartition(
+                ((TopicRecord) result.records().get(0).message()).topicId(), 0));
+    }
+
+    @Test
+    public void testCreateTopicsWithConfigs() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replicationControl = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1, 2);
+
+        CreateTopicsRequestData.CreateableTopicConfigCollection validConfigs =
+            new CreateTopicsRequestData.CreateableTopicConfigCollection();
+        validConfigs.add(
+            new CreateTopicsRequestData.CreateableTopicConfig()
+                .setName("foo")
+                .setValue("notNull")
+        );
+        CreateTopicsRequestData request1 = new CreateTopicsRequestData();
+        request1.topics().add(new CreatableTopic().setName("foo")
+            .setNumPartitions(-1).setReplicationFactor((short) -1)
+            .setConfigs(validConfigs));
+
+        ControllerResult<CreateTopicsResponseData> result1 =
+            replicationControl.createTopics(request1, Collections.singleton("foo"));
+        assertEquals((short) 0, result1.response().topics().find("foo").errorCode());
+
+        ctx.replay(result1.records());
+        assertEquals(
+            "notNull",
+            ctx.configurationControl.getConfigs(new ConfigResource(ConfigResource.Type.TOPIC, "foo")).get("foo")
+        );
+
+        CreateTopicsRequestData.CreateableTopicConfigCollection invalidConfigs =
+            new CreateTopicsRequestData.CreateableTopicConfigCollection();
+        invalidConfigs.add(
+            new CreateTopicsRequestData.CreateableTopicConfig()
+                .setName("foo")
+                .setValue(null)
+        );
+        CreateTopicsRequestData request2 = new CreateTopicsRequestData();
+        request2.topics().add(new CreatableTopic().setName("bar")
+            .setNumPartitions(-1).setReplicationFactor((short) -1)
+            .setConfigs(invalidConfigs));
+
+        ControllerResult<CreateTopicsResponseData> result2 =
+            replicationControl.createTopics(request2, Collections.singleton("bar"));
+        assertEquals(Errors.INVALID_CONFIG.code(), result2.response().topics().find("bar").errorCode());
+        assertEquals(
+            "Null value not supported for topic configs: foo",
+            result2.response().topics().find("bar").errorMessage()
+        );
+    }
+
     @Test
     public void testBrokerCountMetrics() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
@@ -480,7 +648,7 @@ public void testCreateTopicsWithValidateOnlyFlag() throws Exception {
         request.topics().add(new CreatableTopic().setName("foo").
             setNumPartitions(1).setReplicationFactor((short) 3));
         ControllerResult<CreateTopicsResponseData> result =
-            ctx.replicationControl.createTopics(request);
+            ctx.replicationControl.createTopics(request, Collections.singleton("foo"));
         assertEquals(0, result.records().size());
         CreatableTopicResult topicResult = result.response().topics().find("foo");
         assertEquals((short) 0, topicResult.errorCode());
@@ -495,11 +663,11 @@ public void testInvalidCreateTopicsWithValidateOnlyFlag() throws Exception {
         request.topics().add(new CreatableTopic().setName("foo").
             setNumPartitions(1).setReplicationFactor((short) 4));
         ControllerResult<CreateTopicsResponseData> result =
-            ctx.replicationControl.createTopics(request);
+            ctx.replicationControl.createTopics(request, Collections.singleton("foo"));
         assertEquals(0, result.records().size());
         CreateTopicsResponseData expectedResponse = new CreateTopicsResponseData();
         expectedResponse.topics().add(new CreatableTopicResult().setName("foo").
-            setErrorCode(Errors.INVALID_REPLICATION_FACTOR.code()).
+            setErrorCode(INVALID_REPLICATION_FACTOR.code()).
             setErrorMessage("Unable to replicate the partition 4 time(s): The target " +
                 "replication factor of 4 cannot be reached because only 3 broker(s) " +
                 "are registered."));
@@ -543,7 +711,7 @@ public void testGlobalTopicAndPartitionMetrics() throws Exception {
         List<Uuid> topicsToDelete = new ArrayList<>();
 
         ControllerResult<CreateTopicsResponseData> result =
-            replicationControl.createTopics(request);
+            replicationControl.createTopics(request, Collections.singleton("foo"));
         topicsToDelete.add(result.response().topics().find("foo").topicId());
 
         RecordTestUtils.replayAll(replicationControl, result.records());
@@ -554,7 +722,8 @@ public void testGlobalTopicAndPartitionMetrics() throws Exception {
             setNumPartitions(1).setReplicationFactor((short) -1));
         request.topics().add(new CreatableTopic().setName("baz").
             setNumPartitions(2).setReplicationFactor((short) -1));
-        result = replicationControl.createTopics(request);
+        result = replicationControl.createTopics(request,
+            new HashSet<>(Arrays.asList("bar", "baz")));
         RecordTestUtils.replayAll(replicationControl, result.records());
         assertEquals(3, ctx.metrics.globalTopicsCount());
         assertEquals(4, ctx.metrics.globalPartitionCount());
@@ -636,7 +805,7 @@ public void testValidateNewTopicNames() {
         topics.add(new CreatableTopic().setName(""));
         topics.add(new CreatableTopic().setName("woo"));
         topics.add(new CreatableTopic().setName("."));
-        ReplicationControlManager.validateNewTopicNames(topicErrors, topics);
+        ReplicationControlManager.validateNewTopicNames(topicErrors, topics, Collections.emptyMap());
         Map<String, ApiError> expectedTopicErrors = new HashMap<>();
         expectedTopicErrors.put("", new ApiError(INVALID_TOPIC_EXCEPTION,
             "Topic name is illegal, it can't be empty"));
@@ -645,6 +814,24 @@ public void testValidateNewTopicNames() {
         assertEquals(expectedTopicErrors, topicErrors);
     }
 
+    @Test
+    public void testTopicNameCollision() {
+        Map<String, ApiError> topicErrors = new HashMap<>();
+        CreatableTopicCollection topics = new CreatableTopicCollection();
+        topics.add(new CreatableTopic().setName("foo.bar"));
+        topics.add(new CreatableTopic().setName("woo.bar_foo"));
+        Map<String, Set<String>> collisionMap = new HashMap<>();
+        collisionMap.put("foo_bar", new TreeSet<>(Arrays.asList("foo_bar")));
+        collisionMap.put("woo_bar_foo", new TreeSet<>(Arrays.asList("woo.bar.foo", "woo_bar.foo")));
+        ReplicationControlManager.validateNewTopicNames(topicErrors, topics, collisionMap);
+        Map<String, ApiError> expectedTopicErrors = new HashMap<>();
+        expectedTopicErrors.put("foo.bar", new ApiError(INVALID_TOPIC_EXCEPTION,
+            "Topic 'foo.bar' collides with existing topic: foo_bar"));
+        expectedTopicErrors.put("woo.bar_foo", new ApiError(INVALID_TOPIC_EXCEPTION,
+            "Topic 'woo.bar_foo' collides with existing topic: woo.bar.foo"));
+        assertEquals(expectedTopicErrors, topicErrors);
+    }
+
     @Test
     public void testRemoveLeaderships() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
@@ -680,28 +867,65 @@ public void testShrinkAndExpandIsr() throws Exception {
             new int[][] {new int[] {0, 1, 2}});
 
         TopicIdPartition topicIdPartition = new TopicIdPartition(createTopicResult.topicId(), 0);
-        TopicPartition topicPartition = new TopicPartition("foo", 0);
         assertEquals(OptionalInt.of(0), ctx.currentLeader(topicIdPartition));
         long brokerEpoch = ctx.currentBrokerEpoch(0);
-        PartitionData shrinkIsrRequest = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
-        ControllerResult<AlterIsrResponseData> shrinkIsrResult = sendAlterIsr(
-            replicationControl, 0, brokerEpoch, "foo", shrinkIsrRequest);
-        AlterIsrResponseData.PartitionData shrinkIsrResponse = assertAlterIsrResponse(
-            shrinkIsrResult, topicPartition, NONE);
-        assertConsistentAlterIsrResponse(replicationControl, topicIdPartition, shrinkIsrResponse);
-
-        PartitionData expandIsrRequest = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1, 2));
-        ControllerResult<AlterIsrResponseData> expandIsrResult = sendAlterIsr(
-            replicationControl, 0, brokerEpoch, "foo", expandIsrRequest);
-        AlterIsrResponseData.PartitionData expandIsrResponse = assertAlterIsrResponse(
-            expandIsrResult, topicPartition, NONE);
-        assertConsistentAlterIsrResponse(replicationControl, topicIdPartition, expandIsrResponse);
+        PartitionData shrinkIsrRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1), LeaderRecoveryState.RECOVERED);
+        ControllerResult<AlterPartitionResponseData> shrinkIsrResult = sendAlterPartition(
+            replicationControl, 0, brokerEpoch, topicIdPartition.topicId(), shrinkIsrRequest);
+        AlterPartitionResponseData.PartitionData shrinkIsrResponse = assertAlterPartitionResponse(
+            shrinkIsrResult, topicIdPartition, NONE);
+        assertConsistentAlterPartitionResponse(replicationControl, topicIdPartition, shrinkIsrResponse);
+
+        PartitionData expandIsrRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1, 2), LeaderRecoveryState.RECOVERED);
+        ControllerResult<AlterPartitionResponseData> expandIsrResult = sendAlterPartition(
+            replicationControl, 0, brokerEpoch, topicIdPartition.topicId(), expandIsrRequest);
+        AlterPartitionResponseData.PartitionData expandIsrResponse = assertAlterPartitionResponse(
+            expandIsrResult, topicIdPartition, NONE);
+        assertConsistentAlterPartitionResponse(replicationControl, topicIdPartition, expandIsrResponse);
+    }
+
+    @ParameterizedTest
+    @ApiKeyVersionsSource(apiKey = ApiKeys.ALTER_PARTITION)
+    public void testAlterPartitionHandleUnknownTopicIdOrName(short version) throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replicationControl = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1, 2);
+
+        String topicName = "foo";
+        Uuid topicId = Uuid.randomUuid();
+
+        AlterPartitionRequestData request = new AlterPartitionRequestData()
+            .setBrokerId(0)
+            .setBrokerEpoch(100)
+            .setTopics(asList(new AlterPartitionRequestData.TopicData()
+                .setTopicName(version <= 1 ? topicName : "")
+                .setTopicId(version > 1 ? topicId : Uuid.ZERO_UUID)
+                .setPartitions(asList(new PartitionData()
+                    .setPartitionIndex(0)))));
+
+        ControllerRequestContext requestContext =
+            anonymousContextFor(ApiKeys.ALTER_PARTITION, version);
+
+        ControllerResult<AlterPartitionResponseData> result =
+            replicationControl.alterPartition(requestContext, request);
+
+        Errors expectedError = version > 1 ? UNKNOWN_TOPIC_ID : UNKNOWN_TOPIC_OR_PARTITION;
+        AlterPartitionResponseData expectedResponse = new AlterPartitionResponseData()
+            .setTopics(asList(new AlterPartitionResponseData.TopicData()
+                .setTopicName(version <= 1 ? topicName : "")
+                .setTopicId(version > 1 ? topicId : Uuid.ZERO_UUID)
+                .setPartitions(asList(new AlterPartitionResponseData.PartitionData()
+                    .setPartitionIndex(0)
+                    .setErrorCode(expectedError.code())))));
+
+        assertEquals(expectedResponse, result.response());
     }
 
     @Test
-    public void testInvalidAlterIsrRequests() throws Exception {
+    public void testInvalidAlterPartitionRequests() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
         ReplicationControlManager replicationControl = ctx.replicationControl;
         ctx.registerBrokers(0, 1, 2);
@@ -710,115 +934,133 @@ public void testInvalidAlterIsrRequests() throws Exception {
             new int[][] {new int[] {0, 1, 2}});
 
         TopicIdPartition topicIdPartition = new TopicIdPartition(createTopicResult.topicId(), 0);
-        TopicPartition topicPartition = new TopicPartition("foo", 0);
-        assertEquals(OptionalInt.of(0), ctx.currentLeader(topicIdPartition));
+        int leaderId = 0;
+        int notLeaderId = 1;
+        assertEquals(OptionalInt.of(leaderId), ctx.currentLeader(topicIdPartition));
         long brokerEpoch = ctx.currentBrokerEpoch(0);
 
         // Invalid leader
-        PartitionData invalidLeaderRequest = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
-        ControllerResult<AlterIsrResponseData> invalidLeaderResult = sendAlterIsr(
-            replicationControl, 1, ctx.currentBrokerEpoch(1),
-            "foo", invalidLeaderRequest);
-        assertAlterIsrResponse(invalidLeaderResult, topicPartition, Errors.INVALID_REQUEST);
+        PartitionData invalidLeaderRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1), LeaderRecoveryState.RECOVERED);
+        ControllerResult<AlterPartitionResponseData> invalidLeaderResult = sendAlterPartition(
+            replicationControl, notLeaderId, ctx.currentBrokerEpoch(notLeaderId),
+            topicIdPartition.topicId(), invalidLeaderRequest);
+        assertAlterPartitionResponse(invalidLeaderResult, topicIdPartition, Errors.INVALID_REQUEST);
 
         // Stale broker epoch
-        PartitionData invalidBrokerEpochRequest = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
-        assertThrows(StaleBrokerEpochException.class, () -> sendAlterIsr(
-            replicationControl, 0, brokerEpoch - 1, "foo", invalidBrokerEpochRequest));
+        PartitionData invalidBrokerEpochRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1), LeaderRecoveryState.RECOVERED);
+        assertThrows(StaleBrokerEpochException.class, () -> sendAlterPartition(
+            replicationControl, leaderId, brokerEpoch - 1, topicIdPartition.topicId(), invalidBrokerEpochRequest));
 
         // Invalid leader epoch
-        PartitionData invalidLeaderEpochRequest = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
+        PartitionData invalidLeaderEpochRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1), LeaderRecoveryState.RECOVERED);
         invalidLeaderEpochRequest.setLeaderEpoch(500);
-        ControllerResult<AlterIsrResponseData> invalidLeaderEpochResult = sendAlterIsr(
-            replicationControl, 1, ctx.currentBrokerEpoch(1),
-            "foo", invalidLeaderEpochRequest);
-        assertAlterIsrResponse(invalidLeaderEpochResult, topicPartition, FENCED_LEADER_EPOCH);
+        ControllerResult<AlterPartitionResponseData> invalidLeaderEpochResult = sendAlterPartition(
+            replicationControl, leaderId, ctx.currentBrokerEpoch(leaderId),
+            topicIdPartition.topicId(), invalidLeaderEpochRequest);
+        assertAlterPartitionResponse(invalidLeaderEpochResult, topicIdPartition, FENCED_LEADER_EPOCH);
 
         // Invalid ISR (3 is not a valid replica)
-        PartitionData invalidIsrRequest1 = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
-        invalidIsrRequest1.setNewIsr(asList(0, 1, 3));
-        ControllerResult<AlterIsrResponseData> invalidIsrResult1 = sendAlterIsr(
-            replicationControl, 1, ctx.currentBrokerEpoch(1),
-            "foo", invalidIsrRequest1);
-        assertAlterIsrResponse(invalidIsrResult1, topicPartition, Errors.INVALID_REQUEST);
+        PartitionData invalidIsrRequest1 = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1, 3), LeaderRecoveryState.RECOVERED);
+        ControllerResult<AlterPartitionResponseData> invalidIsrResult1 = sendAlterPartition(
+            replicationControl, leaderId, ctx.currentBrokerEpoch(leaderId),
+            topicIdPartition.topicId(), invalidIsrRequest1);
+        assertAlterPartitionResponse(invalidIsrResult1, topicIdPartition, Errors.INVALID_REQUEST);
 
         // Invalid ISR (does not include leader 0)
-        PartitionData invalidIsrRequest2 = newAlterIsrPartition(
-            replicationControl, topicIdPartition, asList(0, 1));
-        invalidIsrRequest2.setNewIsr(asList(1, 2));
-        ControllerResult<AlterIsrResponseData> invalidIsrResult2 = sendAlterIsr(
-            replicationControl, 1, ctx.currentBrokerEpoch(1),
-            "foo", invalidIsrRequest2);
-        assertAlterIsrResponse(invalidIsrResult2, topicPartition, Errors.INVALID_REQUEST);
+        PartitionData invalidIsrRequest2 = newAlterPartition(
+            replicationControl, topicIdPartition, asList(1, 2), LeaderRecoveryState.RECOVERED);
+        ControllerResult<AlterPartitionResponseData> invalidIsrResult2 = sendAlterPartition(
+            replicationControl, leaderId, ctx.currentBrokerEpoch(leaderId),
+            topicIdPartition.topicId(), invalidIsrRequest2);
+        assertAlterPartitionResponse(invalidIsrResult2, topicIdPartition, Errors.INVALID_REQUEST);
+
+        // Invalid ISR length and recovery state
+        PartitionData invalidIsrRecoveryRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0, 1), LeaderRecoveryState.RECOVERING);
+        ControllerResult<AlterPartitionResponseData> invalidIsrRecoveryResult = sendAlterPartition(
+            replicationControl, leaderId, ctx.currentBrokerEpoch(leaderId),
+            topicIdPartition.topicId(), invalidIsrRecoveryRequest);
+        assertAlterPartitionResponse(invalidIsrRecoveryResult, topicIdPartition, Errors.INVALID_REQUEST);
+
+        // Invalid recovery state transition from RECOVERED to RECOVERING
+        PartitionData invalidRecoveryRequest = newAlterPartition(
+            replicationControl, topicIdPartition, asList(0), LeaderRecoveryState.RECOVERING);
+        ControllerResult<AlterPartitionResponseData> invalidRecoveryResult = sendAlterPartition(
+            replicationControl, leaderId, ctx.currentBrokerEpoch(leaderId),
+            topicIdPartition.topicId(), invalidRecoveryRequest);
+        assertAlterPartitionResponse(invalidRecoveryResult, topicIdPartition, Errors.INVALID_REQUEST);
     }
 
-    private PartitionData newAlterIsrPartition(
+    private PartitionData newAlterPartition(
         ReplicationControlManager replicationControl,
         TopicIdPartition topicIdPartition,
-        List<Integer> newIsr
+        List<Integer> newIsr,
+        LeaderRecoveryState leaderRecoveryState
     ) {
         PartitionRegistration partitionControl =
             replicationControl.getPartition(topicIdPartition.topicId(), topicIdPartition.partitionId());
-        return new AlterIsrRequestData.PartitionData()
+        return new AlterPartitionRequestData.PartitionData()
             .setPartitionIndex(0)
             .setLeaderEpoch(partitionControl.leaderEpoch)
-            .setCurrentIsrVersion(partitionControl.partitionEpoch)
-            .setNewIsr(newIsr);
+            .setPartitionEpoch(partitionControl.partitionEpoch)
+            .setNewIsr(newIsr)
+            .setLeaderRecoveryState(leaderRecoveryState.value());
     }
 
-    private ControllerResult<AlterIsrResponseData> sendAlterIsr(
+    private ControllerResult<AlterPartitionResponseData> sendAlterPartition(
         ReplicationControlManager replicationControl,
         int brokerId,
         long brokerEpoch,
-        String topic,
-        AlterIsrRequestData.PartitionData partitionData
+        Uuid topicId,
+        AlterPartitionRequestData.PartitionData partitionData
     ) throws Exception {
-        AlterIsrRequestData request = new AlterIsrRequestData()
+        AlterPartitionRequestData request = new AlterPartitionRequestData()
             .setBrokerId(brokerId)
             .setBrokerEpoch(brokerEpoch);
 
-        AlterIsrRequestData.TopicData topicData = new AlterIsrRequestData.TopicData()
-            .setName(topic);
+        AlterPartitionRequestData.TopicData topicData = new AlterPartitionRequestData.TopicData()
+            .setTopicId(topicId);
         request.topics().add(topicData);
         topicData.partitions().add(partitionData);
 
-        ControllerResult<AlterIsrResponseData> result = replicationControl.alterIsr(request);
+        ControllerRequestContext requestContext = anonymousContextFor(ApiKeys.ALTER_PARTITION);
+        ControllerResult<AlterPartitionResponseData> result = replicationControl.alterPartition(requestContext, request);
         RecordTestUtils.replayAll(replicationControl, result.records());
         return result;
     }
 
-    private AlterIsrResponseData.PartitionData assertAlterIsrResponse(
-        ControllerResult<AlterIsrResponseData> alterIsrResult,
-        TopicPartition topicPartition,
+    private AlterPartitionResponseData.PartitionData assertAlterPartitionResponse(
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult,
+        TopicIdPartition topicIdPartition,
         Errors expectedError
     ) {
-        AlterIsrResponseData response = alterIsrResult.response();
+        AlterPartitionResponseData response = alterPartitionResult.response();
         assertEquals(1, response.topics().size());
 
-        AlterIsrResponseData.TopicData topicData = response.topics().get(0);
-        assertEquals(topicPartition.topic(), topicData.name());
+        AlterPartitionResponseData.TopicData topicData = response.topics().get(0);
+        assertEquals(topicIdPartition.topicId(), topicData.topicId());
         assertEquals(1, topicData.partitions().size());
 
-        AlterIsrResponseData.PartitionData partitionData = topicData.partitions().get(0);
-        assertEquals(topicPartition.partition(), partitionData.partitionIndex());
+        AlterPartitionResponseData.PartitionData partitionData = topicData.partitions().get(0);
+        assertEquals(topicIdPartition.partitionId(), partitionData.partitionIndex());
         assertEquals(expectedError, Errors.forCode(partitionData.errorCode()));
         return partitionData;
     }
 
-    private void assertConsistentAlterIsrResponse(
+    private void assertConsistentAlterPartitionResponse(
         ReplicationControlManager replicationControl,
         TopicIdPartition topicIdPartition,
-        AlterIsrResponseData.PartitionData partitionData
+        AlterPartitionResponseData.PartitionData partitionData
     ) {
         PartitionRegistration partitionControl =
             replicationControl.getPartition(topicIdPartition.topicId(), topicIdPartition.partitionId());
         assertEquals(partitionControl.leader, partitionData.leaderId());
         assertEquals(partitionControl.leaderEpoch, partitionData.leaderEpoch());
-        assertEquals(partitionControl.partitionEpoch, partitionData.currentIsrVersion());
+        assertEquals(partitionControl.partitionEpoch, partitionData.partitionEpoch());
         List<Integer> expectedIsr = IntStream.of(partitionControl.isr).boxed().collect(Collectors.toList());
         assertEquals(expectedIsr, partitionData.isr());
     }
@@ -863,7 +1105,7 @@ public void testDeleteTopics() throws Exception {
         ctx.registerBrokers(0, 1);
         ctx.unfenceBrokers(0, 1);
         ControllerResult<CreateTopicsResponseData> createResult =
-            replicationControl.createTopics(request);
+            replicationControl.createTopics(request, Collections.singleton("foo"));
         CreateTopicsResponseData expectedResponse = new CreateTopicsResponseData();
         Uuid topicId = createResult.response().topics().find("foo").topicId();
         expectedResponse.topics().add(new CreatableTopicResult().setName("foo").
@@ -919,7 +1161,6 @@ public void testDeleteTopics() throws Exception {
         assertEmptyTopicConfigs(ctx, "foo");
     }
 
-
     @Test
     public void testCreatePartitions() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
@@ -935,8 +1176,8 @@ public void testCreatePartitions() throws Exception {
             setNumPartitions(2).setReplicationFactor((short) 2));
         ctx.registerBrokers(0, 1);
         ctx.unfenceBrokers(0, 1);
-        ControllerResult<CreateTopicsResponseData> createTopicResult =
-            replicationControl.createTopics(request);
+        ControllerResult<CreateTopicsResponseData> createTopicResult = replicationControl.
+            createTopics(request, new HashSet<>(Arrays.asList("foo", "bar", "quux", "foo2")));
         ctx.replay(createTopicResult.records());
         List<CreatePartitionsTopic> topics = new ArrayList<>();
         topics.add(new CreatePartitionsTopic().
@@ -1005,6 +1246,79 @@ public void testCreatePartitions() throws Exception {
         ctx.replay(createPartitionsResult2.records());
     }
 
+    @Test
+    public void testCreatePartitionsFailsWhenAllBrokersAreFencedOrInControlledShutdown() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replicationControl = ctx.replicationControl;
+        CreateTopicsRequestData request = new CreateTopicsRequestData();
+        request.topics().add(new CreatableTopic().setName("foo").
+            setNumPartitions(1).setReplicationFactor((short) 2));
+
+        ctx.registerBrokers(0, 1);
+        ctx.unfenceBrokers(0, 1);
+
+        ControllerResult<CreateTopicsResponseData> createTopicResult = replicationControl.
+            createTopics(request, new HashSet<>(Arrays.asList("foo")));
+        ctx.replay(createTopicResult.records());
+
+        ctx.registerBrokers(0, 1);
+        ctx.unfenceBrokers(0);
+        ctx.inControlledShutdownBrokers(0);
+
+        List<CreatePartitionsTopic> topics = new ArrayList<>();
+        topics.add(new CreatePartitionsTopic().
+            setName("foo").setCount(2).setAssignments(null));
+        ControllerResult<List<CreatePartitionsTopicResult>> createPartitionsResult =
+            replicationControl.createPartitions(topics);
+
+        assertEquals(
+            asList(new CreatePartitionsTopicResult().
+                setName("foo").
+                setErrorCode(INVALID_REPLICATION_FACTOR.code()).
+                setErrorMessage("Unable to replicate the partition 2 time(s): All " +
+                    "brokers are currently fenced or in controlled shutdown.")),
+            createPartitionsResult.response());
+    }
+
+    @Test
+    public void testCreatePartitionsISRInvariants() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replicationControl = ctx.replicationControl;
+
+        CreateTopicsRequestData request = new CreateTopicsRequestData();
+        request.topics().add(new CreatableTopic().setName("foo").
+            setNumPartitions(1).setReplicationFactor((short) 3));
+
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1);
+        ctx.inControlledShutdownBrokers(1);
+
+        ControllerResult<CreateTopicsResponseData> result =
+            replicationControl.createTopics(request, Collections.singleton("foo"));
+        ctx.replay(result.records());
+
+        List<CreatePartitionsTopic> topics = asList(new CreatePartitionsTopic().
+            setName("foo").setCount(2).setAssignments(null));
+
+        ControllerResult<List<CreatePartitionsTopicResult>> createPartitionsResult =
+            replicationControl.createPartitions(topics);
+        ctx.replay(createPartitionsResult.records());
+
+        // Broker 2 cannot be in the ISR because it is fenced and broker 1
+        // cannot be in the ISR because it is in controlled shutdown.
+        assertEquals(
+            new PartitionRegistration(new int[]{0, 1, 2},
+                new int[]{0},
+                Replicas.NONE,
+                Replicas.NONE,
+                0,
+                LeaderRecoveryState.RECOVERED,
+                0,
+                0),
+            replicationControl.getPartition(
+                ((TopicRecord) result.records().get(0).message()).topicId(), 1));
+    }
+
     @Test
     public void testValidateGoodManualPartitionAssignments() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
@@ -1045,8 +1359,9 @@ public void testValidateBadManualPartitionAssignments() throws Exception {
     private final static ListPartitionReassignmentsResponseData NONE_REASSIGNING =
         new ListPartitionReassignmentsResponseData().setErrorMessage(null);
 
-    @Test
-    public void testReassignPartitions() throws Exception {
+    @ParameterizedTest
+    @ApiKeyVersionsSource(apiKey = ApiKeys.ALTER_PARTITION)
+    public void testReassignPartitions(short version) throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
         ReplicationControlManager replication = ctx.replicationControl;
         ctx.registerBrokers(0, 1, 2, 3);
@@ -1130,22 +1445,168 @@ public void testReassignPartitions() throws Exception {
                         setErrorCode(NO_REASSIGNMENT_IN_PROGRESS.code()).
                         setErrorMessage(null)))))),
             cancelResult);
-        log.info("running final alterIsr...");
-        ControllerResult<AlterIsrResponseData> alterIsrResult = replication.alterIsr(
-            new AlterIsrRequestData().setBrokerId(3).setBrokerEpoch(103).
-                setTopics(asList(new TopicData().setName("foo").setPartitions(asList(
-                    new PartitionData().setPartitionIndex(1).setCurrentIsrVersion(1).
-                        setLeaderEpoch(0).setNewIsr(asList(3, 0, 2, 1)))))));
-        assertEquals(new AlterIsrResponseData().setTopics(asList(
-            new AlterIsrResponseData.TopicData().setName("foo").setPartitions(asList(
-                new AlterIsrResponseData.PartitionData().
+        log.info("running final alterPartition...");
+        ControllerRequestContext requestContext =
+            anonymousContextFor(ApiKeys.ALTER_PARTITION, version);
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult = replication.alterPartition(
+            requestContext,
+            new AlterPartitionRequestData().setBrokerId(3).setBrokerEpoch(103).
+                setTopics(asList(new TopicData().
+                    setTopicName(version <= 1 ? "foo" : "").
+                    setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID).
+                    setPartitions(asList(new PartitionData().
+                        setPartitionIndex(1).
+                        setPartitionEpoch(1).
+                        setLeaderEpoch(0).
+                        setNewIsr(asList(3, 0, 2, 1)))))));
+        Errors expectedError = version > 1 ? NEW_LEADER_ELECTED : FENCED_LEADER_EPOCH;
+        assertEquals(new AlterPartitionResponseData().setTopics(asList(
+            new AlterPartitionResponseData.TopicData().
+                setTopicName(version <= 1 ? "foo" : "").
+                setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID).
+                setPartitions(asList(
+                new AlterPartitionResponseData.PartitionData().
                     setPartitionIndex(1).
-                    setErrorCode(FENCED_LEADER_EPOCH.code()))))),
-            alterIsrResult.response());
-        ctx.replay(alterIsrResult.records());
+                    setErrorCode(expectedError.code()))))),
+            alterPartitionResult.response());
+        ctx.replay(alterPartitionResult.records());
         assertEquals(NONE_REASSIGNING, replication.listPartitionReassignments(null));
     }
 
+    @ParameterizedTest
+    @ApiKeyVersionsSource(apiKey = ApiKeys.ALTER_PARTITION)
+    public void testAlterPartitionShouldRejectFencedBrokers(short version) throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replication = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2, 3, 4);
+        ctx.unfenceBrokers(0, 1, 2, 3, 4);
+        Uuid fooId = ctx.createTestTopic(
+            "foo",
+            new int[][] {new int[] {1, 2, 3, 4}}
+        ).topicId();
+
+        List<ApiMessageAndVersion> fenceRecords = new ArrayList<>();
+        replication.handleBrokerFenced(3, fenceRecords);
+        ctx.replay(fenceRecords);
+
+        assertEquals(
+            new PartitionRegistration(
+                new int[] {1, 2, 3, 4},
+                new int[] {1, 2, 4},
+                new int[] {},
+                new int[] {},
+                1,
+                LeaderRecoveryState.RECOVERED,
+                1,
+                1),
+            replication.getPartition(fooId, 0));
+
+        AlterPartitionRequestData alterIsrRequest = new AlterPartitionRequestData()
+            .setBrokerId(1)
+            .setBrokerEpoch(101)
+            .setTopics(asList(new TopicData()
+                .setTopicName(version <= 1 ? "foo" : "")
+                .setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID)
+                .setPartitions(asList(new PartitionData()
+                    .setPartitionIndex(0)
+                    .setPartitionEpoch(1)
+                    .setLeaderEpoch(1)
+                    .setNewIsr(asList(1, 2, 3, 4))))));
+
+        ControllerRequestContext requestContext =
+            anonymousContextFor(ApiKeys.ALTER_PARTITION, version);
+
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult =
+            replication.alterPartition(requestContext, alterIsrRequest);
+
+        Errors expectedError = version <= 1 ? OPERATION_NOT_ATTEMPTED : INELIGIBLE_REPLICA;
+        assertEquals(
+            new AlterPartitionResponseData()
+                .setTopics(asList(new AlterPartitionResponseData.TopicData()
+                    .setTopicName(version <= 1 ? "foo" : "")
+                    .setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID)
+                    .setPartitions(asList(new AlterPartitionResponseData.PartitionData()
+                        .setPartitionIndex(0)
+                        .setErrorCode(expectedError.code()))))),
+            alterPartitionResult.response());
+
+        fenceRecords = new ArrayList<>();
+        replication.handleBrokerUnfenced(3, 103, fenceRecords);
+        ctx.replay(fenceRecords);
+
+        alterPartitionResult = replication.alterPartition(requestContext, alterIsrRequest);
+
+        assertEquals(
+            new AlterPartitionResponseData()
+                .setTopics(asList(new AlterPartitionResponseData.TopicData()
+                    .setTopicName(version <= 1 ? "foo" : "")
+                    .setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID)
+                    .setPartitions(asList(new AlterPartitionResponseData.PartitionData()
+                        .setPartitionIndex(0)
+                        .setLeaderId(1)
+                        .setLeaderEpoch(1)
+                        .setIsr(asList(1, 2, 3, 4))
+                        .setPartitionEpoch(2)
+                        .setErrorCode(NONE.code()))))),
+            alterPartitionResult.response());
+    }
+
+    @ParameterizedTest
+    @ApiKeyVersionsSource(apiKey = ApiKeys.ALTER_PARTITION)
+    public void testAlterPartitionShouldRejectShuttingDownBrokers(short version) throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replication = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2, 3, 4);
+        ctx.unfenceBrokers(0, 1, 2, 3, 4);
+        Uuid fooId = ctx.createTestTopic(
+            "foo",
+            new int[][] {new int[] {1, 2, 3, 4}}
+        ).topicId();
+
+        assertEquals(
+            new PartitionRegistration(
+                new int[] {1, 2, 3, 4},
+                new int[] {1, 2, 3, 4},
+                new int[] {},
+                new int[] {},
+                1,
+                LeaderRecoveryState.RECOVERED,
+                0,
+                0),
+            replication.getPartition(fooId, 0));
+
+        ctx.inControlledShutdownBrokers(3);
+
+        AlterPartitionRequestData alterIsrRequest = new AlterPartitionRequestData()
+            .setBrokerId(1)
+            .setBrokerEpoch(101)
+            .setTopics(asList(new TopicData()
+                .setTopicName(version <= 1 ? "foo" : "")
+                .setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID)
+                .setPartitions(asList(new PartitionData()
+                    .setPartitionIndex(0)
+                    .setPartitionEpoch(0)
+                    .setLeaderEpoch(0)
+                    .setNewIsr(asList(1, 2, 3, 4))))));
+
+        ControllerRequestContext requestContext =
+            anonymousContextFor(ApiKeys.ALTER_PARTITION, version);
+
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult =
+            replication.alterPartition(requestContext, alterIsrRequest);
+
+        Errors expectedError = version <= 1 ? OPERATION_NOT_ATTEMPTED : INELIGIBLE_REPLICA;
+        assertEquals(
+            new AlterPartitionResponseData()
+                .setTopics(asList(new AlterPartitionResponseData.TopicData()
+                    .setTopicName(version <= 1 ? "foo" : "")
+                    .setTopicId(version > 1 ? fooId : Uuid.ZERO_UUID)
+                    .setPartitions(asList(new AlterPartitionResponseData.PartitionData()
+                        .setPartitionIndex(0)
+                        .setErrorCode(expectedError.code()))))),
+            alterPartitionResult.response());
+    }
+
     @Test
     public void testCancelReassignPartitions() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
@@ -1162,7 +1623,7 @@ public void testCancelReassignPartitions() throws Exception {
         replication.handleBrokerFenced(3, fenceRecords);
         ctx.replay(fenceRecords);
         assertEquals(new PartitionRegistration(new int[] {1, 2, 3, 4}, new int[] {1, 2, 4},
-            new int[] {}, new int[] {}, 1, 1, 1), replication.getPartition(fooId, 0));
+            new int[] {}, new int[] {}, 1, LeaderRecoveryState.RECOVERED, 1, 1), replication.getPartition(fooId, 0));
         ControllerResult<AlterPartitionReassignmentsResponseData> alterResult =
             replication.alterPartitionReassignments(
                 new AlterPartitionReassignmentsRequestData().setTopics(asList(
@@ -1199,11 +1660,11 @@ public void testCancelReassignPartitions() throws Exception {
             alterResult.response());
         ctx.replay(alterResult.records());
         assertEquals(new PartitionRegistration(new int[] {1, 2, 3}, new int[] {1, 2},
-            new int[] {}, new int[] {}, 1, 2, 2), replication.getPartition(fooId, 0));
+            new int[] {}, new int[] {}, 1, LeaderRecoveryState.RECOVERED, 2, 2), replication.getPartition(fooId, 0));
         assertEquals(new PartitionRegistration(new int[] {1, 2, 3, 0}, new int[] {0, 1, 2},
-            new int[] {}, new int[] {}, 0, 1, 2), replication.getPartition(fooId, 1));
+            new int[] {}, new int[] {}, 0, LeaderRecoveryState.RECOVERED, 1, 2), replication.getPartition(fooId, 1));
         assertEquals(new PartitionRegistration(new int[] {1, 2, 3, 4, 0}, new int[] {4, 2},
-            new int[] {}, new int[] {0, 1}, 4, 1, 2), replication.getPartition(barId, 0));
+            new int[] {}, new int[] {0, 1}, 4, LeaderRecoveryState.RECOVERED, 1, 2), replication.getPartition(barId, 0));
         ListPartitionReassignmentsResponseData currentReassigning =
             new ListPartitionReassignmentsResponseData().setErrorMessage(null).
                 setTopics(asList(new OngoingTopicReassignment().
@@ -1219,21 +1680,22 @@ public void testCancelReassignPartitions() throws Exception {
         assertEquals(currentReassigning, replication.listPartitionReassignments(asList(
             new ListPartitionReassignmentsTopics().setName("bar").
                 setPartitionIndexes(asList(0, 1, 2)))));
-        ControllerResult<AlterIsrResponseData> alterIsrResult = replication.alterIsr(
-            new AlterIsrRequestData().setBrokerId(4).setBrokerEpoch(104).
-                setTopics(asList(new TopicData().setName("bar").setPartitions(asList(
-                    new PartitionData().setPartitionIndex(0).setCurrentIsrVersion(2).
-                        setLeaderEpoch(1).setNewIsr(asList(4, 1, 2, 3, 0)))))));
-        assertEquals(new AlterIsrResponseData().setTopics(asList(
-            new AlterIsrResponseData.TopicData().setName("bar").setPartitions(asList(
-                new AlterIsrResponseData.PartitionData().
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult = replication.alterPartition(
+            anonymousContextFor(ApiKeys.ALTER_PARTITION),
+            new AlterPartitionRequestData().setBrokerId(4).setBrokerEpoch(104).
+                setTopics(asList(new TopicData().setTopicId(barId).setPartitions(asList(
+                    new PartitionData().setPartitionIndex(0).setPartitionEpoch(2).
+                        setLeaderEpoch(1).setNewIsr(asList(4, 1, 2, 0)))))));
+        assertEquals(new AlterPartitionResponseData().setTopics(asList(
+            new AlterPartitionResponseData.TopicData().setTopicId(barId).setPartitions(asList(
+                new AlterPartitionResponseData.PartitionData().
                     setPartitionIndex(0).
                     setLeaderId(4).
                     setLeaderEpoch(1).
-                    setIsr(asList(4, 1, 2, 3, 0)).
-                    setCurrentIsrVersion(3).
+                    setIsr(asList(4, 1, 2, 0)).
+                    setPartitionEpoch(3).
                     setErrorCode(NONE.code()))))),
-            alterIsrResult.response());
+            alterPartitionResult.response());
         ControllerResult<AlterPartitionReassignmentsResponseData> cancelResult =
             replication.alterPartitionReassignments(
                 new AlterPartitionReassignmentsRequestData().setTopics(asList(
@@ -1261,7 +1723,7 @@ public void testCancelReassignPartitions() throws Exception {
         ctx.replay(cancelResult.records());
         assertEquals(NONE_REASSIGNING, replication.listPartitionReassignments(null));
         assertEquals(new PartitionRegistration(new int[] {2, 3, 4}, new int[] {4, 2},
-            new int[] {}, new int[] {}, 4, 2, 3), replication.getPartition(barId, 0));
+            new int[] {}, new int[] {}, 4, LeaderRecoveryState.RECOVERED, 2, 3), replication.getPartition(barId, 0));
     }
 
     @Test
@@ -1282,7 +1744,7 @@ public void testCreatePartitionsFailsWithManualAssignmentWithAllFenced() throws
             INVALID_REPLICA_ASSIGNMENT.code());
         ctx.createPartitions(2, "foo", new int[][] {new int[] {2, 4, 5}}, NONE.code());
         assertEquals(new PartitionRegistration(new int[] {2, 4, 5},
-                new int[] {2}, Replicas.NONE, Replicas.NONE, 2, 0, 0),
+                new int[] {2}, Replicas.NONE, Replicas.NONE, 2, LeaderRecoveryState.RECOVERED, 0, 0),
             ctx.replicationControl.getPartition(fooId, 1));
     }
 
@@ -1352,7 +1814,7 @@ public void testElectUncleanLeaders(boolean electAllPartitions) throws Exception
 
         // Bring 2 back into the ISR for partition 1. This allows us to verify that
         // preferred election does not occur as a result of the unclean election request.
-        ctx.alterIsr(partition1, 4, asList(2, 4));
+        ctx.alterPartition(partition1, 4, asList(2, 4), LeaderRecoveryState.RECOVERED);
 
         ControllerResult<ElectLeadersResponseData> result = replication.electLeaders(request);
         assertEquals(1, result.records().size());
@@ -1475,14 +1937,15 @@ public void testElectPreferredLeaders() throws Exception {
         ReplicationControlTestContext ctx = new ReplicationControlTestContext();
         ReplicationControlManager replication = ctx.replicationControl;
         ctx.registerBrokers(0, 1, 2, 3, 4);
-        ctx.unfenceBrokers(2, 3, 4);
+        ctx.unfenceBrokers(1, 2, 3, 4);
+        ctx.inControlledShutdownBrokers(1);
         Uuid fooId = ctx.createTestTopic("foo", new int[][]{
             new int[]{1, 2, 3}, new int[]{2, 3, 4}, new int[]{0, 2, 1}}).topicId();
         ElectLeadersRequestData request1 = new ElectLeadersRequestData().
             setElectionType(ElectionType.PREFERRED.value).
             setTopicPartitions(new TopicPartitionsCollection(asList(
                 new TopicPartitions().setTopic("foo").
-                    setPartitions(asList(0, 1)),
+                    setPartitions(asList(0, 1, 2)),
                 new TopicPartitions().setTopic("bar").
                     setPartitions(asList(0, 1))).iterator()));
         ControllerResult<ElectLeadersResponseData> election1Result =
@@ -1496,6 +1959,10 @@ public void testElectPreferredLeaders() throws Exception {
                 new TopicPartition("foo", 1),
                 new ApiError(ELECTION_NOT_NEEDED)
             ),
+            Utils.mkEntry(
+                new TopicPartition("foo", 2),
+                new ApiError(PREFERRED_LEADER_NOT_AVAILABLE)
+            ),
             Utils.mkEntry(
                 new TopicPartition("bar", 0),
                 new ApiError(UNKNOWN_TOPIC_OR_PARTITION, "No such topic as bar")
@@ -1507,24 +1974,39 @@ public void testElectPreferredLeaders() throws Exception {
         ));
         assertElectLeadersResponse(expectedResponse1, election1Result.response());
         assertEquals(Collections.emptyList(), election1Result.records());
+
+        // Broker 1 must be registered to get out from the controlled shutdown state.
+        ctx.registerBrokers(1);
         ctx.unfenceBrokers(0, 1);
 
-        ControllerResult<AlterIsrResponseData> alterIsrResult = replication.alterIsr(
-            new AlterIsrRequestData().setBrokerId(2).setBrokerEpoch(102).
-                setTopics(asList(new AlterIsrRequestData.TopicData().setName("foo").
-                    setPartitions(asList(new AlterIsrRequestData.PartitionData().
-                        setPartitionIndex(0).setCurrentIsrVersion(0).
-                        setLeaderEpoch(0).setNewIsr(asList(1, 2, 3)))))));
-        assertEquals(new AlterIsrResponseData().setTopics(asList(
-            new AlterIsrResponseData.TopicData().setName("foo").setPartitions(asList(
-                new AlterIsrResponseData.PartitionData().
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult = replication.alterPartition(
+            anonymousContextFor(ApiKeys.ALTER_PARTITION),
+            new AlterPartitionRequestData().setBrokerId(2).setBrokerEpoch(102).
+                setTopics(asList(new AlterPartitionRequestData.TopicData().setTopicId(fooId).
+                    setPartitions(asList(
+                        new AlterPartitionRequestData.PartitionData().
+                            setPartitionIndex(0).setPartitionEpoch(0).
+                            setLeaderEpoch(0).setNewIsr(asList(1, 2, 3)),
+                        new AlterPartitionRequestData.PartitionData().
+                            setPartitionIndex(2).setPartitionEpoch(0).
+                            setLeaderEpoch(0).setNewIsr(asList(0, 2, 1)))))));
+        assertEquals(new AlterPartitionResponseData().setTopics(asList(
+            new AlterPartitionResponseData.TopicData().setTopicId(fooId).setPartitions(asList(
+                new AlterPartitionResponseData.PartitionData().
                     setPartitionIndex(0).
                     setLeaderId(2).
                     setLeaderEpoch(0).
                     setIsr(asList(1, 2, 3)).
-                    setCurrentIsrVersion(1).
+                    setPartitionEpoch(1).
+                    setErrorCode(NONE.code()),
+                new AlterPartitionResponseData.PartitionData().
+                    setPartitionIndex(2).
+                    setLeaderId(2).
+                    setLeaderEpoch(0).
+                    setIsr(asList(0, 2, 1)).
+                    setPartitionEpoch(1).
                     setErrorCode(NONE.code()))))),
-            alterIsrResult.response());
+            alterPartitionResult.response());
 
         ElectLeadersResponseData expectedResponse2 = buildElectLeadersResponse(NONE, false, Utils.mkMap(
             Utils.mkEntry(
@@ -1535,6 +2017,10 @@ public void testElectPreferredLeaders() throws Exception {
                 new TopicPartition("foo", 1),
                 new ApiError(ELECTION_NOT_NEEDED)
             ),
+            Utils.mkEntry(
+                new TopicPartition("foo", 2),
+                ApiError.NONE
+            ),
             Utils.mkEntry(
                 new TopicPartition("bar", 0),
                 new ApiError(UNKNOWN_TOPIC_OR_PARTITION, "No such topic as bar")
@@ -1545,14 +2031,104 @@ public void testElectPreferredLeaders() throws Exception {
             )
         ));
 
-        ctx.replay(alterIsrResult.records());
+        ctx.replay(alterPartitionResult.records());
         ControllerResult<ElectLeadersResponseData> election2Result =
             replication.electLeaders(request1);
         assertElectLeadersResponse(expectedResponse2, election2Result.response());
-        assertEquals(asList(new ApiMessageAndVersion(new PartitionChangeRecord().
-            setPartitionId(0).
-            setTopicId(fooId).
-            setLeader(1), (short) 0)), election2Result.records());
+        assertEquals(
+            asList(
+                new ApiMessageAndVersion(
+                    new PartitionChangeRecord().
+                        setPartitionId(0).
+                        setTopicId(fooId).
+                        setLeader(1),
+                    (short) 0),
+                new ApiMessageAndVersion(
+                    new PartitionChangeRecord().
+                        setPartitionId(2).
+                        setTopicId(fooId).
+                        setLeader(0),
+                    (short) 0)),
+            election2Result.records());
+    }
+
+    @Test
+    public void testBalancePartitionLeaders() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replication = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2, 3, 4);
+        ctx.unfenceBrokers(2, 3, 4);
+        Uuid fooId = ctx.createTestTopic("foo", new int[][]{
+            new int[]{1, 2, 3}, new int[]{2, 3, 4}, new int[]{0, 2, 1}}).topicId();
+
+        assertTrue(replication.arePartitionLeadersImbalanced());
+        assertEquals(2, ctx.metrics.preferredReplicaImbalanceCount());
+
+        ctx.unfenceBrokers(1);
+
+        ControllerResult<AlterPartitionResponseData> alterPartitionResult = replication.alterPartition(
+            anonymousContextFor(ApiKeys.ALTER_PARTITION),
+            new AlterPartitionRequestData().setBrokerId(2).setBrokerEpoch(102).
+                setTopics(asList(new AlterPartitionRequestData.TopicData().setTopicId(fooId).
+                    setPartitions(asList(new AlterPartitionRequestData.PartitionData().
+                        setPartitionIndex(0).setPartitionEpoch(0).
+                        setLeaderEpoch(0).setNewIsr(asList(1, 2, 3)))))));
+        assertEquals(new AlterPartitionResponseData().setTopics(asList(
+            new AlterPartitionResponseData.TopicData().setTopicId(fooId).setPartitions(asList(
+                new AlterPartitionResponseData.PartitionData().
+                    setPartitionIndex(0).
+                    setLeaderId(2).
+                    setLeaderEpoch(0).
+                    setIsr(asList(1, 2, 3)).
+                    setPartitionEpoch(1).
+                    setErrorCode(NONE.code()))))),
+            alterPartitionResult.response());
+        ctx.replay(alterPartitionResult.records());
+
+        ControllerResult<Boolean> balanceResult = replication.maybeBalancePartitionLeaders();
+        ctx.replay(balanceResult.records());
+
+        PartitionChangeRecord expectedChangeRecord = new PartitionChangeRecord()
+            .setPartitionId(0)
+            .setTopicId(fooId)
+            .setLeader(1);
+        assertEquals(asList(new ApiMessageAndVersion(expectedChangeRecord, (short) 0)), balanceResult.records());
+        assertTrue(replication.arePartitionLeadersImbalanced());
+        assertEquals(1, ctx.metrics.preferredReplicaImbalanceCount());
+        assertFalse(balanceResult.response());
+
+        ctx.unfenceBrokers(0);
+
+        alterPartitionResult = replication.alterPartition(
+            anonymousContextFor(ApiKeys.ALTER_PARTITION),
+            new AlterPartitionRequestData().setBrokerId(2).setBrokerEpoch(102).
+                setTopics(asList(new AlterPartitionRequestData.TopicData().setTopicId(fooId).
+                    setPartitions(asList(new AlterPartitionRequestData.PartitionData().
+                        setPartitionIndex(2).setPartitionEpoch(0).
+                        setLeaderEpoch(0).setNewIsr(asList(0, 2, 1)))))));
+        assertEquals(new AlterPartitionResponseData().setTopics(asList(
+            new AlterPartitionResponseData.TopicData().setTopicId(fooId).setPartitions(asList(
+                new AlterPartitionResponseData.PartitionData().
+                    setPartitionIndex(2).
+                    setLeaderId(2).
+                    setLeaderEpoch(0).
+                    setIsr(asList(0, 2, 1)).
+                    setPartitionEpoch(1).
+                    setErrorCode(NONE.code()))))),
+            alterPartitionResult.response());
+        ctx.replay(alterPartitionResult.records());
+
+        balanceResult = replication.maybeBalancePartitionLeaders();
+        ctx.replay(balanceResult.records());
+
+        expectedChangeRecord = new PartitionChangeRecord()
+            .setPartitionId(2)
+            .setTopicId(fooId)
+            .setLeader(0);
+        assertEquals(asList(new ApiMessageAndVersion(expectedChangeRecord, (short) 0)), balanceResult.records());
+        assertFalse(replication.arePartitionLeadersImbalanced());
+        assertEquals(0, ctx.metrics.preferredReplicaImbalanceCount());
+        assertFalse(balanceResult.response());
     }
 
     private void assertElectLeadersResponse(
@@ -1605,4 +2181,65 @@ private ElectLeadersResponseData buildElectLeadersResponse(
         return response;
     }
 
+    @Test
+    public void testKRaftClusterDescriber() throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext();
+        ReplicationControlManager replication = ctx.replicationControl;
+        ctx.registerBrokers(0, 1, 2, 3, 4);
+        ctx.unfenceBrokers(2, 3, 4);
+        ctx.createTestTopic("foo", new int[][]{
+            new int[]{1, 2, 3}, new int[]{2, 3, 4}, new int[]{0, 2, 1}}).topicId();
+        ctx.createTestTopic("bar", new int[][]{
+            new int[]{2, 3, 4}, new int[]{3, 4, 2}}).topicId();
+        KRaftClusterDescriber describer = replication.clusterDescriber;
+        HashSet<UsableBroker> brokers = new HashSet<>();
+        describer.usableBrokers().forEachRemaining(broker -> brokers.add(broker));
+        assertEquals(new HashSet<>(Arrays.asList(
+            new UsableBroker(0, Optional.empty(), true),
+            new UsableBroker(1, Optional.empty(), true),
+            new UsableBroker(2, Optional.empty(), false),
+            new UsableBroker(3, Optional.empty(), false),
+            new UsableBroker(4, Optional.empty(), false))), brokers);
+    }
+
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class, names = {"IBP_3_3_IV2", "IBP_3_3_IV3"})
+    public void testProcessBrokerHeartbeatInControlledShutdown(MetadataVersion metadataVersion) throws Exception {
+        ReplicationControlTestContext ctx = new ReplicationControlTestContext(metadataVersion);
+        ctx.registerBrokers(0, 1, 2);
+        ctx.unfenceBrokers(0, 1, 2);
+
+        Uuid topicId = ctx.createTestTopic("foo", new int[][]{new int[]{0, 1, 2}}).topicId();
+
+        BrokerHeartbeatRequestData heartbeatRequest = new BrokerHeartbeatRequestData()
+            .setBrokerId(0)
+            .setBrokerEpoch(100)
+            .setCurrentMetadataOffset(0)
+            .setWantShutDown(true);
+
+        ControllerResult<BrokerHeartbeatReply> result = ctx.replicationControl
+            .processBrokerHeartbeat(heartbeatRequest, 0);
+
+        List<ApiMessageAndVersion> expectedRecords = new ArrayList<>();
+
+        if (metadataVersion.isInControlledShutdownStateSupported()) {
+            expectedRecords.add(new ApiMessageAndVersion(
+                new BrokerRegistrationChangeRecord()
+                    .setBrokerEpoch(100)
+                    .setBrokerId(0)
+                    .setInControlledShutdown(BrokerRegistrationInControlledShutdownChange
+                        .IN_CONTROLLED_SHUTDOWN.value()),
+                (short) 1));
+        }
+
+        expectedRecords.add(new ApiMessageAndVersion(
+            new PartitionChangeRecord()
+                .setPartitionId(0)
+                .setTopicId(topicId)
+                .setIsr(asList(1, 2))
+                .setLeader(1),
+            (short) 0));
+
+        assertEquals(expectedRecords, result.records());
+    }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/controller/SnapshotGeneratorTest.java b/metadata/src/test/java/org/apache/kafka/controller/SnapshotGeneratorTest.java
index 2c61dbcdc74df..f7fa18f20a4cf 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/SnapshotGeneratorTest.java
+++ b/metadata/src/test/java/org/apache/kafka/controller/SnapshotGeneratorTest.java
@@ -41,6 +41,7 @@
 import java.util.OptionalLong;
 import java.util.Optional;
 
+import static org.apache.kafka.raft.KafkaRaftClient.MAX_BATCH_SIZE_BYTES;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -94,7 +95,7 @@ private SnapshotWriter<ApiMessageAndVersion> createSnapshotWriter(
     ) {
         return RecordsSnapshotWriter.createWithHeader(
             () -> createNewSnapshot(new OffsetAndEpoch(committedOffset + 1, 1)),
-            1024,
+            MAX_BATCH_SIZE_BYTES,
             MemoryPool.NONE,
             new MockTime(),
             lastContainedLogTime,
diff --git a/metadata/src/test/java/org/apache/kafka/image/AclsDeltaTest.java b/metadata/src/test/java/org/apache/kafka/image/AclsDeltaTest.java
new file mode 100644
index 0000000000000..13d3aceb5c898
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/image/AclsDeltaTest.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.image;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.apache.kafka.common.Uuid;
+import org.apache.kafka.common.metadata.AccessControlEntryRecord;
+import org.apache.kafka.common.metadata.RemoveAccessControlEntryRecord;
+import org.apache.kafka.metadata.authorizer.StandardAcl;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+@Timeout(40)
+public class AclsDeltaTest {
+
+    private Uuid aclId = Uuid.fromString("iOZpss6VQUmD6blnqzl50g");
+
+    @Test
+    public void testRemovesDeleteIfNotInImage() {
+        AclsImage image = new AclsImage(Collections.emptyMap());
+        AclsDelta delta = new AclsDelta(image);
+        AccessControlEntryRecord inputAclRecord = testAccessControlEntryRecord();
+
+        assertEquals(0, delta.changes().size());
+
+        delta.replay(inputAclRecord);
+        assertEquals(Optional.of(testStandardAcl()), delta.changes().get(aclId));
+
+        RemoveAccessControlEntryRecord inputRemoveAclRecord = testRemoveAccessControlEntryRecord();
+        delta.replay(inputRemoveAclRecord);
+
+        assertFalse(delta.changes().containsKey(aclId));
+    }
+
+    @Test
+    public void testKeepsDeleteIfInImage() {
+        Map<Uuid, StandardAcl> initialImageMap = new HashMap<>();
+        initialImageMap.put(aclId, testStandardAcl());
+        AclsImage image = new AclsImage(initialImageMap);
+        AclsDelta delta = new AclsDelta(image);
+
+        assertEquals(0, delta.changes().size());
+
+        RemoveAccessControlEntryRecord removeAccessControlEntryRecord = testRemoveAccessControlEntryRecord();
+        delta.replay(removeAccessControlEntryRecord);
+
+        assertTrue(delta.changes().containsKey(aclId));
+        assertEquals(Optional.empty(), delta.changes().get(aclId));
+    }
+
+    @Test
+    public void testThrowsExceptionOnInvalidStateWhenImageIsEmpty() {
+        AclsImage image = new AclsImage(Collections.emptyMap());
+        AclsDelta delta = new AclsDelta(image);
+
+        RemoveAccessControlEntryRecord removeAccessControlEntryRecord = testRemoveAccessControlEntryRecord();
+        assertThrows(IllegalStateException.class, () -> delta.replay(removeAccessControlEntryRecord));
+    }
+
+    @Test
+    public void testThrowsExceptionOnInvalidStateWhenImageHasOtherAcls() {
+        Uuid id = Uuid.fromString("nGiNMQHwRgmgsIlfu73aJQ");
+        AccessControlEntryRecord record = new AccessControlEntryRecord();
+        record.setId(id);
+        record.setResourceType((byte) 1);
+        record.setResourceName("foo");
+        record.setPatternType((byte) 1);
+        record.setPrincipal("User:user");
+        record.setHost("host");
+        record.setOperation((byte) 1);
+        record.setPermissionType((byte) 1);
+
+        Map<Uuid, StandardAcl> initialImageMap = new HashMap<>();
+        initialImageMap.put(id, StandardAcl.fromRecord(record));
+        AclsImage image = new AclsImage(initialImageMap);
+        AclsDelta delta = new AclsDelta(image);
+
+        RemoveAccessControlEntryRecord removeAccessControlEntryRecord = testRemoveAccessControlEntryRecord();
+        assertThrows(IllegalStateException.class, () -> delta.replay(removeAccessControlEntryRecord));
+    }
+
+    private AccessControlEntryRecord testAccessControlEntryRecord() {
+        AccessControlEntryRecord record = new AccessControlEntryRecord();
+        record.setId(aclId);
+        record.setResourceType((byte) 1);
+        record.setResourceName("foo");
+        record.setPatternType((byte) 1);
+        record.setPrincipal("User:user");
+        record.setHost("host");
+        record.setOperation((byte) 1);
+        record.setPermissionType((byte) 1);
+        return record;
+    }
+
+    private RemoveAccessControlEntryRecord testRemoveAccessControlEntryRecord() {
+        RemoveAccessControlEntryRecord record = new RemoveAccessControlEntryRecord();
+        record.setId(aclId);
+        return record;
+    }
+
+    private StandardAcl testStandardAcl() {
+        return StandardAcl.fromRecord(testAccessControlEntryRecord());
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/image/ClusterImageTest.java b/metadata/src/test/java/org/apache/kafka/image/ClusterImageTest.java
index 6908cf2a78b6f..59d5d2fed940a 100644
--- a/metadata/src/test/java/org/apache/kafka/image/ClusterImageTest.java
+++ b/metadata/src/test/java/org/apache/kafka/image/ClusterImageTest.java
@@ -19,14 +19,17 @@
 
 import org.apache.kafka.common.Endpoint;
 import org.apache.kafka.common.Uuid;
+import org.apache.kafka.common.metadata.BrokerRegistrationChangeRecord;
 import org.apache.kafka.common.metadata.FenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnregisterBrokerRecord;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.metadata.BrokerRegistration;
+import org.apache.kafka.metadata.BrokerRegistrationInControlledShutdownChange;
 import org.apache.kafka.metadata.RecordTestUtils;
 import org.apache.kafka.metadata.VersionRange;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
@@ -60,15 +63,17 @@ public class ClusterImageTest {
             1000,
             Uuid.fromString("vZKYST0pSA2HO5x_6hoO2Q"),
             Arrays.asList(new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "localhost", 9092)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 3)),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 3)),
             Optional.empty(),
-            true));
+            true,
+            false));
         map1.put(1, new BrokerRegistration(1,
             1001,
             Uuid.fromString("U52uRe20RsGI0RvpcTx33Q"),
             Arrays.asList(new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "localhost", 9093)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 3)),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 3)),
             Optional.empty(),
+            false,
             false));
         map1.put(2, new BrokerRegistration(2,
             123,
@@ -76,6 +81,7 @@ public class ClusterImageTest {
             Arrays.asList(new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "localhost", 9093)),
             Collections.emptyMap(),
             Optional.of("arack"),
+            false,
             false));
         IMAGE1 = new ClusterImage(map1);
 
@@ -84,6 +90,10 @@ public class ClusterImageTest {
             setId(0).setEpoch(1000), UNFENCE_BROKER_RECORD.highestSupportedVersion()));
         DELTA1_RECORDS.add(new ApiMessageAndVersion(new FenceBrokerRecord().
             setId(1).setEpoch(1001), FENCE_BROKER_RECORD.highestSupportedVersion()));
+        DELTA1_RECORDS.add(new ApiMessageAndVersion(new BrokerRegistrationChangeRecord().
+            setBrokerId(0).setBrokerEpoch(1000).setInControlledShutdown(
+                BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.value()),
+            FENCE_BROKER_RECORD.highestSupportedVersion()));
         DELTA1_RECORDS.add(new ApiMessageAndVersion(new UnregisterBrokerRecord().
             setBrokerId(2).setBrokerEpoch(123),
             UNREGISTER_BROKER_RECORD.highestSupportedVersion()));
@@ -96,16 +106,18 @@ public class ClusterImageTest {
             1000,
             Uuid.fromString("vZKYST0pSA2HO5x_6hoO2Q"),
             Arrays.asList(new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "localhost", 9092)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 3)),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 3)),
             Optional.empty(),
-            false));
+            false,
+            true));
         map2.put(1, new BrokerRegistration(1,
             1001,
             Uuid.fromString("U52uRe20RsGI0RvpcTx33Q"),
             Arrays.asList(new Endpoint("PLAINTEXT", SecurityProtocol.PLAINTEXT, "localhost", 9093)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 3)),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 3)),
             Optional.empty(),
-            true));
+            true,
+            false));
         IMAGE2 = new ClusterImage(map2);
     }
 
@@ -131,7 +143,7 @@ public void testImage2RoundTrip() throws Throwable {
 
     private void testToImageAndBack(ClusterImage image) throws Throwable {
         MockSnapshotConsumer writer = new MockSnapshotConsumer();
-        image.write(writer);
+        image.write(writer, MetadataVersion.latest());
         ClusterDelta delta = new ClusterDelta(ClusterImage.EMPTY);
         RecordTestUtils.replayAllBatches(delta, writer.batches());
         ClusterImage nextImage = delta.apply();
diff --git a/metadata/src/test/java/org/apache/kafka/image/FeaturesImageTest.java b/metadata/src/test/java/org/apache/kafka/image/FeaturesImageTest.java
index 720086f87b941..6ea31b080b98a 100644
--- a/metadata/src/test/java/org/apache/kafka/image/FeaturesImageTest.java
+++ b/metadata/src/test/java/org/apache/kafka/image/FeaturesImageTest.java
@@ -18,10 +18,9 @@
 package org.apache.kafka.image;
 
 import org.apache.kafka.common.metadata.FeatureLevelRecord;
-import org.apache.kafka.common.metadata.RemoveFeatureLevelRecord;
 import org.apache.kafka.metadata.RecordTestUtils;
-import org.apache.kafka.metadata.VersionRange;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
@@ -30,8 +29,6 @@
 import java.util.List;
 import java.util.Map;
 
-import static org.apache.kafka.common.metadata.MetadataRecordType.FEATURE_LEVEL_RECORD;
-import static org.apache.kafka.common.metadata.MetadataRecordType.REMOVE_FEATURE_LEVEL_RECORD;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
@@ -43,27 +40,29 @@ public class FeaturesImageTest {
     final static FeaturesImage IMAGE2;
 
     static {
-        Map<String, VersionRange> map1 = new HashMap<>();
-        map1.put("foo", new VersionRange((short) 1, (short) 2));
-        map1.put("bar", new VersionRange((short) 1, (short) 1));
-        map1.put("baz", new VersionRange((short) 1, (short) 8));
-        IMAGE1 = new FeaturesImage(map1);
+        Map<String, Short> map1 = new HashMap<>();
+        map1.put("foo", (short) 2);
+        map1.put("bar", (short) 1);
+        map1.put("baz", (short) 8);
+        IMAGE1 = new FeaturesImage(map1, MetadataVersion.latest());
 
         DELTA1_RECORDS = new ArrayList<>();
         DELTA1_RECORDS.add(new ApiMessageAndVersion(new FeatureLevelRecord().
-            setName("foo").setMinFeatureLevel((short) 1).setMaxFeatureLevel((short) 3),
-            FEATURE_LEVEL_RECORD.highestSupportedVersion()));
-        DELTA1_RECORDS.add(new ApiMessageAndVersion(new RemoveFeatureLevelRecord().
-            setName("bar"), REMOVE_FEATURE_LEVEL_RECORD.highestSupportedVersion()));
-        DELTA1_RECORDS.add(new ApiMessageAndVersion(new RemoveFeatureLevelRecord().
-            setName("baz"), REMOVE_FEATURE_LEVEL_RECORD.highestSupportedVersion()));
+            setName("foo").setFeatureLevel((short) 3),
+            (short) 0));
+        DELTA1_RECORDS.add(new ApiMessageAndVersion(new FeatureLevelRecord().
+            setName("bar").setFeatureLevel((short) 0),
+            (short) 0));
+        DELTA1_RECORDS.add(new ApiMessageAndVersion(new FeatureLevelRecord().
+            setName("baz").setFeatureLevel((short) 0),
+            (short) 0));
 
         DELTA1 = new FeaturesDelta(IMAGE1);
         RecordTestUtils.replayAll(DELTA1, DELTA1_RECORDS);
 
-        Map<String, VersionRange> map2 = new HashMap<>();
-        map2.put("foo", new VersionRange((short) 1, (short) 3));
-        IMAGE2 = new FeaturesImage(map2);
+        Map<String, Short> map2 = new HashMap<>();
+        map2.put("foo", (short) 3);
+        IMAGE2 = new FeaturesImage(map2, MetadataVersion.latest());
     }
 
     @Test
diff --git a/metadata/src/test/java/org/apache/kafka/image/TopicsImageTest.java b/metadata/src/test/java/org/apache/kafka/image/TopicsImageTest.java
index e417fb2202361..3f6dece390e22 100644
--- a/metadata/src/test/java/org/apache/kafka/image/TopicsImageTest.java
+++ b/metadata/src/test/java/org/apache/kafka/image/TopicsImageTest.java
@@ -23,6 +23,7 @@
 import org.apache.kafka.common.metadata.PartitionRecord;
 import org.apache.kafka.common.metadata.RemoveTopicRecord;
 import org.apache.kafka.common.metadata.TopicRecord;
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.apache.kafka.metadata.PartitionRegistration;
 import org.apache.kafka.metadata.RecordTestUtils;
 import org.apache.kafka.metadata.Replicas;
@@ -96,14 +97,14 @@ private static Map<String, TopicImage> newTopicsByNameMap(Collection<TopicImage>
         TOPIC_IMAGES1 = Arrays.asList(
             newTopicImage("foo", FOO_UUID,
                 new PartitionRegistration(new int[] {2, 3, 4},
-                    new int[] {2, 3}, Replicas.NONE, Replicas.NONE, 2, 1, 345),
+                    new int[] {2, 3}, Replicas.NONE, Replicas.NONE, 2, LeaderRecoveryState.RECOVERED, 1, 345),
                 new PartitionRegistration(new int[] {3, 4, 5},
-                    new int[] {3, 4, 5}, Replicas.NONE, Replicas.NONE, 3, 4, 684),
+                    new int[] {3, 4, 5}, Replicas.NONE, Replicas.NONE, 3, LeaderRecoveryState.RECOVERED, 4, 684),
                 new PartitionRegistration(new int[] {2, 4, 5},
-                    new int[] {2, 4, 5}, Replicas.NONE, Replicas.NONE, 2, 10, 84)),
+                    new int[] {2, 4, 5}, Replicas.NONE, Replicas.NONE, 2, LeaderRecoveryState.RECOVERED, 10, 84)),
             newTopicImage("bar", BAR_UUID,
                 new PartitionRegistration(new int[] {0, 1, 2, 3, 4},
-                    new int[] {0, 1, 2, 3}, new int[] {1}, new int[] {3, 4}, 0, 1, 345)));
+                    new int[] {0, 1, 2, 3}, new int[] {1}, new int[] {3, 4}, 0, LeaderRecoveryState.RECOVERED, 1, 345)));
 
         IMAGE1 = new TopicsImage(newTopicsByIdMap(TOPIC_IMAGES1), newTopicsByNameMap(TOPIC_IMAGES1));
 
@@ -135,10 +136,10 @@ private static Map<String, TopicImage> newTopicsByNameMap(Collection<TopicImage>
         List<TopicImage> topics2 = Arrays.asList(
             newTopicImage("bar", BAR_UUID,
                 new PartitionRegistration(new int[] {0, 1, 2, 3, 4},
-                    new int[] {0, 1, 2, 3}, new int[] {1}, new int[] {3, 4}, 1, 2, 346)),
+                    new int[] {0, 1, 2, 3}, new int[] {1}, new int[] {3, 4}, 1, LeaderRecoveryState.RECOVERED, 2, 346)),
             newTopicImage("baz", BAZ_UUID,
                 new PartitionRegistration(new int[] {1, 2, 3, 4},
-                    new int[] {3, 4}, new int[] {2}, new int[] {1}, 3, 2, 1)));
+                    new int[] {3, 4}, new int[] {2}, new int[] {1}, 3, LeaderRecoveryState.RECOVERED, 2, 1)));
         IMAGE2 = new TopicsImage(newTopicsByIdMap(topics2), newTopicsByNameMap(topics2));
     }
 
@@ -157,7 +158,7 @@ private ApiMessageAndVersion newPartitionRecord(Uuid topicId, int partitionId, L
     }
 
     private PartitionRegistration newPartition(int[] replicas) {
-        return new PartitionRegistration(replicas, replicas, Replicas.NONE, Replicas.NONE, replicas[0], 1, 1);
+        return new PartitionRegistration(replicas, replicas, Replicas.NONE, Replicas.NONE, replicas[0], LeaderRecoveryState.RECOVERED, 1, 1);
     }
 
     @Test
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationFencingChangeTest.java b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationFencingChangeTest.java
new file mode 100644
index 0000000000000..8f48923c99373
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationFencingChangeTest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+@Timeout(40)
+public class BrokerRegistrationFencingChangeTest {
+    @Test
+    public void testValues() {
+        assertEquals((byte) 1, BrokerRegistrationFencingChange.FENCE.value());
+        assertEquals((byte) 0, BrokerRegistrationFencingChange.NONE.value());
+        assertEquals((byte) -1, BrokerRegistrationFencingChange.UNFENCE.value());
+    }
+
+    @Test
+    public void testAsBoolean() {
+        assertEquals(Optional.of(true), BrokerRegistrationFencingChange.FENCE.asBoolean());
+        assertEquals(Optional.empty(), BrokerRegistrationFencingChange.NONE.asBoolean());
+        assertEquals(Optional.of(false), BrokerRegistrationFencingChange.UNFENCE.asBoolean());
+    }
+
+    @Test
+    public void testValueRoundTrip() {
+        for (BrokerRegistrationFencingChange change : BrokerRegistrationFencingChange.values()) {
+            assertEquals(Optional.of(change), BrokerRegistrationFencingChange.fromValue(change.value()));
+        }
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChangeTest.java b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChangeTest.java
new file mode 100644
index 0000000000000..7f6b69031c40c
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationInControlledShutdownChangeTest.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+import java.util.Optional;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+@Timeout(40)
+public class BrokerRegistrationInControlledShutdownChangeTest {
+
+    @Test
+    public void testValues() {
+        assertEquals((byte) 0, BrokerRegistrationInControlledShutdownChange.NONE.value());
+        assertEquals((byte) 1, BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.value());
+    }
+
+    @Test
+    public void testAsBoolean() {
+        assertEquals(Optional.empty(), BrokerRegistrationInControlledShutdownChange.NONE.asBoolean());
+        assertEquals(Optional.of(true), BrokerRegistrationInControlledShutdownChange.IN_CONTROLLED_SHUTDOWN.asBoolean());
+    }
+
+    @Test
+    public void testValueRoundTrip() {
+        for (BrokerRegistrationInControlledShutdownChange change : BrokerRegistrationInControlledShutdownChange.values()) {
+            assertEquals(Optional.of(change), BrokerRegistrationInControlledShutdownChange.fromValue(change.value()));
+        }
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationTest.java b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationTest.java
index 0f350c46221ba..10d1169412cd3 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/BrokerRegistrationTest.java
@@ -23,6 +23,7 @@
 import org.apache.kafka.common.metadata.RegisterBrokerRecord;
 import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
+import org.apache.kafka.server.common.MetadataVersion;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
@@ -32,24 +33,23 @@
 import java.util.Optional;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 
 @Timeout(value = 40)
 public class BrokerRegistrationTest {
     private static final List<BrokerRegistration> REGISTRATIONS = Arrays.asList(
         new BrokerRegistration(0, 0, Uuid.fromString("pc1GhUlBS92cGGaKXl6ipw"),
             Arrays.asList(new Endpoint("INTERNAL", SecurityProtocol.PLAINTEXT, "localhost", 9090)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 2)),
-            Optional.empty(), false),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 2)),
+            Optional.empty(), false, false),
         new BrokerRegistration(1, 0, Uuid.fromString("3MfdxWlNSn2UDYsmDP1pYg"),
             Arrays.asList(new Endpoint("INTERNAL", SecurityProtocol.PLAINTEXT, "localhost", 9091)),
-            Collections.singletonMap("foo", new VersionRange((short) 1, (short) 2)),
-            Optional.empty(), false),
+            Collections.singletonMap("foo", VersionRange.of((short) 1, (short) 2)),
+            Optional.empty(), true, false),
         new BrokerRegistration(2, 0, Uuid.fromString("eY7oaG1RREie5Kk9uy1l6g"),
             Arrays.asList(new Endpoint("INTERNAL", SecurityProtocol.PLAINTEXT, "localhost", 9092)),
-            Collections.singletonMap("foo", new VersionRange((short) 2, (short) 3)),
-            Optional.of("myrack"), false));
+            Collections.singletonMap("foo", VersionRange.of((short) 2, (short) 3)),
+            Optional.of("myrack"), false, true));
 
     @Test
     public void testValues() {
@@ -60,13 +60,13 @@ public void testValues() {
 
     @Test
     public void testEquals() {
-        assertFalse(REGISTRATIONS.get(0).equals(REGISTRATIONS.get(1)));
-        assertFalse(REGISTRATIONS.get(1).equals(REGISTRATIONS.get(0)));
-        assertFalse(REGISTRATIONS.get(0).equals(REGISTRATIONS.get(2)));
-        assertFalse(REGISTRATIONS.get(2).equals(REGISTRATIONS.get(0)));
-        assertTrue(REGISTRATIONS.get(0).equals(REGISTRATIONS.get(0)));
-        assertTrue(REGISTRATIONS.get(1).equals(REGISTRATIONS.get(1)));
-        assertTrue(REGISTRATIONS.get(2).equals(REGISTRATIONS.get(2)));
+        assertNotEquals(REGISTRATIONS.get(0), REGISTRATIONS.get(1));
+        assertNotEquals(REGISTRATIONS.get(1), REGISTRATIONS.get(0));
+        assertNotEquals(REGISTRATIONS.get(0), REGISTRATIONS.get(2));
+        assertNotEquals(REGISTRATIONS.get(2), REGISTRATIONS.get(0));
+        assertEquals(REGISTRATIONS.get(0), REGISTRATIONS.get(0));
+        assertEquals(REGISTRATIONS.get(1), REGISTRATIONS.get(1));
+        assertEquals(REGISTRATIONS.get(2), REGISTRATIONS.get(2));
     }
 
     @Test
@@ -75,7 +75,7 @@ public void testToString() {
             "incarnationId=3MfdxWlNSn2UDYsmDP1pYg, listeners=[Endpoint(" +
             "listenerName='INTERNAL', securityProtocol=PLAINTEXT, " +
             "host='localhost', port=9091)], supportedFeatures={foo: 1-2}, " +
-            "rack=Optional.empty, fenced=false)",
+            "rack=Optional.empty, fenced=true, inControlledShutdown=false)",
             REGISTRATIONS.get(1).toString());
     }
 
@@ -87,11 +87,11 @@ public void testFromRecordAndToRecord() {
     }
 
     private void testRoundTrip(BrokerRegistration registration) {
-        ApiMessageAndVersion messageAndVersion = registration.toRecord();
+        ApiMessageAndVersion messageAndVersion = registration.toRecord(MetadataVersion.latest());
         BrokerRegistration registration2 = BrokerRegistration.fromRecord(
             (RegisterBrokerRecord) messageAndVersion.message());
         assertEquals(registration, registration2);
-        ApiMessageAndVersion messageAndVersion2 = registration2.toRecord();
+        ApiMessageAndVersion messageAndVersion2 = registration2.toRecord(MetadataVersion.latest());
         assertEquals(messageAndVersion, messageAndVersion2);
     }
 
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/ConfigSynonymTest.java b/metadata/src/test/java/org/apache/kafka/metadata/ConfigSynonymTest.java
new file mode 100644
index 0000000000000..93f63c21091f3
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/metadata/ConfigSynonymTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+@Timeout(value = 40)
+public class ConfigSynonymTest {
+    @Test
+    public void testHoursToMilliseconds() {
+        assertEquals("0", ConfigSynonym.HOURS_TO_MILLISECONDS.apply(""));
+        assertEquals("0", ConfigSynonym.HOURS_TO_MILLISECONDS.apply(" "));
+        assertEquals("0", ConfigSynonym.HOURS_TO_MILLISECONDS.apply("0"));
+        assertEquals("442800000", ConfigSynonym.HOURS_TO_MILLISECONDS.apply("123"));
+        assertEquals("442800000", ConfigSynonym.HOURS_TO_MILLISECONDS.apply(" 123 "));
+        assertEquals("0", ConfigSynonym.HOURS_TO_MILLISECONDS.apply("not_a_number"));
+    }
+
+    @Test
+    public void testMinutesToMilliseconds() {
+        assertEquals("0", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply(""));
+        assertEquals("0", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply(" "));
+        assertEquals("0", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply("0"));
+        assertEquals("7380000", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply("123"));
+        assertEquals("7380000", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply(" 123 "));
+        assertEquals("0", ConfigSynonym.MINUTES_TO_MILLISECONDS.apply("not_a_number"));
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/KafkaConfigSchemaTest.java b/metadata/src/test/java/org/apache/kafka/metadata/KafkaConfigSchemaTest.java
index fae40e2592677..36089d0f9a4f2 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/KafkaConfigSchemaTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/KafkaConfigSchemaTest.java
@@ -17,16 +17,22 @@
 
 package org.apache.kafka.metadata;
 
+import org.apache.kafka.clients.admin.ConfigEntry;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigResource;
+import org.apache.kafka.common.requests.DescribeConfigsResponse;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
+import java.util.Arrays;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
+import static java.util.Collections.emptyList;
 import static org.apache.kafka.common.config.ConfigResource.Type.BROKER;
 import static org.apache.kafka.common.config.ConfigResource.Type.TOPIC;
+import static org.apache.kafka.metadata.ConfigSynonym.HOURS_TO_MILLISECONDS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -38,42 +44,122 @@ public class KafkaConfigSchemaTest {
 
     static {
         CONFIGS.put(BROKER, new ConfigDef().
-            define("foo.bar", ConfigDef.Type.LIST, "1", ConfigDef.Importance.HIGH, "foo bar").
-            define("baz", ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "baz").
-            define("quux", ConfigDef.Type.INT, ConfigDef.Importance.HIGH, "quux").
-            define("quuux", ConfigDef.Type.PASSWORD, ConfigDef.Importance.HIGH, "quuux"));
+            define("foo.bar", ConfigDef.Type.LIST, "1", ConfigDef.Importance.HIGH, "foo bar doc").
+            define("baz", ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "baz doc").
+            define("quux", ConfigDef.Type.INT, ConfigDef.Importance.HIGH, "quux doc").
+            define("quuux", ConfigDef.Type.PASSWORD, ConfigDef.Importance.HIGH, "quuux doc").
+            define("quuux2", ConfigDef.Type.PASSWORD, ConfigDef.Importance.HIGH, "quuux2 doc"));
         CONFIGS.put(TOPIC, new ConfigDef().
-            define("abc", ConfigDef.Type.LIST, ConfigDef.Importance.HIGH, "abc").
-            define("def", ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "def").
-            define("ghi", ConfigDef.Type.BOOLEAN, true, ConfigDef.Importance.HIGH, "ghi").
-            define("xyz", ConfigDef.Type.PASSWORD, "thedefault", ConfigDef.Importance.HIGH, "xyz"));
+            define("abc", ConfigDef.Type.LIST, ConfigDef.Importance.HIGH, "abc doc").
+            define("def", ConfigDef.Type.LONG, ConfigDef.Importance.HIGH, "def doc").
+            define("ghi", ConfigDef.Type.BOOLEAN, true, ConfigDef.Importance.HIGH, "ghi doc").
+            define("xyz", ConfigDef.Type.PASSWORD, "thedefault", ConfigDef.Importance.HIGH, "xyz doc"));
+    }
+
+    public static final Map<String, List<ConfigSynonym>> SYNONYMS = new HashMap<>();
+
+    static {
+        SYNONYMS.put("abc", Arrays.asList(new ConfigSynonym("foo.bar")));
+        SYNONYMS.put("def", Arrays.asList(new ConfigSynonym("quux", HOURS_TO_MILLISECONDS)));
+        SYNONYMS.put("ghi", Arrays.asList(new ConfigSynonym("ghi")));
+        SYNONYMS.put("xyz", Arrays.asList(new ConfigSynonym("quuux"), new ConfigSynonym("quuux2")));
+    }
+
+    private static final KafkaConfigSchema SCHEMA = new KafkaConfigSchema(CONFIGS, SYNONYMS);
+
+    @Test
+    public void testTranslateConfigTypes() {
+        testTranslateConfigType(ConfigDef.Type.BOOLEAN, ConfigEntry.ConfigType.BOOLEAN);
+        testTranslateConfigType(ConfigDef.Type.STRING, ConfigEntry.ConfigType.STRING);
+        testTranslateConfigType(ConfigDef.Type.INT, ConfigEntry.ConfigType.INT);
+        testTranslateConfigType(ConfigDef.Type.SHORT, ConfigEntry.ConfigType.SHORT);
+        testTranslateConfigType(ConfigDef.Type.LONG, ConfigEntry.ConfigType.LONG);
+        testTranslateConfigType(ConfigDef.Type.DOUBLE, ConfigEntry.ConfigType.DOUBLE);
+        testTranslateConfigType(ConfigDef.Type.LIST, ConfigEntry.ConfigType.LIST);
+        testTranslateConfigType(ConfigDef.Type.CLASS, ConfigEntry.ConfigType.CLASS);
+        testTranslateConfigType(ConfigDef.Type.PASSWORD, ConfigEntry.ConfigType.PASSWORD);
+    }
+
+    private static void testTranslateConfigType(ConfigDef.Type a, ConfigEntry.ConfigType b) {
+        assertEquals(b, KafkaConfigSchema.translateConfigType(a));
+    }
+
+    @Test
+    public void testTranslateConfigSources() {
+        testTranslateConfigSource(ConfigEntry.ConfigSource.DYNAMIC_TOPIC_CONFIG,
+            DescribeConfigsResponse.ConfigSource.TOPIC_CONFIG);
+        testTranslateConfigSource(ConfigEntry.ConfigSource.DYNAMIC_BROKER_LOGGER_CONFIG,
+            DescribeConfigsResponse.ConfigSource.DYNAMIC_BROKER_LOGGER_CONFIG);
+        testTranslateConfigSource(ConfigEntry.ConfigSource.DYNAMIC_BROKER_CONFIG,
+            DescribeConfigsResponse.ConfigSource.DYNAMIC_BROKER_CONFIG);
+        testTranslateConfigSource(ConfigEntry.ConfigSource.DYNAMIC_DEFAULT_BROKER_CONFIG,
+            DescribeConfigsResponse.ConfigSource.DYNAMIC_DEFAULT_BROKER_CONFIG);
+        testTranslateConfigSource(ConfigEntry.ConfigSource.STATIC_BROKER_CONFIG,
+            DescribeConfigsResponse.ConfigSource.STATIC_BROKER_CONFIG);
+        testTranslateConfigSource(ConfigEntry.ConfigSource.DEFAULT_CONFIG,
+            DescribeConfigsResponse.ConfigSource.DEFAULT_CONFIG);
+    }
+
+    private static void testTranslateConfigSource(ConfigEntry.ConfigSource a,
+                                                  DescribeConfigsResponse.ConfigSource b) {
+        assertEquals(b, KafkaConfigSchema.translateConfigSource(a));
     }
 
     @Test
     public void testIsSplittable() {
-        KafkaConfigSchema schema = new KafkaConfigSchema(CONFIGS);
-        assertTrue(schema.isSplittable(BROKER, "foo.bar"));
-        assertFalse(schema.isSplittable(BROKER, "baz"));
-        assertFalse(schema.isSplittable(BROKER, "foo.baz.quux"));
-        assertFalse(schema.isSplittable(TOPIC, "baz"));
-        assertTrue(schema.isSplittable(TOPIC, "abc"));
+        assertTrue(SCHEMA.isSplittable(BROKER, "foo.bar"));
+        assertFalse(SCHEMA.isSplittable(BROKER, "baz"));
+        assertFalse(SCHEMA.isSplittable(BROKER, "foo.baz.quux"));
+        assertFalse(SCHEMA.isSplittable(TOPIC, "baz"));
+        assertTrue(SCHEMA.isSplittable(TOPIC, "abc"));
     }
 
     @Test
     public void testGetConfigValueDefault() {
-        KafkaConfigSchema schema = new KafkaConfigSchema(CONFIGS);
-        assertEquals("1", schema.getDefault(BROKER, "foo.bar"));
-        assertEquals(null, schema.getDefault(BROKER, "foo.baz.quux"));
-        assertEquals(null, schema.getDefault(TOPIC, "abc"));
-        assertEquals("true", schema.getDefault(TOPIC, "ghi"));
+        assertEquals("1", SCHEMA.getDefault(BROKER, "foo.bar"));
+        assertEquals(null, SCHEMA.getDefault(BROKER, "foo.baz.quux"));
+        assertEquals(null, SCHEMA.getDefault(TOPIC, "abc"));
+        assertEquals("true", SCHEMA.getDefault(TOPIC, "ghi"));
     }
 
     @Test
     public void testIsSensitive() {
-        KafkaConfigSchema schema = new KafkaConfigSchema(CONFIGS);
-        assertFalse(schema.isSensitive(BROKER, "foo.bar"));
-        assertTrue(schema.isSensitive(BROKER, "quuux"));
-        assertTrue(schema.isSensitive(BROKER, "unknown.config.key"));
-        assertFalse(schema.isSensitive(TOPIC, "abc"));
+        assertFalse(SCHEMA.isSensitive(BROKER, "foo.bar"));
+        assertTrue(SCHEMA.isSensitive(BROKER, "quuux"));
+        assertTrue(SCHEMA.isSensitive(BROKER, "quuux2"));
+        assertTrue(SCHEMA.isSensitive(BROKER, "unknown.config.key"));
+        assertFalse(SCHEMA.isSensitive(TOPIC, "abc"));
+    }
+
+    @Test
+    public void testResolveEffectiveTopicConfig() {
+        Map<String, String> staticNodeConfig = new HashMap<>();
+        staticNodeConfig.put("foo.bar", "the,static,value");
+        staticNodeConfig.put("quux", "123");
+        staticNodeConfig.put("ghi", "false");
+        Map<String, String> dynamicClusterConfigs = new HashMap<>();
+        dynamicClusterConfigs.put("foo.bar", "the,dynamic,cluster,config,value");
+        dynamicClusterConfigs.put("quux", "456");
+        Map<String, String> dynamicNodeConfigs = new HashMap<>();
+        dynamicNodeConfigs.put("quux", "789");
+        Map<String, String> dynamicTopicConfigs = new HashMap<>();
+        dynamicTopicConfigs.put("ghi", "true");
+        Map<String, ConfigEntry> expected = new HashMap<>();
+        expected.put("abc", new ConfigEntry("abc", "the,dynamic,cluster,config,value",
+            ConfigEntry.ConfigSource.DYNAMIC_DEFAULT_BROKER_CONFIG, false, false, emptyList(),
+                ConfigEntry.ConfigType.LIST, "abc doc"));
+        expected.put("def", new ConfigEntry("def", "2840400000",
+            ConfigEntry.ConfigSource.DYNAMIC_BROKER_CONFIG, false, false, emptyList(),
+            ConfigEntry.ConfigType.LONG, "def doc"));
+        expected.put("ghi", new ConfigEntry("ghi", "true",
+            ConfigEntry.ConfigSource.DYNAMIC_TOPIC_CONFIG, false, false, emptyList(),
+            ConfigEntry.ConfigType.BOOLEAN, "ghi doc"));
+        expected.put("xyz", new ConfigEntry("xyz", "thedefault",
+            ConfigEntry.ConfigSource.DEFAULT_CONFIG, true, false, emptyList(),
+            ConfigEntry.ConfigType.PASSWORD, "xyz doc"));
+        assertEquals(expected, SCHEMA.resolveEffectiveTopicConfigs(staticNodeConfig,
+            dynamicClusterConfigs,
+            dynamicNodeConfigs,
+            dynamicTopicConfigs));
     }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/LeaderRecoveryStateTest.java b/metadata/src/test/java/org/apache/kafka/metadata/LeaderRecoveryStateTest.java
new file mode 100644
index 0000000000000..45268dfbc3d55
--- /dev/null
+++ b/metadata/src/test/java/org/apache/kafka/metadata/LeaderRecoveryStateTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+final public class LeaderRecoveryStateTest {
+    private static final byte NO_CHANGE = (byte) -1;
+
+    @Test
+    void testUniqueValues() {
+        Set<Byte> set = new HashSet<>();
+        for (LeaderRecoveryState recovery : LeaderRecoveryState.values()) {
+            assertTrue(
+                set.add(recovery.value()),
+                String.format("Value %s for election state %s has already been used", recovery.value(), recovery)
+            );
+        }
+    }
+
+    @Test
+    void testDoesNotContainNoChange() {
+        for (LeaderRecoveryState recovery : LeaderRecoveryState.values()) {
+            assertNotEquals(NO_CHANGE, recovery.value());
+        }
+    }
+
+    @Test
+    void testByteToLeaderRecoveryState() {
+        assertEquals(LeaderRecoveryState.RECOVERED, LeaderRecoveryState.of((byte) 0));
+        assertEquals(LeaderRecoveryState.RECOVERING, LeaderRecoveryState.of((byte) 1));
+    }
+
+    @Test
+    void testLeaderRecoveryStateValue() {
+        assertEquals(0, LeaderRecoveryState.RECOVERED.value());
+        assertEquals(1, LeaderRecoveryState.RECOVERING.value());
+    }
+
+    @Test
+    void testInvalidValue() {
+        assertThrows(
+            IllegalArgumentException.class,
+            () -> LeaderRecoveryState.of(NO_CHANGE)
+        );
+        assertThrows(IllegalArgumentException.class, () -> LeaderRecoveryState.of((byte) 2));
+    }
+
+    @Test
+    void testOptionalInvalidValue() {
+        assertEquals(Optional.empty(), LeaderRecoveryState.optionalOf(NO_CHANGE));
+        assertEquals(Optional.empty(), LeaderRecoveryState.optionalOf((byte) 2));
+    }
+
+    @Test
+    void testChangeTo() {
+        LeaderRecoveryState state = LeaderRecoveryState.RECOVERED;
+        assertEquals(LeaderRecoveryState.RECOVERED, state.changeTo(NO_CHANGE));
+        state = state.changeTo(LeaderRecoveryState.RECOVERING.value());
+        assertEquals(LeaderRecoveryState.RECOVERING, state);
+        assertEquals(LeaderRecoveryState.RECOVERING, state.changeTo(NO_CHANGE));
+        state = state.changeTo(LeaderRecoveryState.RECOVERED.value());
+        assertEquals(LeaderRecoveryState.RECOVERED, state);
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/controller/MockRandom.java b/metadata/src/test/java/org/apache/kafka/metadata/MockRandom.java
similarity index 96%
rename from metadata/src/test/java/org/apache/kafka/controller/MockRandom.java
rename to metadata/src/test/java/org/apache/kafka/metadata/MockRandom.java
index c42a158b660b1..40b2f13463f9c 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/MockRandom.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/MockRandom.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.controller;
+package org.apache.kafka.metadata;
 
 import java.util.Random;
 
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/PartitionRegistrationTest.java b/metadata/src/test/java/org/apache/kafka/metadata/PartitionRegistrationTest.java
index 9b1be5d4b3d20..66bf3fe033337 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/PartitionRegistrationTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/PartitionRegistrationTest.java
@@ -47,11 +47,11 @@ public void testElectionWasClean() {
     @Test
     public void testPartitionControlInfoMergeAndDiff() {
         PartitionRegistration a = new PartitionRegistration(
-            new int[]{1, 2, 3}, new int[]{1, 2}, Replicas.NONE, Replicas.NONE, 1, 0, 0);
+            new int[]{1, 2, 3}, new int[]{1, 2}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 0, 0);
         PartitionRegistration b = new PartitionRegistration(
-            new int[]{1, 2, 3}, new int[]{3}, Replicas.NONE, Replicas.NONE, 3, 1, 1);
+            new int[]{1, 2, 3}, new int[]{3}, Replicas.NONE, Replicas.NONE, 3, LeaderRecoveryState.RECOVERED, 1, 1);
         PartitionRegistration c = new PartitionRegistration(
-            new int[]{1, 2, 3}, new int[]{1}, Replicas.NONE, Replicas.NONE, 1, 0, 1);
+            new int[]{1, 2, 3}, new int[]{1}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 0, 1);
         assertEquals(b, a.merge(new PartitionChangeRecord().
             setLeader(3).setIsr(Arrays.asList(3))));
         assertEquals("isr: [1, 2] -> [3], leader: 1 -> 3, leaderEpoch: 0 -> 1, partitionEpoch: 0 -> 1",
@@ -63,7 +63,7 @@ public void testPartitionControlInfoMergeAndDiff() {
     @Test
     public void testRecordRoundTrip() {
         PartitionRegistration registrationA = new PartitionRegistration(
-            new int[]{1, 2, 3}, new int[]{1, 2}, new int[]{1}, Replicas.NONE, 1, 0, 0);
+            new int[]{1, 2, 3}, new int[]{1, 2}, new int[]{1}, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 0, 0);
         Uuid topicId = Uuid.fromString("OGdAI5nxT_m-ds3rJMqPLA");
         int partitionId = 4;
         ApiMessageAndVersion record = registrationA.toRecord(topicId, partitionId);
@@ -75,9 +75,9 @@ public void testRecordRoundTrip() {
     @Test
     public void testToLeaderAndIsrPartitionState() {
         PartitionRegistration a = new PartitionRegistration(
-            new int[]{1, 2, 3}, new int[]{1, 2}, Replicas.NONE, Replicas.NONE, 1, 123, 456);
+            new int[]{1, 2, 3}, new int[]{1, 2}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 123, 456);
         PartitionRegistration b = new PartitionRegistration(
-            new int[]{2, 3, 4}, new int[]{2, 3, 4}, Replicas.NONE, Replicas.NONE, 2, 234, 567);
+            new int[]{2, 3, 4}, new int[]{2, 3, 4}, Replicas.NONE, Replicas.NONE, 2, LeaderRecoveryState.RECOVERED, 234, 567);
         assertEquals(new LeaderAndIsrPartitionState().
                 setTopicName("foo").
                 setPartitionIndex(1).
@@ -85,7 +85,7 @@ public void testToLeaderAndIsrPartitionState() {
                 setLeader(1).
                 setLeaderEpoch(123).
                 setIsr(Arrays.asList(1, 2)).
-                setZkVersion(456).
+                setPartitionEpoch(456).
                 setReplicas(Arrays.asList(1, 2, 3)).
                 setAddingReplicas(Collections.emptyList()).
                 setRemovingReplicas(Collections.emptyList()).
@@ -98,7 +98,7 @@ public void testToLeaderAndIsrPartitionState() {
                 setLeader(2).
                 setLeaderEpoch(234).
                 setIsr(Arrays.asList(2, 3, 4)).
-                setZkVersion(567).
+                setPartitionEpoch(567).
                 setReplicas(Arrays.asList(2, 3, 4)).
                 setAddingReplicas(Collections.emptyList()).
                 setRemovingReplicas(Collections.emptyList()).
@@ -109,20 +109,20 @@ public void testToLeaderAndIsrPartitionState() {
     @Test
     public void testMergePartitionChangeRecordWithReassignmentData() {
         PartitionRegistration partition0 = new PartitionRegistration(new int[] {1, 2, 3},
-            new int[] {1, 2, 3}, Replicas.NONE, Replicas.NONE, 1, 100, 200);
+            new int[] {1, 2, 3}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 100, 200);
         PartitionRegistration partition1 = partition0.merge(new PartitionChangeRecord().
             setRemovingReplicas(Collections.singletonList(3)).
             setAddingReplicas(Collections.singletonList(4)).
             setReplicas(Arrays.asList(1, 2, 3, 4)));
         assertEquals(new PartitionRegistration(new int[] {1, 2, 3, 4},
-            new int[] {1, 2, 3}, new int[] {3}, new int[] {4}, 1, 100, 201), partition1);
+            new int[] {1, 2, 3}, new int[] {3}, new int[] {4}, 1, LeaderRecoveryState.RECOVERED, 100, 201), partition1);
         PartitionRegistration partition2 = partition1.merge(new PartitionChangeRecord().
             setIsr(Arrays.asList(1, 2, 4)).
             setRemovingReplicas(Collections.emptyList()).
             setAddingReplicas(Collections.emptyList()).
             setReplicas(Arrays.asList(1, 2, 4)));
         assertEquals(new PartitionRegistration(new int[] {1, 2, 4},
-            new int[] {1, 2, 4}, Replicas.NONE, Replicas.NONE, 1, 100, 202), partition2);
+            new int[] {1, 2, 4}, Replicas.NONE, Replicas.NONE, 1, LeaderRecoveryState.RECOVERED, 100, 202), partition2);
         assertFalse(partition2.isReassigning());
     }
 }
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/RecordTestUtils.java b/metadata/src/test/java/org/apache/kafka/metadata/RecordTestUtils.java
index 3fdeea8dbe653..c21bdb544789b 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/RecordTestUtils.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/RecordTestUtils.java
@@ -57,20 +57,25 @@ public static void replayAll(Object target,
         for (ApiMessageAndVersion recordAndVersion : recordsAndVersions) {
             ApiMessage record = recordAndVersion.message();
             try {
-                Method method = target.getClass().getMethod("replay", record.getClass());
-                method.invoke(target, record);
-            } catch (NoSuchMethodException e) {
                 try {
-                    Method method = target.getClass().getMethod("replay",
-                        record.getClass(),
-                        Optional.class);
-                    method.invoke(target, record, Optional.empty());
-                } catch (NoSuchMethodException t) {
-                    // ignore
-                } catch (InvocationTargetException t) {
-                    throw new RuntimeException(t);
-                } catch (IllegalAccessException t) {
-                    throw new RuntimeException(t);
+                    Method method = target.getClass().getMethod("replay", record.getClass());
+                    method.invoke(target, record);
+                } catch (NoSuchMethodException e) {
+                    try {
+                        Method method = target.getClass().getMethod("replay",
+                            record.getClass(),
+                            Optional.class);
+                        method.invoke(target, record, Optional.empty());
+                    } catch (NoSuchMethodException t) {
+                        try {
+                            Method method = target.getClass().getMethod("replay",
+                                record.getClass(),
+                                long.class);
+                            method.invoke(target, record, 0L);
+                        } catch (NoSuchMethodException i) {
+                            // ignore
+                        }
+                    }
                 }
             } catch (InvocationTargetException e) {
                 throw new RuntimeException(e);
@@ -119,7 +124,7 @@ public static void replayAllBatches(Object target,
      * @param delta the metadata delta on which to replay the records
      * @param highestOffset highest offset from the list of record batches
      * @param highestEpoch highest epoch from the list of record batches
-     * @param recordsAndVersions list of batches of records
+     * @param batches list of batches of records
      */
     public static void replayAllBatches(
         MetadataDelta delta,
@@ -205,12 +210,16 @@ public static void deepSortRecords(Object o) throws Exception {
     /**
      * Create a batch reader for testing.
      *
-     * @param lastOffset    The last offset of the given list of records.
-     * @param records       The records.
-     * @return              A batch reader which will return the given records.
+     * @param lastOffset the last offset of the given list of records
+     * @param appendTimestamp the append timestamp for the batches created
+     * @param records the records
+     * @return a batch reader which will return the given records
      */
-    public static BatchReader<ApiMessageAndVersion>
-            mockBatchReader(long lastOffset, List<ApiMessageAndVersion> records) {
+    public static BatchReader<ApiMessageAndVersion> mockBatchReader(
+        long lastOffset,
+        long appendTimestamp,
+        List<ApiMessageAndVersion> records
+    ) {
         List<Batch<ApiMessageAndVersion>> batches = new ArrayList<>();
         long offset = lastOffset - records.size() + 1;
         Iterator<ApiMessageAndVersion> iterator = records.iterator();
@@ -218,7 +227,7 @@ public static void deepSortRecords(Object o) throws Exception {
         assertTrue(iterator.hasNext()); // At least one record is required
         while (true) {
             if (!iterator.hasNext() || curRecords.size() >= 2) {
-                batches.add(Batch.data(offset, 0, 0, sizeInBytes(curRecords), curRecords));
+                batches.add(Batch.data(offset, 0, appendTimestamp, sizeInBytes(curRecords), curRecords));
                 if (!iterator.hasNext()) {
                     break;
                 }
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/VersionRangeTest.java b/metadata/src/test/java/org/apache/kafka/metadata/VersionRangeTest.java
index 88082a6f55331..d31e8f813962f 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/VersionRangeTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/VersionRangeTest.java
@@ -21,6 +21,7 @@
 import org.junit.jupiter.api.Timeout;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 @Timeout(value = 40)
@@ -31,24 +32,24 @@ private static VersionRange v(int a, int b) {
         assertTrue(a >= Short.MIN_VALUE);
         assertTrue(b <= Short.MAX_VALUE);
         assertTrue(b >= Short.MIN_VALUE);
-        return new VersionRange((short) a, (short) b);
+        return VersionRange.of((short) a, (short) b);
     }
 
     @Test
     public void testEquality() {
         assertEquals(v(1, 1), v(1, 1));
-        assertFalse(v(1, 1).equals(v(1, 2)));
-        assertFalse(v(2, 1).equals(v(1, 2)));
-        assertFalse(v(2, 1).equals(v(2, 2)));
+        assertNotEquals(v(1, 2), v(1, 1));
+        assertNotEquals(v(1, 2), v(2, 1));
+        assertNotEquals(v(2, 2), v(2, 1));
     }
 
     @Test
     public void testContains() {
-        assertTrue(v(1, 1).contains(v(1, 1)));
-        assertFalse(v(1, 1).contains(v(1, 2)));
-        assertTrue(v(1, 2).contains(v(1, 1)));
-        assertFalse(v(4, 10).contains(v(3, 8)));
-        assertTrue(v(2, 12).contains(v(3, 11)));
+        assertTrue(v(1, 1).contains((short) 1));
+        assertFalse(v(1, 1).contains((short) 2));
+        assertTrue(v(1, 2).contains((short) 1));
+        assertFalse(v(4, 10).contains((short) 3));
+        assertTrue(v(2, 12).contains((short) 11));
     }
 
     @Test
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizerTest.java b/metadata/src/test/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizerTest.java
index 2f0fb0f6a5ca6..3242170bcdfed 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/authorizer/ClusterMetadataAuthorizerTest.java
@@ -26,6 +26,7 @@
 import org.apache.kafka.common.errors.InvalidRequestException;
 import org.apache.kafka.common.errors.NotControllerException;
 import org.apache.kafka.common.resource.ResourcePattern;
+import org.apache.kafka.controller.ControllerRequestContext;
 import org.apache.kafka.server.authorizer.AclCreateResult;
 import org.apache.kafka.server.authorizer.AclDeleteResult;
 import org.apache.kafka.server.authorizer.AclDeleteResult.AclBindingDeleteResult;
@@ -69,7 +70,10 @@ void setCreateAclsResponse(CompletableFuture<List<AclCreateResult>> createAclsRe
         }
 
         @Override
-        public CompletableFuture<List<AclCreateResult>> createAcls(List<AclBinding> aclBindings) {
+        public CompletableFuture<List<AclCreateResult>> createAcls(
+            ControllerRequestContext context,
+            List<AclBinding> aclBindings
+        ) {
             return createAclsResponse;
         }
 
@@ -78,7 +82,10 @@ void setDeleteAclsResponse(CompletableFuture<List<AclDeleteResult>> deleteAclsRe
         }
 
         @Override
-        public CompletableFuture<List<AclDeleteResult>> deleteAcls(List<AclBindingFilter> aclBindingFilters) {
+        public CompletableFuture<List<AclDeleteResult>> deleteAcls(
+            ControllerRequestContext context,
+            List<AclBindingFilter> aclBindingFilters
+        ) {
             return deleteAclsResponse;
         }
     }
@@ -99,6 +106,16 @@ public AclMutator aclMutatorOrException() {
             return aclMutator;
         }
 
+        @Override
+        public void completeInitialLoad() {
+            // do nothing
+        }
+
+        @Override
+        public void completeInitialLoad(Exception e) {
+            // do nothing
+        }
+
         @Override
         public void loadSnapshot(Map<Uuid, StandardAcl> acls) {
             // do nothing
diff --git a/metadata/src/test/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerTest.java b/metadata/src/test/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerTest.java
index ee09bb4c12735..987c00155c47a 100644
--- a/metadata/src/test/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/authorizer/StandardAuthorizerTest.java
@@ -17,30 +17,46 @@
 
 package org.apache.kafka.metadata.authorizer;
 
+import org.apache.kafka.common.ClusterResource;
+import org.apache.kafka.common.Endpoint;
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.acl.AccessControlEntryFilter;
 import org.apache.kafka.common.acl.AclBinding;
 import org.apache.kafka.common.acl.AclBindingFilter;
 import org.apache.kafka.common.acl.AclOperation;
 import org.apache.kafka.common.acl.AclPermissionType;
+import org.apache.kafka.common.errors.AuthorizerNotReadyException;
+import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.resource.PatternType;
 import org.apache.kafka.common.resource.ResourcePattern;
 import org.apache.kafka.common.resource.ResourcePatternFilter;
 import org.apache.kafka.common.resource.ResourceType;
 import org.apache.kafka.common.security.auth.KafkaPrincipal;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.server.authorizer.Action;
 import org.apache.kafka.server.authorizer.AuthorizableRequestContext;
+import org.apache.kafka.server.authorizer.AuthorizerServerInfo;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.net.InetAddress;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.CompletionStage;
 
 import static java.util.Arrays.asList;
 import static java.util.Collections.singletonList;
@@ -77,6 +93,56 @@
 
 @Timeout(value = 40)
 public class StandardAuthorizerTest {
+    public static final Endpoint PLAINTEXT = new Endpoint("PLAINTEXT",
+        SecurityProtocol.PLAINTEXT,
+        "127.0.0.1",
+        9020);
+
+    public static final Endpoint CONTROLLER = new Endpoint("CONTROLLER",
+        SecurityProtocol.PLAINTEXT,
+        "127.0.0.1",
+        9020);
+
+    static class AuthorizerTestServerInfo implements AuthorizerServerInfo {
+        private final Collection<Endpoint> endpoints;
+
+        AuthorizerTestServerInfo(Collection<Endpoint> endpoints) {
+            assertFalse(endpoints.isEmpty());
+            this.endpoints = endpoints;
+        }
+
+        @Override
+        public ClusterResource clusterResource() {
+            return new ClusterResource(Uuid.fromString("r7mqHQrxTNmzbKvCvWZzLQ").toString());
+        }
+
+        @Override
+        public int brokerId() {
+            return 0;
+        }
+
+        @Override
+        public Collection<Endpoint> endpoints() {
+            return endpoints;
+        }
+
+        @Override
+        public Endpoint interBrokerEndpoint() {
+            return endpoints.iterator().next();
+        }
+
+        @Override
+        public Collection<String> earlyStartListeners() {
+            List<String> result = new ArrayList<>();
+            for (Endpoint endpoint : endpoints) {
+                if (endpoint.listenerName().get().equals("CONTROLLER")) {
+                    result.add(endpoint.listenerName().get());
+                }
+            }
+            return result;
+        }
+    }
+
     @Test
     public void testGetConfiguredSuperUsers() {
         assertEquals(Collections.emptySet(),
@@ -119,6 +185,14 @@ static Action newAction(AclOperation aclOperation,
             new ResourcePattern(resourceType, resourceName, LITERAL), 1, false, false);
     }
 
+    static StandardAuthorizer createAndInitializeStandardAuthorizer() {
+        StandardAuthorizer authorizer = new StandardAuthorizer();
+        authorizer.configure(Collections.singletonMap(SUPER_USERS_CONFIG, "User:superman"));
+        authorizer.start(new AuthorizerTestServerInfo(Collections.singletonList(PLAINTEXT)));
+        authorizer.completeInitialLoad();
+        return authorizer;
+    }
+
     private final static AtomicLong NEXT_ID = new AtomicLong(0);
 
     static StandardAcl newFooAcl(AclOperation op, AclPermissionType permission) {
@@ -222,8 +296,7 @@ private static void assertContains(Iterable<AclBinding> iterable, StandardAcl...
 
     @Test
     public void testListAcls() throws Exception {
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
         List<StandardAclWithId> fooAcls = asList(
             withId(newFooAcl(READ, ALLOW)),
             withId(newFooAcl(WRITE, ALLOW)));
@@ -244,8 +317,7 @@ public void testListAcls() throws Exception {
 
     @Test
     public void testSimpleAuthorizations() throws Exception {
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
         List<StandardAclWithId> fooAcls = asList(
             withId(newFooAcl(READ, ALLOW)),
             withId(newFooAcl(WRITE, ALLOW)));
@@ -266,20 +338,17 @@ public void testSimpleAuthorizations() throws Exception {
 
     @Test
     public void testDenyPrecedenceWithOperationAll() throws Exception {
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
         List<StandardAcl> acls = Arrays.asList(
             new StandardAcl(TOPIC, "foo", LITERAL, "User:alice", "*", ALL, DENY),
             new StandardAcl(TOPIC, "foo", PREFIXED, "User:alice", "*", READ, ALLOW),
             new StandardAcl(TOPIC, "foo", LITERAL, "User:*", "*", ALL, DENY),
             new StandardAcl(TOPIC, "foo", PREFIXED, "User:*", "*", DESCRIBE, ALLOW)
         );
-
         acls.forEach(acl -> {
             StandardAclWithId aclWithId = withId(acl);
             authorizer.addAcl(aclWithId.id(), aclWithId.acl());
         });
-
         assertEquals(Arrays.asList(DENIED, DENIED, DENIED, ALLOWED), authorizer.authorize(
             newRequestContext("alice"),
             Arrays.asList(
@@ -287,7 +356,6 @@ public void testDenyPrecedenceWithOperationAll() throws Exception {
                 newAction(READ, TOPIC, "foo"),
                 newAction(DESCRIBE, TOPIC, "foo"),
                 newAction(READ, TOPIC, "foobar"))));
-
         assertEquals(Arrays.asList(DENIED, DENIED, DENIED, ALLOWED, DENIED), authorizer.authorize(
             newRequestContext("bob"),
             Arrays.asList(
@@ -300,19 +368,16 @@ public void testDenyPrecedenceWithOperationAll() throws Exception {
 
     @Test
     public void testTopicAclWithOperationAll() throws Exception {
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
         List<StandardAcl> acls = Arrays.asList(
             new StandardAcl(TOPIC, "foo", LITERAL, "User:*", "*", ALL, ALLOW),
             new StandardAcl(TOPIC, "bar", PREFIXED, "User:alice", "*", ALL, ALLOW),
             new StandardAcl(TOPIC, "baz", LITERAL, "User:bob", "*", ALL, ALLOW)
         );
-
         acls.forEach(acl -> {
             StandardAclWithId aclWithId = withId(acl);
             authorizer.addAcl(aclWithId.id(), aclWithId.acl());
         });
-
         assertEquals(Arrays.asList(ALLOWED, ALLOWED, DENIED), authorizer.authorize(
             newRequestContext("alice"),
             Arrays.asList(
@@ -346,8 +411,7 @@ public void testHostAddressAclValidation() throws Exception {
         InetAddress host1 = InetAddress.getByName("192.168.1.1");
         InetAddress host2 = InetAddress.getByName("192.168.1.2");
 
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
         List<StandardAcl> acls = Arrays.asList(
             new StandardAcl(TOPIC, "foo", LITERAL, "User:alice", host1.getHostAddress(), READ, DENY),
             new StandardAcl(TOPIC, "foo", LITERAL, "User:alice", "*", READ, ALLOW),
@@ -392,9 +456,7 @@ private AuthorizableRequestContext newRequestContext(String principal, InetAddre
             .build();
     }
 
-    private static StandardAuthorizer createAuthorizerWithManyAcls() {
-        StandardAuthorizer authorizer = new StandardAuthorizer();
-        authorizer.configure(Collections.emptyMap());
+    private static void addManyAcls(StandardAuthorizer authorizer) {
         List<StandardAcl> acls = Arrays.asList(
             new StandardAcl(TOPIC, "green2", LITERAL, "User:*", "*", READ, ALLOW),
             new StandardAcl(TOPIC, "green", PREFIXED, "User:bob", "*", READ, ALLOW),
@@ -410,12 +472,12 @@ private static StandardAuthorizer createAuthorizerWithManyAcls() {
             StandardAclWithId aclWithId = withId(acl);
             authorizer.addAcl(aclWithId.id(), aclWithId.acl());
         });
-        return authorizer;
     }
 
     @Test
     public void testAuthorizationWithManyAcls() throws Exception {
-        StandardAuthorizer authorizer = createAuthorizerWithManyAcls();
+        StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
+        addManyAcls(authorizer);
         assertEquals(Arrays.asList(ALLOWED, DENIED),
             authorizer.authorize(new MockAuthorizableRequestContext.Builder().
                     setPrincipal(new KafkaPrincipal(USER_TYPE, "bob")).build(),
@@ -428,4 +490,154 @@ public void testAuthorizationWithManyAcls() throws Exception {
                     newAction(WRITE, GROUP, "arbitrary"),
                     newAction(READ, TOPIC, "ala"))));
     }
-}
\ No newline at end of file
+
+    @ParameterizedTest
+    @ValueSource(booleans = {true, false})
+    public void testDenyAuditLogging(boolean logIfDenied) throws Exception {
+        try (MockedStatic<LoggerFactory> mockedLoggerFactory = Mockito.mockStatic(LoggerFactory.class)) {
+            Logger otherLog = Mockito.mock(Logger.class);
+            Logger auditLog = Mockito.mock(Logger.class);
+            mockedLoggerFactory
+                .when(() -> LoggerFactory.getLogger("kafka.authorizer.logger"))
+                .thenReturn(auditLog);
+
+            mockedLoggerFactory
+                .when(() -> LoggerFactory.getLogger(Mockito.any(Class.class)))
+                .thenReturn(otherLog);
+
+            Mockito.when(auditLog.isDebugEnabled()).thenReturn(true);
+            Mockito.when(auditLog.isTraceEnabled()).thenReturn(true);
+
+            StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
+            addManyAcls(authorizer);
+            ResourcePattern topicResource = new ResourcePattern(TOPIC, "alpha", LITERAL);
+            Action action = new Action(READ, topicResource, 1, false, logIfDenied);
+            MockAuthorizableRequestContext requestContext = new MockAuthorizableRequestContext.Builder()
+                .setPrincipal(new KafkaPrincipal(USER_TYPE, "bob"))
+                .setClientAddress(InetAddress.getByName("127.0.0.1"))
+                .build();
+
+            assertEquals(singletonList(DENIED), authorizer.authorize(requestContext, singletonList(action)));
+
+            String expectedAuditLog = "Principal = User:bob is Denied operation = READ " +
+                "from host = 127.0.0.1 on resource = Topic:LITERAL:alpha for request = Fetch " +
+                "with resourceRefCount = 1 based on rule MatchingAcl(acl=StandardAcl(resourceType=TOPIC, " +
+                "resourceName=alp, patternType=PREFIXED, principal=User:bob, host=*, operation=READ, " +
+                "permissionType=DENY))";
+
+            if (logIfDenied) {
+                Mockito.verify(auditLog).info(expectedAuditLog);
+            } else {
+                Mockito.verify(auditLog).trace(expectedAuditLog);
+            }
+        }
+    }
+
+    @ParameterizedTest
+    @ValueSource(booleans = {true, false})
+    public void testAllowAuditLogging(boolean logIfAllowed) throws Exception {
+        try (MockedStatic<LoggerFactory> mockedLoggerFactory = Mockito.mockStatic(LoggerFactory.class)) {
+            Logger otherLog = Mockito.mock(Logger.class);
+            Logger auditLog = Mockito.mock(Logger.class);
+            mockedLoggerFactory
+                .when(() -> LoggerFactory.getLogger("kafka.authorizer.logger"))
+                .thenReturn(auditLog);
+
+            mockedLoggerFactory
+                .when(() -> LoggerFactory.getLogger(Mockito.any(Class.class)))
+                .thenReturn(otherLog);
+
+            Mockito.when(auditLog.isDebugEnabled()).thenReturn(true);
+            Mockito.when(auditLog.isTraceEnabled()).thenReturn(true);
+
+            StandardAuthorizer authorizer = createAndInitializeStandardAuthorizer();
+            addManyAcls(authorizer);
+            ResourcePattern topicResource = new ResourcePattern(TOPIC, "green1", LITERAL);
+            Action action = new Action(READ, topicResource, 1, logIfAllowed, false);
+            MockAuthorizableRequestContext requestContext = new MockAuthorizableRequestContext.Builder()
+                .setPrincipal(new KafkaPrincipal(USER_TYPE, "bob"))
+                .setClientAddress(InetAddress.getByName("127.0.0.1"))
+                .build();
+
+            assertEquals(singletonList(ALLOWED), authorizer.authorize(requestContext, singletonList(action)));
+
+            String expectedAuditLog = "Principal = User:bob is Allowed operation = READ " +
+                "from host = 127.0.0.1 on resource = Topic:LITERAL:green1 for request = Fetch " +
+                "with resourceRefCount = 1 based on rule MatchingAcl(acl=StandardAcl(resourceType=TOPIC, " +
+                "resourceName=green, patternType=PREFIXED, principal=User:bob, host=*, operation=READ, " +
+                "permissionType=ALLOW))";
+
+            if (logIfAllowed) {
+                Mockito.verify(auditLog).debug(expectedAuditLog);
+            } else {
+                Mockito.verify(auditLog).trace(expectedAuditLog);
+            }
+        }
+    }
+
+    /**
+     * Test that StandardAuthorizer#start returns a completed future for early start
+     * listeners.
+     */
+    @Test
+    public void testStartWithEarlyStartListeners() throws Exception {
+        StandardAuthorizer authorizer = new StandardAuthorizer();
+        authorizer.configure(Collections.singletonMap(SUPER_USERS_CONFIG, "User:superman"));
+        Map<Endpoint, ? extends CompletionStage<Void>> futures2 = authorizer.
+            start(new AuthorizerTestServerInfo(Arrays.asList(PLAINTEXT, CONTROLLER)));
+        assertEquals(new HashSet<>(Arrays.asList(PLAINTEXT, CONTROLLER)), futures2.keySet());
+        assertFalse(futures2.get(PLAINTEXT).toCompletableFuture().isDone());
+        assertTrue(futures2.get(CONTROLLER).toCompletableFuture().isDone());
+    }
+
+    /**
+     * Test attempts to authorize prior to completeInitialLoad. During this time, only
+     * superusers can be authorized. Other users will get an AuthorizerNotReadyException
+     * exception. Not even an authorization result, just an exception thrown for the whole
+     * batch.
+     */
+    @Test
+    public void testAuthorizationPriorToCompleteInitialLoad() throws Exception {
+        StandardAuthorizer authorizer = new StandardAuthorizer();
+        authorizer.configure(Collections.singletonMap(SUPER_USERS_CONFIG, "User:superman"));
+        assertThrows(AuthorizerNotReadyException.class, () ->
+            authorizer.authorize(new MockAuthorizableRequestContext.Builder().
+                    setPrincipal(new KafkaPrincipal(USER_TYPE, "bob")).build(),
+                Arrays.asList(newAction(READ, TOPIC, "green1"),
+                    newAction(READ, TOPIC, "green2"))));
+        assertEquals(Arrays.asList(ALLOWED, ALLOWED),
+            authorizer.authorize(new MockAuthorizableRequestContext.Builder().
+                    setPrincipal(new KafkaPrincipal(USER_TYPE, "superman")).build(),
+                Arrays.asList(newAction(READ, TOPIC, "green1"),
+                    newAction(WRITE, GROUP, "wheel"))));
+    }
+
+    @Test
+    public void testCompleteInitialLoad() throws Exception {
+        StandardAuthorizer authorizer = new StandardAuthorizer();
+        authorizer.configure(Collections.singletonMap(SUPER_USERS_CONFIG, "User:superman"));
+        Map<Endpoint, ? extends CompletionStage<Void>> futures = authorizer.
+            start(new AuthorizerTestServerInfo(Collections.singleton(PLAINTEXT)));
+        assertEquals(Collections.singleton(PLAINTEXT), futures.keySet());
+        assertFalse(futures.get(PLAINTEXT).toCompletableFuture().isDone());
+        authorizer.completeInitialLoad();
+        assertTrue(futures.get(PLAINTEXT).toCompletableFuture().isDone());
+        assertFalse(futures.get(PLAINTEXT).toCompletableFuture().isCompletedExceptionally());
+    }
+
+    @Test
+    public void testCompleteInitialLoadWithException() throws Exception {
+        StandardAuthorizer authorizer = new StandardAuthorizer();
+        authorizer.configure(Collections.singletonMap(SUPER_USERS_CONFIG, "User:superman"));
+        Map<Endpoint, ? extends CompletionStage<Void>> futures = authorizer.
+            start(new AuthorizerTestServerInfo(Arrays.asList(PLAINTEXT, CONTROLLER)));
+        assertEquals(new HashSet<>(Arrays.asList(PLAINTEXT, CONTROLLER)), futures.keySet());
+        assertFalse(futures.get(PLAINTEXT).toCompletableFuture().isDone());
+        assertTrue(futures.get(CONTROLLER).toCompletableFuture().isDone());
+        authorizer.completeInitialLoad(new TimeoutException("timed out"));
+        assertTrue(futures.get(PLAINTEXT).toCompletableFuture().isDone());
+        assertTrue(futures.get(PLAINTEXT).toCompletableFuture().isCompletedExceptionally());
+        assertTrue(futures.get(CONTROLLER).toCompletableFuture().isDone());
+        assertFalse(futures.get(CONTROLLER).toCompletableFuture().isCompletedExceptionally());
+    }
+}
diff --git a/metadata/src/test/java/org/apache/kafka/controller/StripedReplicaPlacerTest.java b/metadata/src/test/java/org/apache/kafka/metadata/placement/StripedReplicaPlacerTest.java
similarity index 89%
rename from metadata/src/test/java/org/apache/kafka/controller/StripedReplicaPlacerTest.java
rename to metadata/src/test/java/org/apache/kafka/metadata/placement/StripedReplicaPlacerTest.java
index c3fbb0996a5ba..9450d4037013f 100644
--- a/metadata/src/test/java/org/apache/kafka/controller/StripedReplicaPlacerTest.java
+++ b/metadata/src/test/java/org/apache/kafka/metadata/placement/StripedReplicaPlacerTest.java
@@ -15,18 +15,19 @@
  * limitations under the License.
  */
 
-package org.apache.kafka.controller;
+package org.apache.kafka.metadata.placement;
 
 import org.apache.kafka.common.errors.InvalidReplicationFactorException;
-import org.apache.kafka.controller.StripedReplicaPlacer.BrokerList;
-import org.apache.kafka.controller.StripedReplicaPlacer.RackList;
-import org.apache.kafka.metadata.UsableBroker;
+import org.apache.kafka.metadata.MockRandom;
+import org.apache.kafka.metadata.placement.StripedReplicaPlacer.BrokerList;
+import org.apache.kafka.metadata.placement.StripedReplicaPlacer.RackList;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.Timeout;
 
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -84,6 +85,25 @@ public void testAvoidFencedReplicaIfPossibleOnSingleRack() {
         assertEquals(Arrays.asList(0, 4, 3, 2), rackList.place(4));
     }
 
+    private List<List<Integer>> place(
+        ReplicaPlacer placer,
+        int startPartition,
+        int numPartitions,
+        short replicationFactor,
+        List<UsableBroker> brokers
+    ) {
+        PlacementSpec placementSpec = new PlacementSpec(startPartition,
+            numPartitions,
+            replicationFactor);
+        ClusterDescriber cluster = new ClusterDescriber() {
+            @Override
+            public Iterator<UsableBroker> usableBrokers() {
+                return brokers.iterator();
+            }
+        };
+        return placer.place(placementSpec, cluster);
+    }
+
     /**
      * Test that we perform striped replica placement as expected for a multi-partition topic
      * on a single unfenced broker
@@ -95,9 +115,9 @@ public void testMultiPartitionTopicPlacementOnSingleUnfencedBroker() {
         assertEquals(Arrays.asList(Arrays.asList(0),
                 Arrays.asList(0),
                 Arrays.asList(0)),
-                placer.place(0, 3, (short) 1, Arrays.asList(
+                place(placer, 0, 3, (short) 1, Arrays.asList(
                         new UsableBroker(0, Optional.empty(), false),
-                        new UsableBroker(1, Optional.empty(), true)).iterator()));
+                        new UsableBroker(1, Optional.empty(), true))));
     }
 
     /**
@@ -166,9 +186,9 @@ public void testAllBrokersFenced() {
         StripedReplicaPlacer placer = new StripedReplicaPlacer(random);
         assertEquals("All brokers are currently fenced.",
             assertThrows(InvalidReplicationFactorException.class,
-                () -> placer.place(0, 1, (short) 1, Arrays.asList(
+                () -> place(placer, 0, 1, (short) 1, Arrays.asList(
                     new UsableBroker(11, Optional.of("1"), true),
-                    new UsableBroker(10, Optional.of("1"), true)).iterator())).getMessage());
+                    new UsableBroker(10, Optional.of("1"), true)))).getMessage());
     }
 
     @Test
@@ -178,9 +198,9 @@ public void testNotEnoughBrokers() {
         assertEquals("The target replication factor of 3 cannot be reached because only " +
             "2 broker(s) are registered.",
             assertThrows(InvalidReplicationFactorException.class,
-                () -> placer.place(0, 1, (short) 3, Arrays.asList(
+                () -> place(placer, 0, 1, (short) 3, Arrays.asList(
                     new UsableBroker(11, Optional.of("1"), false),
-                    new UsableBroker(10, Optional.of("1"), false)).iterator())).getMessage());
+                    new UsableBroker(10, Optional.of("1"), false)))).getMessage());
     }
 
     @Test
@@ -189,9 +209,9 @@ public void testNonPositiveReplicationFactor() {
         StripedReplicaPlacer placer = new StripedReplicaPlacer(random);
         assertEquals("Invalid replication factor 0: the replication factor must be positive.",
                 assertThrows(InvalidReplicationFactorException.class,
-                        () -> placer.place(0, 1, (short) 0, Arrays.asList(
+                        () -> place(placer, 0, 1, (short) 0, Arrays.asList(
                                 new UsableBroker(11, Optional.of("1"), false),
-                                new UsableBroker(10, Optional.of("1"), false)).iterator())).getMessage());
+                                new UsableBroker(10, Optional.of("1"), false)))).getMessage());
     }
 
     @Test
@@ -203,22 +223,22 @@ public void testSuccessfulPlacement() {
                 Arrays.asList(0, 1, 2),
                 Arrays.asList(1, 2, 3),
                 Arrays.asList(1, 0, 2)),
-            placer.place(0, 5, (short) 3, Arrays.asList(
+            place(placer, 0, 5, (short) 3, Arrays.asList(
                 new UsableBroker(0, Optional.empty(), false),
                 new UsableBroker(3, Optional.empty(), false),
                 new UsableBroker(2, Optional.empty(), false),
-                new UsableBroker(1, Optional.empty(), false)).iterator()));
+                new UsableBroker(1, Optional.empty(), false))));
     }
 
     @Test
     public void testEvenDistribution() {
         MockRandom random = new MockRandom();
         StripedReplicaPlacer placer = new StripedReplicaPlacer(random);
-        List<List<Integer>> replicas = placer.place(0, 200, (short) 2, Arrays.asList(
+        List<List<Integer>> replicas = place(placer, 0, 200, (short) 2, Arrays.asList(
             new UsableBroker(0, Optional.empty(), false),
             new UsableBroker(1, Optional.empty(), false),
             new UsableBroker(2, Optional.empty(), false),
-            new UsableBroker(3, Optional.empty(), false)).iterator());
+            new UsableBroker(3, Optional.empty(), false)));
         Map<List<Integer>, Integer> counts = new HashMap<>();
         for (List<Integer> partitionReplicas : replicas) {
             counts.put(partitionReplicas, counts.getOrDefault(partitionReplicas, 0) + 1);
diff --git a/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManager.java b/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManager.java
index 855fd468cba2d..e24d86bd873ff 100644
--- a/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManager.java
+++ b/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManager.java
@@ -31,6 +31,7 @@
 import org.apache.kafka.raft.LeaderAndEpoch;
 import org.apache.kafka.raft.OffsetAndEpoch;
 import org.apache.kafka.raft.RaftClient;
+import org.apache.kafka.raft.errors.NotLeaderException;
 import org.apache.kafka.raft.internals.MemoryBatchReader;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.apache.kafka.snapshot.MockRawSnapshotReader;
@@ -77,10 +78,10 @@ interface LocalBatch {
         int size();
     }
 
-    static class LeaderChangeBatch implements LocalBatch {
+    public static class LeaderChangeBatch implements LocalBatch {
         private final LeaderAndEpoch newLeader;
 
-        LeaderChangeBatch(LeaderAndEpoch newLeader) {
+        public LeaderChangeBatch(LeaderAndEpoch newLeader) {
             this.newLeader = newLeader;
         }
 
@@ -113,12 +114,12 @@ public String toString() {
         }
     }
 
-    static class LocalRecordBatch implements LocalBatch {
+    public static class LocalRecordBatch implements LocalBatch {
         private final int leaderEpoch;
         private final long appendTimestamp;
         private final List<ApiMessageAndVersion> records;
 
-        LocalRecordBatch(int leaderEpoch, long appendTimestamp, List<ApiMessageAndVersion> records) {
+        public LocalRecordBatch(int leaderEpoch, long appendTimestamp, List<ApiMessageAndVersion> records) {
             this.leaderEpoch = leaderEpoch;
             this.appendTimestamp = appendTimestamp;
             this.records = records;
@@ -184,6 +185,11 @@ public static class SharedLogData {
          */
         private long prevOffset;
 
+        /**
+         * The initial max read offset which LocalLog instances will be configured with.
+         */
+        private long initialMaxReadOffset = Long.MAX_VALUE;
+
         /**
          * Maps committed offset to snapshot reader.
          */
@@ -221,23 +227,27 @@ synchronized long tryAppend(int nodeId, int epoch, List<ApiMessageAndVersion> ba
         }
 
         synchronized long tryAppend(int nodeId, int epoch, LocalBatch batch) {
-            if (epoch != leader.epoch()) {
-                log.trace("tryAppend(nodeId={}, epoch={}): the provided epoch does not " +
-                    "match the current leader epoch of {}.", nodeId, epoch, leader.epoch());
-                return Long.MAX_VALUE;
-            }
             if (!leader.isLeader(nodeId)) {
-                log.trace("tryAppend(nodeId={}, epoch={}): the given node id does not " +
-                    "match the current leader id of {}.", nodeId, epoch, leader.leaderId());
-                return Long.MAX_VALUE;
+                log.debug("tryAppend(nodeId={}, epoch={}): the given node id does not " +
+                        "match the current leader id of {}.", nodeId, epoch, leader.leaderId());
+                throw new NotLeaderException("Append failed because the replication is not the current leader");
+            }
+
+            if (epoch < leader.epoch()) {
+                throw new NotLeaderException("Append failed because the given epoch " + epoch + " is stale. " +
+                        "Current leader epoch = " + leader.epoch());
+            } else if (epoch > leader.epoch()) {
+                throw new IllegalArgumentException("Attempt to append from epoch " + epoch +
+                        " which is larger than the current epoch " + leader.epoch());
             }
+
             log.trace("tryAppend(nodeId={}): appending {}.", nodeId, batch);
             long offset = append(batch);
             electLeaderIfNeeded();
             return offset;
         }
 
-        synchronized long append(LocalBatch batch) {
+        public synchronized long append(LocalBatch batch) {
             prevOffset += batch.size();
             log.debug("append(batch={}, prevOffset={})", batch, prevOffset);
             batches.put(prevOffset, batch);
@@ -352,6 +362,15 @@ synchronized long appendedBytes() {
                 })
                 .sum();
         }
+
+        public SharedLogData setInitialMaxReadOffset(long initialMaxReadOffset) {
+            this.initialMaxReadOffset = initialMaxReadOffset;
+            return this;
+        }
+
+        public long initialMaxReadOffset() {
+            return initialMaxReadOffset;
+        }
     }
 
     private static class MetaLogListenerData {
@@ -454,6 +473,7 @@ public LocalLogManager(LogContext logContext,
         this.log = logContext.logger(LocalLogManager.class);
         this.nodeId = nodeId;
         this.shared = shared;
+        this.maxReadOffset = shared.initialMaxReadOffset();
         this.eventQueue = new KafkaEventQueue(Time.SYSTEM, logContext, threadNamePrefix);
         shared.registerLogManager(this);
     }
@@ -476,7 +496,8 @@ private void scheduleLogCheck() {
                                         snapshot.get(),
                                         new  MetadataRecordSerde(),
                                         BufferSupplier.create(),
-                                        Integer.MAX_VALUE
+                                        Integer.MAX_VALUE,
+                                        true
                                     )
                                 );
                             }
@@ -502,6 +523,8 @@ private void scheduleLogCheck() {
                             // Only notify the listener if it equals the shared leader state
                             LeaderAndEpoch sharedLeader = shared.leaderAndEpoch();
                             if (batch.newLeader.equals(sharedLeader)) {
+                                log.debug("Node {}: Executing handleLeaderChange {}",
+                                    nodeId, sharedLeader);
                                 listenerData.handleLeaderChange(entryOffset, batch.newLeader);
                                 if (batch.newLeader.epoch() > leader.epoch()) {
                                     leader = batch.newLeader;
@@ -658,6 +681,15 @@ public void unregister(RaftClient.Listener<ApiMessageAndVersion> listener) {
         });
     }
 
+    @Override
+    public synchronized OptionalLong highWatermark() {
+        if (shared.prevOffset > 0) {
+            return OptionalLong.of(shared.prevOffset);
+        } else {
+            return OptionalLong.empty();
+        }
+    }
+
     @Override
     public long scheduleAppend(int epoch, List<ApiMessageAndVersion> batch) {
         if (batch.isEmpty()) {
@@ -697,9 +729,35 @@ public long scheduleAtomicAppend(int epoch, List<ApiMessageAndVersion> batch) {
 
     @Override
     public void resign(int epoch) {
-        LeaderAndEpoch curLeader = leader;
-        LeaderAndEpoch nextLeader = new LeaderAndEpoch(OptionalInt.empty(), curLeader.epoch() + 1);
-        shared.tryAppend(nodeId, curLeader.epoch(), new LeaderChangeBatch(nextLeader));
+        if (epoch < 0) {
+            throw new IllegalArgumentException("Attempt to resign from an invalid negative epoch " + epoch);
+        }
+
+        LeaderAndEpoch leaderAndEpoch = leaderAndEpoch();
+        int currentEpoch = leaderAndEpoch.epoch();
+
+        if (epoch > currentEpoch) {
+            throw new IllegalArgumentException("Attempt to resign from epoch " + epoch +
+                    " which is larger than the current epoch " + currentEpoch);
+        } else if (epoch < currentEpoch) {
+            // If the passed epoch is smaller than the current epoch, then it might mean
+            // that the listener has not been notified about a leader change that already
+            // took place. In this case, we consider the call as already fulfilled and
+            // take no further action.
+            log.debug("Ignoring call to resign from epoch {} since it is smaller than the " +
+                    "current epoch {}", epoch, currentEpoch);
+            return;
+        }
+
+        LeaderAndEpoch nextLeader = new LeaderAndEpoch(OptionalInt.empty(), currentEpoch + 1);
+        try {
+            shared.tryAppend(nodeId, currentEpoch, new LeaderChangeBatch(nextLeader));
+        } catch (NotLeaderException exp) {
+            // the leader epoch has already advanced. resign is a no op.
+            log.debug("Ignoring call to resign from epoch {}. Either we are not the leader or the provided epoch is " +
+                    "smaller than the current epoch {}", epoch, currentEpoch);
+            return;
+        }
     }
 
     @Override
diff --git a/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManagerTestEnv.java b/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManagerTestEnv.java
index ed18ab6053da6..17c9c467124c1 100644
--- a/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManagerTestEnv.java
+++ b/metadata/src/test/java/org/apache/kafka/metalog/LocalLogManagerTestEnv.java
@@ -20,8 +20,11 @@
 import org.apache.kafka.common.Uuid;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.metalog.LocalLogManager.LeaderChangeBatch;
+import org.apache.kafka.metalog.LocalLogManager.LocalRecordBatch;
 import org.apache.kafka.metalog.LocalLogManager.SharedLogData;
 import org.apache.kafka.raft.LeaderAndEpoch;
+import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.apache.kafka.snapshot.RawSnapshotReader;
 import org.apache.kafka.test.TestUtils;
 import org.slf4j.Logger;
@@ -32,7 +35,9 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
+import java.util.OptionalInt;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Consumer;
 
 public class LocalLogManagerTestEnv implements AutoCloseable {
     private static final Logger log =
@@ -77,10 +82,15 @@ public static LocalLogManagerTestEnv createWithMockListeners(
         return testEnv;
     }
 
-    public LocalLogManagerTestEnv(int numManagers, Optional<RawSnapshotReader> snapshot) throws Exception {
+    public LocalLogManagerTestEnv(
+        int numManagers,
+        Optional<RawSnapshotReader> snapshot,
+        Consumer<SharedLogData> dataSetup
+    ) throws Exception {
         clusterId = Uuid.randomUuid().toString();
         dir = TestUtils.tempDirectory();
         shared = new SharedLogData(snapshot);
+        dataSetup.accept(shared);
         List<LocalLogManager> newLogManagers = new ArrayList<>(numManagers);
         try {
             for (int nodeId = 0; nodeId < numManagers; nodeId++) {
@@ -102,6 +112,28 @@ public LocalLogManagerTestEnv(int numManagers, Optional<RawSnapshotReader> snaps
         this.logManagers = newLogManagers;
     }
 
+    public LocalLogManagerTestEnv(
+        int numManagers,
+        Optional<RawSnapshotReader> snapshot
+    ) throws Exception {
+        this(numManagers, snapshot, __ -> { });
+    }
+
+    /**
+     * Append some records to the log. This method is meant to be called before the
+     * controllers are started, to simulate a pre-existing metadata log.
+     *
+     * @param records   The records to be appended. Will be added in a single batch.
+     */
+    public void appendInitialRecords(List<ApiMessageAndVersion> records) {
+        int initialLeaderEpoch = 1;
+        shared.append(new LeaderChangeBatch(
+            new LeaderAndEpoch(OptionalInt.empty(), initialLeaderEpoch + 1)));
+        shared.append(new LocalRecordBatch(initialLeaderEpoch + 1, 0, records));
+        shared.append(new LeaderChangeBatch(
+            new LeaderAndEpoch(OptionalInt.of(0), initialLeaderEpoch + 2)));
+    }
+
     public String clusterId() {
         return clusterId;
     }
diff --git a/metadata/src/test/java/org/apache/kafka/timeline/SnapshottableHashTableTest.java b/metadata/src/test/java/org/apache/kafka/timeline/SnapshottableHashTableTest.java
index 7f1ddcc3ff5f6..1b9dd1559ea27 100644
--- a/metadata/src/test/java/org/apache/kafka/timeline/SnapshottableHashTableTest.java
+++ b/metadata/src/test/java/org/apache/kafka/timeline/SnapshottableHashTableTest.java
@@ -97,6 +97,25 @@ public void testEmptyTable() {
             new SnapshottableHashTable<>(registry, 1);
         assertEquals(0, table.snapshottableSize(Long.MAX_VALUE));
     }
+    @Test
+    public void testDeleteOnEmptyDeltaTable() {
+        // A simple test case to validate the behavior of the TimelineHashSet
+        // when the deltaTable for a snapshot is null
+        SnapshotRegistry registry = new SnapshotRegistry(new LogContext());
+        TimelineHashSet<String> set = new TimelineHashSet<>(registry, 5);
+
+        registry.getOrCreateSnapshot(100);
+        set.add("bar");
+        registry.getOrCreateSnapshot(200);
+        set.add("baz");
+        registry.revertToSnapshot(100);
+        assertTrue(set.isEmpty());
+        set.add("foo");
+        registry.getOrCreateSnapshot(300);
+        set.remove("bar");
+        registry.revertToSnapshot(100);
+        assertTrue(set.isEmpty());
+    }
 
     @Test
     public void testAddAndRemove() {
diff --git a/raft/src/main/java/org/apache/kafka/raft/EpochState.java b/raft/src/main/java/org/apache/kafka/raft/EpochState.java
index 89e8f0ac235b8..9cf231c42131b 100644
--- a/raft/src/main/java/org/apache/kafka/raft/EpochState.java
+++ b/raft/src/main/java/org/apache/kafka/raft/EpochState.java
@@ -27,7 +27,7 @@ default Optional<LogOffsetMetadata> highWatermark() {
 
     /**
      * Decide whether to grant a vote to a candidate, it is the responsibility of the caller to invoke
-     * {@link QuorumState##transitionToVoted(int, int)} if vote is granted.
+     * {@link QuorumState#transitionToVoted(int, int)} if vote is granted.
      *
      * @param candidateId The ID of the voter who attempt to become leader
      * @param isLogUpToDate Whether the candidate’s log is at least as up-to-date as receiver’s log, it
diff --git a/raft/src/main/java/org/apache/kafka/raft/KafkaRaftClient.java b/raft/src/main/java/org/apache/kafka/raft/KafkaRaftClient.java
index dced48da39a0d..cac7a8a3cb998 100644
--- a/raft/src/main/java/org/apache/kafka/raft/KafkaRaftClient.java
+++ b/raft/src/main/java/org/apache/kafka/raft/KafkaRaftClient.java
@@ -310,7 +310,7 @@ private void updateListenersProgress(long highWatermark) {
                 if (nextExpectedOffset < log.startOffset() && nextExpectedOffset < highWatermark) {
                     SnapshotReader<T> snapshot = latestSnapshot().orElseThrow(() -> new IllegalStateException(
                         String.format(
-                            "Snapshot expected since next offset of %s is %s, log start offset is %s and high-watermark is %s",
+                            "Snapshot expected since next offset of %s is %d, log start offset is %d and high-watermark is %d",
                             listenerContext.listenerName(),
                             nextExpectedOffset,
                             log.startOffset(),
@@ -333,7 +333,12 @@ private void updateListenersProgress(long highWatermark) {
 
     private Optional<SnapshotReader<T>> latestSnapshot() {
         return log.latestSnapshot().map(reader ->
-            RecordsSnapshotReader.of(reader, serde, BufferSupplier.create(), MAX_BATCH_SIZE_BYTES)
+            RecordsSnapshotReader.of(reader,
+                serde,
+                BufferSupplier.create(),
+                MAX_BATCH_SIZE_BYTES,
+                true /* Validate batch CRC*/
+            )
         );
     }
 
@@ -1034,7 +1039,7 @@ private static OptionalInt optionalLeaderId(int leaderIdOrNil) {
     }
 
     private static String listenerName(Listener<?> listener) {
-        return String.format("%s@%s", listener.getClass().getTypeName(), System.identityHashCode(listener));
+        return String.format("%s@%d", listener.getClass().getTypeName(), System.identityHashCode(listener));
     }
 
     private boolean handleFetchResponse(
@@ -1261,7 +1266,7 @@ private FetchSnapshotResponseData handleFetchSnapshotRequest(
         if (partitionSnapshot.position() > Integer.MAX_VALUE) {
             throw new IllegalStateException(
                 String.format(
-                    "Trying to fetch a snapshot with size (%s) and a position (%s) larger than %s",
+                    "Trying to fetch a snapshot with size (%d) and a position (%d) larger than %d",
                     snapshotSize,
                     partitionSnapshot.position(),
                     Integer.MAX_VALUE
@@ -1334,7 +1339,7 @@ private boolean handleFetchSnapshotResponse(
             partitionSnapshot.snapshotId().epoch() < 0) {
 
             /* The leader deleted the snapshot before the follower could download it. Start over by
-             * reseting the fetching snapshot state and sending another fetch request.
+             * resetting the fetching snapshot state and sending another fetch request.
              */
             logger.trace(
                 "Leader doesn't know about snapshot id {}, returned error {} and snapshot id {}",
@@ -1373,7 +1378,7 @@ private boolean handleFetchSnapshotResponse(
         if (snapshot.sizeInBytes() != partitionSnapshot.position()) {
             throw new IllegalStateException(
                 String.format(
-                    "Received fetch snapshot response with an invalid position. Expected %s; Received %s",
+                    "Received fetch snapshot response with an invalid position. Expected %d; Received %d",
                     snapshot.sizeInBytes(),
                     partitionSnapshot.position()
                 )
@@ -1400,7 +1405,7 @@ private boolean handleFetchSnapshotResponse(
             } else {
                 throw new IllegalStateException(
                     String.format(
-                        "Full log truncation expected but didn't happen. Snapshot of %s, log end offset %s, last fetched %s",
+                        "Full log truncation expected but didn't happen. Snapshot of %s, log end offset %s, last fetched %d",
                         snapshot.snapshotId(),
                         log.endOffset(),
                         log.lastFetchedEpoch()
@@ -2519,7 +2524,8 @@ private void fireHandleCommit(long baseOffset, Records records) {
                     serde,
                     BufferSupplier.create(),
                     MAX_BATCH_SIZE_BYTES,
-                    this
+                    this,
+                    true /* Validate batch CRC*/
                 )
             );
         }
diff --git a/raft/src/main/java/org/apache/kafka/raft/LeaderState.java b/raft/src/main/java/org/apache/kafka/raft/LeaderState.java
index de08b7b1cc983..8717d4e4d28f3 100644
--- a/raft/src/main/java/org/apache/kafka/raft/LeaderState.java
+++ b/raft/src/main/java/org/apache/kafka/raft/LeaderState.java
@@ -353,7 +353,7 @@ else if (!that.endOffset.isPresent())
         @Override
         public String toString() {
             return String.format(
-                "ReplicaState(nodeId=%s, endOffset=%s, lastFetchTimestamp=%s, hasAcknowledgedLeader=%s)",
+                "ReplicaState(nodeId=%d, endOffset=%s, lastFetchTimestamp=%s, hasAcknowledgedLeader=%s)",
                 nodeId,
                 endOffset,
                 lastFetchTimestamp,
@@ -372,7 +372,7 @@ public boolean canGrantVote(int candidateId, boolean isLogUpToDate) {
     @Override
     public String toString() {
         return String.format(
-            "Leader(localId=%s, epoch=%s, epochStartOffset=%s, highWatermark=%s, voterStates=%s)",
+            "Leader(localId=%d, epoch=%d, epochStartOffset=%d, highWatermark=%s, voterStates=%s)",
             localId,
             epoch,
             epochStartOffset,
diff --git a/raft/src/main/java/org/apache/kafka/raft/RaftClient.java b/raft/src/main/java/org/apache/kafka/raft/RaftClient.java
index 8e4f50e74885a..51f859c6c0731 100644
--- a/raft/src/main/java/org/apache/kafka/raft/RaftClient.java
+++ b/raft/src/main/java/org/apache/kafka/raft/RaftClient.java
@@ -24,6 +24,7 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.OptionalInt;
+import java.util.OptionalLong;
 import java.util.concurrent.CompletableFuture;
 
 public interface RaftClient<T> extends AutoCloseable {
@@ -113,6 +114,11 @@ default void beginShutdown() {}
      */
     void unregister(Listener<T> listener);
 
+    /**
+     * Returns the current high water mark, or OptionalLong.empty if it is not known.
+     */
+    OptionalLong highWatermark();
+
     /**
      * Return the current {@link LeaderAndEpoch}.
      *
@@ -199,8 +205,11 @@ default void beginShutdown() {}
      * Notification of successful resignation can be observed through
      * {@link Listener#handleLeaderChange(LeaderAndEpoch)}.
      *
-     * @param epoch the epoch to resign from. If this does not match the current epoch, this
+     * @param epoch the epoch to resign from. If this epoch is smaller than the current epoch, this
      *              call will be ignored.
+     *
+     * @throws IllegalArgumentException - if the passed epoch is invalid (negative or greater than current) or
+     * if the listener is not the leader associated with this epoch.
      */
     void resign(int epoch);
 
diff --git a/raft/src/main/java/org/apache/kafka/raft/RaftConfig.java b/raft/src/main/java/org/apache/kafka/raft/RaftConfig.java
index 0833df0bb2328..3ce72a591fe1e 100644
--- a/raft/src/main/java/org/apache/kafka/raft/RaftConfig.java
+++ b/raft/src/main/java/org/apache/kafka/raft/RaftConfig.java
@@ -68,7 +68,7 @@ public class RaftConfig {
 
     public static final String QUORUM_FETCH_TIMEOUT_MS_CONFIG = QUORUM_PREFIX + "fetch.timeout.ms";
     public static final String QUORUM_FETCH_TIMEOUT_MS_DOC = "Maximum time without a successful fetch from " +
-        "the current leader before becoming a candidate and triggering a election for voters; Maximum time without " +
+        "the current leader before becoming a candidate and triggering an election for voters; Maximum time without " +
         "receiving fetch from a majority of the quorum before asking around to see if there's a new epoch for leader";
     public static final int DEFAULT_QUORUM_FETCH_TIMEOUT_MS = 2_000;
 
diff --git a/raft/src/main/java/org/apache/kafka/raft/ReplicatedCounter.java b/raft/src/main/java/org/apache/kafka/raft/ReplicatedCounter.java
index 66303c6e4d31a..c7702ba20a56f 100644
--- a/raft/src/main/java/org/apache/kafka/raft/ReplicatedCounter.java
+++ b/raft/src/main/java/org/apache/kafka/raft/ReplicatedCounter.java
@@ -91,7 +91,7 @@ public synchronized void handleCommit(BatchReader<Integer> reader) {
                     if (nextCommitted != committed + 1) {
                         throw new AssertionError(
                             String.format(
-                                "Expected next committed value to be %s, but instead found %s on node %s",
+                                "Expected next committed value to be %d, but instead found %d on node %d",
                                 committed + 1,
                                 nextCommitted,
                                 nodeId
diff --git a/raft/src/main/java/org/apache/kafka/raft/internals/BatchAccumulator.java b/raft/src/main/java/org/apache/kafka/raft/internals/BatchAccumulator.java
index 7bfa44ac4a39d..323f393aebe4c 100644
--- a/raft/src/main/java/org/apache/kafka/raft/internals/BatchAccumulator.java
+++ b/raft/src/main/java/org/apache/kafka/raft/internals/BatchAccumulator.java
@@ -104,7 +104,8 @@ public BatchAccumulator(
      * @throws RecordBatchTooLargeException if the size of one record T is greater than the maximum
      *         batch size; if this exception is throw some of the elements in records may have
      *         been committed
-     * @throws NotLeaderException if the epoch doesn't match the leader epoch
+     * @throws NotLeaderException if the epoch is less than the leader epoch
+     * @throws IllegalArgumentException if the epoch is invalid (greater than the leader epoch)
      * @throws BufferAllocationException if we failed to allocate memory for the records
      */
     public long append(int epoch, List<T> records) {
@@ -123,7 +124,8 @@ public long append(int epoch, List<T> records) {
      * @throws RecordBatchTooLargeException if the size of the records is greater than the maximum
      *         batch size; if this exception is throw none of the elements in records were
      *         committed
-     * @throws NotLeaderException if the epoch doesn't match the leader epoch
+     * @throws NotLeaderException if the epoch is less than the leader epoch
+     * @throws IllegalArgumentException if the epoch is invalid (greater than the leader epoch)
      * @throws BufferAllocationException if we failed to allocate memory for the records
      */
     public long appendAtomic(int epoch, List<T> records) {
@@ -132,7 +134,8 @@ public long appendAtomic(int epoch, List<T> records) {
 
     private long append(int epoch, List<T> records, boolean isAtomic) {
         if (epoch < this.epoch) {
-            throw new NotLeaderException("Append failed because the epoch doesn't match");
+            throw new NotLeaderException("Append failed because the given epoch " + epoch + " is stale. " +
+                    "Current leader epoch = " + this.epoch());
         } else if (epoch > this.epoch) {
             throw new IllegalArgumentException("Attempt to append from epoch " + epoch +
                 " which is larger than the current epoch " + this.epoch);
@@ -189,7 +192,7 @@ private BatchBuilder<T> maybeAllocateBatch(
             if (bytesNeeded.isPresent() && bytesNeeded.getAsInt() > maxBatchSize) {
                 throw new RecordBatchTooLargeException(
                     String.format(
-                        "The total record(s) size of %s exceeds the maximum allowed batch size of %s",
+                        "The total record(s) size of %d exceeds the maximum allowed batch size of %d",
                         bytesNeeded.getAsInt(),
                         maxBatchSize
                     )
diff --git a/raft/src/main/java/org/apache/kafka/raft/internals/FuturePurgatory.java b/raft/src/main/java/org/apache/kafka/raft/internals/FuturePurgatory.java
index b37fb3a3847c5..e5dceeaa0c30e 100644
--- a/raft/src/main/java/org/apache/kafka/raft/internals/FuturePurgatory.java
+++ b/raft/src/main/java/org/apache/kafka/raft/internals/FuturePurgatory.java
@@ -56,8 +56,8 @@ public interface FuturePurgatory<T extends Comparable<T>> {
     CompletableFuture<Long> await(T threshold, long maxWaitTimeMs);
 
     /**
-     * Complete awaiting futures whose associated values are larger than the given threshold value.
-     * The completion callbacks will be triggered from the calling thread.
+     * Complete awaiting futures whose threshold value from {@link FuturePurgatory#await} are smaller
+     * than the given threshold value. The completion callbacks will be triggered from the calling thread.
      *
      * @param value         the threshold value used to determine which futures can be completed
      * @param currentTimeMs the current time in milliseconds that will be passed to
diff --git a/raft/src/main/java/org/apache/kafka/raft/internals/KafkaRaftMetrics.java b/raft/src/main/java/org/apache/kafka/raft/internals/KafkaRaftMetrics.java
index c7ffcfb0b6b8d..84748bd3306b1 100644
--- a/raft/src/main/java/org/apache/kafka/raft/internals/KafkaRaftMetrics.java
+++ b/raft/src/main/java/org/apache/kafka/raft/internals/KafkaRaftMetrics.java
@@ -27,6 +27,7 @@
 import org.apache.kafka.raft.OffsetAndEpoch;
 import org.apache.kafka.raft.QuorumState;
 
+import java.util.Arrays;
 import java.util.OptionalLong;
 import java.util.concurrent.TimeUnit;
 
@@ -34,11 +35,11 @@ public class KafkaRaftMetrics implements AutoCloseable {
 
     private final Metrics metrics;
 
-    private OffsetAndEpoch logEndOffset;
-    private int numUnknownVoterConnections;
-    private OptionalLong electionStartMs;
-    private OptionalLong pollStartMs;
-    private OptionalLong pollEndMs;
+    private volatile OffsetAndEpoch logEndOffset;
+    private volatile int numUnknownVoterConnections;
+    private volatile OptionalLong electionStartMs;
+    private volatile OptionalLong pollStartMs;
+    private volatile OptionalLong pollEndMs;
 
     private final MetricName currentLeaderIdMetricName;
     private final MetricName currentVotedIdMetricName;
@@ -186,19 +187,23 @@ public void maybeUpdateElectionLatency(long currentTimeMs) {
 
     @Override
     public void close() {
-        metrics.removeMetric(currentLeaderIdMetricName);
-        metrics.removeMetric(currentVotedIdMetricName);
-        metrics.removeMetric(currentEpochMetricName);
-        metrics.removeMetric(currentStateMetricName);
-        metrics.removeMetric(highWatermarkMetricName);
-        metrics.removeMetric(logEndOffsetMetricName);
-        metrics.removeMetric(logEndEpochMetricName);
-        metrics.removeMetric(numUnknownVoterConnectionsMetricName);
-
-        metrics.removeSensor(commitTimeSensor.name());
-        metrics.removeSensor(electionTimeSensor.name());
-        metrics.removeSensor(fetchRecordsSensor.name());
-        metrics.removeSensor(appendRecordsSensor.name());
-        metrics.removeSensor(pollIdleSensor.name());
+        Arrays.asList(
+            currentLeaderIdMetricName,
+            currentVotedIdMetricName,
+            currentEpochMetricName,
+            currentStateMetricName,
+            highWatermarkMetricName,
+            logEndOffsetMetricName,
+            logEndEpochMetricName,
+            numUnknownVoterConnectionsMetricName
+        ).forEach(metrics::removeMetric);
+
+        Arrays.asList(
+            commitTimeSensor.name(),
+            electionTimeSensor.name(),
+            fetchRecordsSensor.name(),
+            appendRecordsSensor.name(),
+            pollIdleSensor.name()
+        ).forEach(metrics::removeSensor);
     }
 }
diff --git a/raft/src/main/java/org/apache/kafka/raft/internals/RecordsBatchReader.java b/raft/src/main/java/org/apache/kafka/raft/internals/RecordsBatchReader.java
index e95206100a305..61819a9dccaea 100644
--- a/raft/src/main/java/org/apache/kafka/raft/internals/RecordsBatchReader.java
+++ b/raft/src/main/java/org/apache/kafka/raft/internals/RecordsBatchReader.java
@@ -100,11 +100,12 @@ public static <T> RecordsBatchReader<T> of(
         RecordSerde<T> serde,
         BufferSupplier bufferSupplier,
         int maxBatchSize,
-        CloseListener<BatchReader<T>> closeListener
+        CloseListener<BatchReader<T>> closeListener,
+        boolean doCrcValidation
     ) {
         return new RecordsBatchReader<>(
             baseOffset,
-            new RecordsIterator<>(records, serde, bufferSupplier, maxBatchSize),
+            new RecordsIterator<>(records, serde, bufferSupplier, maxBatchSize, doCrcValidation),
             closeListener
         );
     }
diff --git a/raft/src/main/java/org/apache/kafka/raft/internals/RecordsIterator.java b/raft/src/main/java/org/apache/kafka/raft/internals/RecordsIterator.java
index b36d4f156347e..ff415aa72ada0 100644
--- a/raft/src/main/java/org/apache/kafka/raft/internals/RecordsIterator.java
+++ b/raft/src/main/java/org/apache/kafka/raft/internals/RecordsIterator.java
@@ -41,6 +41,9 @@ public final class RecordsIterator<T> implements Iterator<Batch<T>>, AutoCloseab
     private final RecordSerde<T> serde;
     private final BufferSupplier bufferSupplier;
     private final int batchSize;
+    // Setting to true will make the RecordsIterator perform a CRC Validation
+    // on the batch header when iterating over them
+    private final boolean doCrcValidation;
 
     private Iterator<MutableRecordBatch> nextBatches = Collections.emptyIterator();
     private Optional<Batch<T>> nextBatch = Optional.empty();
@@ -54,12 +57,14 @@ public RecordsIterator(
         Records records,
         RecordSerde<T> serde,
         BufferSupplier bufferSupplier,
-        int batchSize
+        int batchSize,
+        boolean doCrcValidation
     ) {
         this.records = records;
         this.serde = serde;
         this.bufferSupplier = bufferSupplier;
         this.batchSize = Math.max(batchSize, Records.HEADER_SIZE_UP_TO_MAGIC);
+        this.doCrcValidation = doCrcValidation;
     }
 
     @Override
@@ -163,7 +168,6 @@ private Optional<Batch<T>> nextBatch() {
 
         if (nextBatches.hasNext()) {
             MutableRecordBatch nextBatch = nextBatches.next();
-
             // Update the buffer position to reflect the read batch
             allocatedBuffer.ifPresent(buffer -> buffer.position(buffer.position() + nextBatch.sizeInBytes()));
 
@@ -180,6 +184,11 @@ private Optional<Batch<T>> nextBatch() {
     }
 
     private Batch<T> readBatch(DefaultRecordBatch batch) {
+        if (doCrcValidation) {
+            // Perform a CRC validity check on this batch
+            batch.ensureValid();
+        }
+
         final Batch<T> result;
         if (batch.isControlBatch()) {
             result = Batch.control(
@@ -240,6 +249,7 @@ private T readRecord(Readable input) {
             throw new IllegalArgumentException();
         }
 
+        // Read the metadata record body from the file input reader
         T record = serde.read(input, valueSize);
 
         int numHeaders = input.readVarint();
diff --git a/raft/src/main/java/org/apache/kafka/snapshot/RecordsSnapshotReader.java b/raft/src/main/java/org/apache/kafka/snapshot/RecordsSnapshotReader.java
index 89ad26322299b..92b695146c39f 100644
--- a/raft/src/main/java/org/apache/kafka/snapshot/RecordsSnapshotReader.java
+++ b/raft/src/main/java/org/apache/kafka/snapshot/RecordsSnapshotReader.java
@@ -104,11 +104,12 @@ public static <T> RecordsSnapshotReader<T> of(
         RawSnapshotReader snapshot,
         RecordSerde<T> serde,
         BufferSupplier bufferSupplier,
-        int maxBatchSize
+        int maxBatchSize,
+        boolean doCrcValidation
     ) {
         return new RecordsSnapshotReader<>(
             snapshot.snapshotId(),
-            new RecordsIterator<>(snapshot.records(), serde, bufferSupplier, maxBatchSize)
+            new RecordsIterator<>(snapshot.records(), serde, bufferSupplier, maxBatchSize, doCrcValidation)
         );
     }
 
diff --git a/raft/src/main/java/org/apache/kafka/snapshot/Snapshots.java b/raft/src/main/java/org/apache/kafka/snapshot/Snapshots.java
index a4d3b5a8cd6ad..337e56a7f8abb 100644
--- a/raft/src/main/java/org/apache/kafka/snapshot/Snapshots.java
+++ b/raft/src/main/java/org/apache/kafka/snapshot/Snapshots.java
@@ -30,7 +30,7 @@
 
 public final class Snapshots {
     private static final Logger log = LoggerFactory.getLogger(Snapshots.class);
-    private static final String SUFFIX = ".checkpoint";
+    public static final String SUFFIX = ".checkpoint";
     private static final String PARTIAL_SUFFIX = String.format("%s.part", SUFFIX);
     private static final String DELETE_SUFFIX = String.format("%s.deleted", SUFFIX);
 
diff --git a/raft/src/test/java/org/apache/kafka/raft/RaftEventSimulationTest.java b/raft/src/test/java/org/apache/kafka/raft/RaftEventSimulationTest.java
index 4f79dc18cc6ee..a6117a33ca0e5 100644
--- a/raft/src/test/java/org/apache/kafka/raft/RaftEventSimulationTest.java
+++ b/raft/src/test/java/org/apache/kafka/raft/RaftEventSimulationTest.java
@@ -1112,7 +1112,7 @@ private void assertCommittedData(RaftNode node) {
                 startOffset.set(snapshotId.offset);
 
                 try (SnapshotReader<Integer> snapshot =
-                        RecordsSnapshotReader.of(log.readSnapshot(snapshotId).get(), node.intSerde, BufferSupplier.create(), Integer.MAX_VALUE)) {
+                        RecordsSnapshotReader.of(log.readSnapshot(snapshotId).get(), node.intSerde, BufferSupplier.create(), Integer.MAX_VALUE, true)) {
                     // Expect only one batch with only one record
                     assertTrue(snapshot.hasNext());
                     Batch<Integer> batch = snapshot.next();
diff --git a/raft/src/test/java/org/apache/kafka/raft/internals/RecordsBatchReaderTest.java b/raft/src/test/java/org/apache/kafka/raft/internals/RecordsBatchReaderTest.java
index 6fe540711c268..ae8b1dfb8e2c0 100644
--- a/raft/src/test/java/org/apache/kafka/raft/internals/RecordsBatchReaderTest.java
+++ b/raft/src/test/java/org/apache/kafka/raft/internals/RecordsBatchReaderTest.java
@@ -100,7 +100,8 @@ private void testBatchReader(
             serde,
             bufferSupplier,
             MAX_BATCH_BYTES,
-            closeListener
+            closeListener,
+            true
         );
 
         for (TestBatch<String> batch : expectedBatches) {
diff --git a/raft/src/test/java/org/apache/kafka/raft/internals/RecordsIteratorTest.java b/raft/src/test/java/org/apache/kafka/raft/internals/RecordsIteratorTest.java
index 7d98489312059..9dfbfd62fbfed 100644
--- a/raft/src/test/java/org/apache/kafka/raft/internals/RecordsIteratorTest.java
+++ b/raft/src/test/java/org/apache/kafka/raft/internals/RecordsIteratorTest.java
@@ -30,7 +30,9 @@
 import java.util.stream.Stream;
 import net.jqwik.api.ForAll;
 import net.jqwik.api.Property;
+import org.apache.kafka.common.errors.CorruptRecordException;
 import org.apache.kafka.common.record.CompressionType;
+import org.apache.kafka.common.record.DefaultRecordBatch;
 import org.apache.kafka.common.record.FileRecords;
 import org.apache.kafka.common.record.MemoryRecords;
 import org.apache.kafka.common.record.Records;
@@ -42,6 +44,7 @@
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
 import org.mockito.Mockito;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -60,7 +63,7 @@ private static Stream<Arguments> emptyRecords() throws IOException {
     @ParameterizedTest
     @MethodSource("emptyRecords")
     void testEmptyRecords(Records records) {
-        testIterator(Collections.emptyList(), records);
+        testIterator(Collections.emptyList(), records, true);
     }
 
     @Property
@@ -71,7 +74,7 @@ public void testMemoryRecords(
         List<TestBatch<String>> batches = createBatches(seed);
 
         MemoryRecords memRecords = buildRecords(compressionType, batches);
-        testIterator(batches, memRecords);
+        testIterator(batches, memRecords, true);
     }
 
     @Property
@@ -85,18 +88,58 @@ public void testFileRecords(
         FileRecords fileRecords = FileRecords.open(TestUtils.tempFile());
         fileRecords.append(memRecords);
 
-        testIterator(batches, fileRecords);
+        testIterator(batches, fileRecords, true);
+        fileRecords.close();
+    }
+
+    @Property
+    public void testCrcValidation(
+            @ForAll CompressionType compressionType,
+            @ForAll long seed
+    ) throws IOException {
+        List<TestBatch<String>> batches = createBatches(seed);
+        MemoryRecords memRecords = buildRecords(compressionType, batches);
+        // Read the Batch CRC for the first batch from the buffer
+        ByteBuffer readBuf = memRecords.buffer();
+        readBuf.position(DefaultRecordBatch.CRC_OFFSET);
+        int actualCrc = readBuf.getInt();
+        // Corrupt the CRC on the first batch
+        memRecords.buffer().putInt(DefaultRecordBatch.CRC_OFFSET, actualCrc + 1);
+
+        assertThrows(CorruptRecordException.class, () -> testIterator(batches, memRecords, true));
+
+        FileRecords fileRecords = FileRecords.open(TestUtils.tempFile());
+        fileRecords.append(memRecords);
+        assertThrows(CorruptRecordException.class, () -> testIterator(batches, fileRecords, true));
+
+        // Verify check does not trigger when doCrcValidation is false
+        assertDoesNotThrow(() -> testIterator(batches, memRecords, false));
+        assertDoesNotThrow(() -> testIterator(batches, fileRecords, false));
+
+        // Fix the corruption
+        memRecords.buffer().putInt(DefaultRecordBatch.CRC_OFFSET, actualCrc);
+
+        // Verify check does not trigger when the corruption is fixed
+        assertDoesNotThrow(() -> testIterator(batches, memRecords, true));
+        FileRecords moreFileRecords = FileRecords.open(TestUtils.tempFile());
+        moreFileRecords.append(memRecords);
+        assertDoesNotThrow(() -> testIterator(batches, moreFileRecords, true));
+
+        fileRecords.close();
+        moreFileRecords.close();
     }
 
     private void testIterator(
         List<TestBatch<String>> expectedBatches,
-        Records records
+        Records records,
+        boolean validateCrc
     ) {
         Set<ByteBuffer> allocatedBuffers = Collections.newSetFromMap(new IdentityHashMap<>());
 
         RecordsIterator<String> iterator = createIterator(
             records,
-            mockBufferSupplier(allocatedBuffers)
+            mockBufferSupplier(allocatedBuffers),
+            validateCrc
         );
 
         for (TestBatch<String> batch : expectedBatches) {
@@ -111,8 +154,12 @@ private void testIterator(
         assertEquals(Collections.emptySet(), allocatedBuffers);
     }
 
-    static RecordsIterator<String> createIterator(Records records, BufferSupplier bufferSupplier) {
-        return new RecordsIterator<>(records, STRING_SERDE, bufferSupplier, Records.HEADER_SIZE_UP_TO_MAGIC);
+    static RecordsIterator<String> createIterator(
+        Records records,
+        BufferSupplier bufferSupplier,
+        boolean validateCrc
+    ) {
+        return new RecordsIterator<>(records, STRING_SERDE, bufferSupplier, Records.HEADER_SIZE_UP_TO_MAGIC, validateCrc);
     }
 
     static BufferSupplier mockBufferSupplier(Set<ByteBuffer> buffers) {
diff --git a/raft/src/test/java/org/apache/kafka/snapshot/SnapshotWriterReaderTest.java b/raft/src/test/java/org/apache/kafka/snapshot/SnapshotWriterReaderTest.java
index d251e3635942a..cd86c709ff980 100644
--- a/raft/src/test/java/org/apache/kafka/snapshot/SnapshotWriterReaderTest.java
+++ b/raft/src/test/java/org/apache/kafka/snapshot/SnapshotWriterReaderTest.java
@@ -192,7 +192,8 @@ private SnapshotReader<String> readSnapshot(
             context.log.readSnapshot(snapshotId).get(),
             context.serde,
             BufferSupplier.create(),
-            maxBatchSize
+            maxBatchSize,
+            true
         );
     }
 
@@ -238,7 +239,7 @@ record = records.next();
         assertTrue(batch.isControlBatch());
 
         SnapshotFooterRecord footerRecord = ControlRecordUtils.deserializedSnapshotFooterRecord(record);
-        assertEquals(footerRecord.version(), ControlRecordUtils.SNAPSHOT_HEADER_HIGHEST_VERSION);
+        assertEquals(footerRecord.version(), ControlRecordUtils.SNAPSHOT_FOOTER_HIGHEST_VERSION);
 
         return countRecords;
     }
@@ -246,7 +247,7 @@ record = records.next();
     public static void assertSnapshot(List<List<String>> batches, RawSnapshotReader reader) {
         assertSnapshot(
             batches,
-            RecordsSnapshotReader.of(reader, new StringSerde(), BufferSupplier.create(), Integer.MAX_VALUE)
+            RecordsSnapshotReader.of(reader, new StringSerde(), BufferSupplier.create(), Integer.MAX_VALUE, true)
         );
     }
 
diff --git a/retry_zinc b/retry_zinc
new file mode 100755
index 0000000000000..9d7b5553fa997
--- /dev/null
+++ b/retry_zinc
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Hacky workaround for https://github.com/gradle/gradle/issues/3777
+# There is currently no configurable timeout, so we retry builds jenkins when we can't get a lock on the zinc compiler cache
+# Hopefully we can remove this in the future, but this will save us from having to manually rebuild for the time being.
+# Example:
+# [2021-10-19T17:25:07.234Z] * What went wrong:
+# [2021-10-19T17:25:07.234Z] Execution failed for task ':streams:streams-scala:compileScala'.
+# [2021-10-19T17:25:07.234Z] > Timeout waiting to lock zinc-1.3.5_2.13.6_8 compiler cache (/home/jenkins/.gradle/caches/7.0.2/zinc-1.3.5_2.13.6_8). It is currently in use by another Gradle instance.
+# [2021-10-19T17:25:07.234Z]   Owner PID: 3999
+# [2021-10-19T17:25:07.234Z]   Our PID: 3973
+# [2021-10-19T17:25:07.234Z]   Owner Operation: 
+# [2021-10-19T17:25:07.234Z]   Our operation: 
+# [2021-10-19T17:25:07.234Z]   Lock file: /home/jenkins/.gradle/caches/7.0.2/zinc-1.3.5_2.13.6_8/zinc-1.3.5_2.13.6_8.lock
+
+set -uf -o pipefail
+
+retryable=1
+while [[ "$retryable" != 0 ]]; do
+	retryable=0
+	rm -f buildoutput.log
+
+	"$@" 2>&1 | tee buildoutput.log
+	commandReturnCode=$?
+
+	if [ $commandReturnCode -ne 0 ]; then
+		if grep "Timeout waiting to lock zinc" buildoutput.log; then
+			retryable=1
+			echo 'Retrying due to zinc lock timeout'
+			continue
+		else
+			exit $commandReturnCode
+		fi
+	fi
+done
diff --git a/server-common/src/main/java/org/apache/kafka/metadata/FeatureLevelListener.java b/server-common/src/main/java/org/apache/kafka/metadata/FeatureLevelListener.java
new file mode 100644
index 0000000000000..de91e268a24a2
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/metadata/FeatureLevelListener.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.metadata;
+
+/**
+ * A callback for changes to feature levels. Currently, this is only used by the controller to receive a callback
+ * when committed FeatureLevelRecords are being replayed.
+ */
+public interface FeatureLevelListener {
+    void handle(String featureName, short finalizedVersion);
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersion.java b/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersion.java
new file mode 100644
index 0000000000000..55916470cbd81
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersion.java
@@ -0,0 +1,453 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.common;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.regex.Pattern;
+import org.apache.kafka.common.record.RecordVersion;
+
+/**
+ * This class contains the different Kafka versions.
+ * Right now, we use them for upgrades - users can configure the version of the API brokers will use to communicate between themselves.
+ * This is only for inter-broker communications - when communicating with clients, the client decides on the API version.
+ *
+ * Note that the ID we initialize for each version is important.
+ * We consider a version newer than another if it is lower in the enum list (to avoid depending on lexicographic order)
+ *
+ * Since the api protocol may change more than once within the same release and to facilitate people deploying code from
+ * trunk, we have the concept of internal versions (first introduced during the 0.10.0 development cycle). For example,
+ * the first time we introduce a version change in a release, say 0.10.0, we will add a config value "0.10.0-IV0" and a
+ * corresponding enum constant IBP_0_10_0-IV0. We will also add a config value "0.10.0" that will be mapped to the
+ * latest internal version object, which is IBP_0_10_0-IV0. When we change the protocol a second time while developing
+ * 0.10.0, we will add a new config value "0.10.0-IV1" and a corresponding enum constant IBP_0_10_0-IV1. We will change
+ * the config value "0.10.0" to map to the latest internal version IBP_0_10_0-IV1. The config value of
+ * "0.10.0-IV0" is still mapped to IBP_0_10_0-IV0. This way, if people are deploying from trunk, they can use
+ * "0.10.0-IV0" and "0.10.0-IV1" to upgrade one internal version at a time. For most people who just want to use
+ * released version, they can use "0.10.0" when upgrading to the 0.10.0 release.
+ */
+public enum MetadataVersion {
+
+    IBP_0_8_0(-1, "0.8.0", ""),
+    IBP_0_8_1(-1, "0.8.1", ""),
+    IBP_0_8_2(-1, "0.8.2", ""),
+    IBP_0_9_0(-1, "0.9.0", ""),
+
+    // 0.10.0-IV0 is introduced for KIP-31/32 which changes the message format.
+    IBP_0_10_0_IV0(-1, "0.10.0", "IV0"),
+
+    // 0.10.0-IV1 is introduced for KIP-36(rack awareness) and KIP-43(SASL handshake).
+    IBP_0_10_0_IV1(-1, "0.10.0", "IV1"),
+
+    // introduced for JoinGroup protocol change in KIP-62
+    IBP_0_10_1_IV0(-1, "0.10.1", "IV0"),
+
+    // 0.10.1-IV1 is introduced for KIP-74(fetch response size limit).
+    IBP_0_10_1_IV1(-1, "0.10.1", "IV1"),
+
+    // introduced ListOffsetRequest v1 in KIP-79
+    IBP_0_10_1_IV2(-1, "0.10.1", "IV2"),
+
+    // introduced UpdateMetadataRequest v3 in KIP-103
+    IBP_0_10_2_IV0(-1, "0.10.2", "IV0"),
+
+    // KIP-98 (idempotent and transactional producer support)
+    IBP_0_11_0_IV0(-1, "0.11.0", "IV0"),
+
+    // introduced DeleteRecordsRequest v0 and FetchRequest v4 in KIP-107
+    IBP_0_11_0_IV1(-1, "0.11.0", "IV1"),
+
+    // Introduced leader epoch fetches to the replica fetcher via KIP-101
+    IBP_0_11_0_IV2(-1, "0.11.0", "IV2"),
+
+    // Introduced LeaderAndIsrRequest V1, UpdateMetadataRequest V4 and FetchRequest V6 via KIP-112
+    IBP_1_0_IV0(-1, "1.0", "IV0"),
+
+    // Introduced DeleteGroupsRequest V0 via KIP-229, plus KIP-227 incremental fetch requests,
+    // and KafkaStorageException for fetch requests.
+    IBP_1_1_IV0(-1, "1.1", "IV0"),
+
+    // Introduced OffsetsForLeaderEpochRequest V1 via KIP-279 (Fix log divergence between leader and follower after fast leader fail over)
+    IBP_2_0_IV0(-1, "2.0", "IV0"),
+
+    // Several request versions were bumped due to KIP-219 (Improve quota communication)
+    IBP_2_0_IV1(-1, "2.0", "IV1"),
+
+    // Introduced new schemas for group offset (v2) and group metadata (v2) (KIP-211)
+    IBP_2_1_IV0(-1, "2.1", "IV0"),
+
+    // New Fetch, OffsetsForLeaderEpoch, and ListOffsets schemas (KIP-320)
+    IBP_2_1_IV1(-1, "2.1", "IV1"),
+
+    // Support ZStandard Compression Codec (KIP-110)
+    IBP_2_1_IV2(-1, "2.1", "IV2"),
+
+    // Introduced broker generation (KIP-380), and
+    // LeaderAdnIsrRequest V2, UpdateMetadataRequest V5, StopReplicaRequest V1
+    IBP_2_2_IV0(-1, "2.2", "IV0"),
+
+    // New error code for ListOffsets when a new leader is lagging behind former HW (KIP-207)
+    IBP_2_2_IV1(-1, "2.2", "IV1"),
+
+    // Introduced static membership.
+    IBP_2_3_IV0(-1, "2.3", "IV0"),
+
+    // Add rack_id to FetchRequest, preferred_read_replica to FetchResponse, and replica_id to OffsetsForLeaderRequest
+    IBP_2_3_IV1(-1, "2.3", "IV1"),
+
+    // Add adding_replicas and removing_replicas fields to LeaderAndIsrRequest
+    IBP_2_4_IV0(-1, "2.4", "IV0"),
+
+    // Flexible version support in inter-broker APIs
+    IBP_2_4_IV1(-1, "2.4", "IV1"),
+
+    // No new APIs, equivalent to 2.4-IV1
+    IBP_2_5_IV0(-1, "2.5", "IV0"),
+
+    // Introduced StopReplicaRequest V3 containing the leader epoch for each partition (KIP-570)
+    IBP_2_6_IV0(-1, "2.6", "IV0"),
+
+    // Introduced feature versioning support (KIP-584)
+    IBP_2_7_IV0(-1, "2.7", "IV0"),
+
+    // Bup Fetch protocol for Raft protocol (KIP-595)
+    IBP_2_7_IV1(-1, "2.7", "IV1"),
+
+    // Introduced AlterPartition (KIP-497)
+    IBP_2_7_IV2(-1, "2.7", "IV2"),
+
+    // Flexible versioning on ListOffsets, WriteTxnMarkers and OffsetsForLeaderEpoch. Also adds topic IDs (KIP-516)
+    IBP_2_8_IV0(-1, "2.8", "IV0"),
+
+    // Introduced topic IDs to LeaderAndIsr and UpdateMetadata requests/responses (KIP-516)
+    IBP_2_8_IV1(-1, "2.8", "IV1"),
+
+    // Introduce AllocateProducerIds (KIP-730)
+    IBP_3_0_IV0(-1, "3.0", "IV0"),
+
+    // Introduce ListOffsets V7 which supports listing offsets by max timestamp (KIP-734)
+    // Assume message format version is 3.0 (KIP-724)
+    IBP_3_0_IV1(1, "3.0", "IV1", true),
+
+    // Adds topic IDs to Fetch requests/responses (KIP-516)
+    IBP_3_1_IV0(2, "3.1", "IV0", false),
+
+    // Support for leader recovery for unclean leader election (KIP-704)
+    IBP_3_2_IV0(3, "3.2", "IV0", true),
+
+    // Support for metadata.version feature flag and Removes min_version_level from the finalized version range that is written to ZooKeeper (KIP-778)
+    IBP_3_3_IV0(4, "3.3", "IV0", false),
+
+    // Support NoopRecord for the cluster metadata log (KIP-835)
+    IBP_3_3_IV1(5, "3.3", "IV1", true),
+
+    // In KRaft mode, use BrokerRegistrationChangeRecord instead of UnfenceBrokerRecord and FenceBrokerRecord.
+    IBP_3_3_IV2(6, "3.3", "IV2", true),
+
+    // Adds InControlledShutdown state to RegisterBrokerRecord and BrokerRegistrationChangeRecord (KIP-841).
+    IBP_3_3_IV3(7, "3.3", "IV3", true);
+
+    // NOTE: update the default version in @ClusterTest annotation to point to the latest version
+    
+    public static final String FEATURE_NAME = "metadata.version";
+
+    public static final MetadataVersion MINIMUM_KRAFT_VERSION = IBP_3_0_IV1;
+
+    public static final MetadataVersion[] VERSIONS;
+
+    private final short featureLevel;
+    private final String release;
+    private final String ibpVersion;
+    private final boolean didMetadataChange;
+
+    MetadataVersion(int featureLevel, String release, String subVersion) {
+        this(featureLevel, release, subVersion, true);
+    }
+
+    MetadataVersion(int featureLevel, String release, String subVersion, boolean didMetadataChange) {
+        this.featureLevel = (short) featureLevel;
+        this.release = release;
+        if (subVersion.isEmpty()) {
+            this.ibpVersion = release;
+        } else {
+            this.ibpVersion = String.format("%s-%s", release, subVersion);
+        }
+        this.didMetadataChange = didMetadataChange;
+    }
+
+    public short featureLevel() {
+        return featureLevel;
+    }
+
+    public boolean isSaslInterBrokerHandshakeRequestEnabled() {
+        return this.isAtLeast(IBP_0_10_0_IV1);
+    }
+
+    public boolean isOffsetForLeaderEpochSupported() {
+        return this.isAtLeast(IBP_0_11_0_IV2);
+    }
+
+    public boolean isFeatureVersioningSupported() {
+        return this.isAtLeast(IBP_2_7_IV0);
+    }
+
+    public boolean isTruncationOnFetchSupported() {
+        return this.isAtLeast(IBP_2_7_IV1);
+    }
+
+    public boolean isAlterPartitionSupported() {
+        return this.isAtLeast(IBP_2_7_IV2);
+    }
+
+    public boolean isTopicIdsSupported() {
+        return this.isAtLeast(IBP_2_8_IV0);
+    }
+
+    public boolean isAllocateProducerIdsSupported() {
+        return this.isAtLeast(IBP_3_0_IV0);
+    }
+
+    public boolean isLeaderRecoverySupported() {
+        return this.isAtLeast(IBP_3_2_IV0);
+    }
+
+    public boolean isNoOpRecordSupported() {
+        return this.isAtLeast(IBP_3_3_IV1);
+    }
+
+    public boolean isKRaftSupported() {
+        return this.featureLevel > 0;
+    }
+
+    public RecordVersion highestSupportedRecordVersion() {
+        if (this.isLessThan(IBP_0_10_0_IV0)) {
+            return RecordVersion.V0;
+        } else if (this.isLessThan(IBP_0_11_0_IV0)) {
+            return RecordVersion.V1;
+        } else {
+            return RecordVersion.V2;
+        }
+    }
+
+    public boolean isBrokerRegistrationChangeRecordSupported() {
+        return this.isAtLeast(IBP_3_3_IV2);
+    }
+
+    public boolean isInControlledShutdownStateSupported() {
+        return this.isAtLeast(IBP_3_3_IV3);
+    }
+
+    public short registerBrokerRecordVersion() {
+        if (isInControlledShutdownStateSupported()) {
+            return (short) 1;
+        } else {
+            return (short) 0;
+        }
+    }
+
+    public short fetchRequestVersion() {
+        if (this.isAtLeast(IBP_3_1_IV0)) {
+            return 13;
+        } else if (this.isAtLeast(IBP_2_7_IV1)) {
+            return 12;
+        } else if (this.isAtLeast(IBP_2_3_IV1)) {
+            return 11;
+        } else if (this.isAtLeast(IBP_2_1_IV2)) {
+            return 10;
+        } else if (this.isAtLeast(IBP_2_0_IV1)) {
+            return 8;
+        } else if (this.isAtLeast(IBP_1_1_IV0)) {
+            return 7;
+        } else if (this.isAtLeast(IBP_0_11_0_IV1)) {
+            return 5;
+        } else if (this.isAtLeast(IBP_0_11_0_IV0)) {
+            return 4;
+        } else if (this.isAtLeast(IBP_0_10_1_IV1)) {
+            return 3;
+        } else if (this.isAtLeast(IBP_0_10_0_IV0)) {
+            return 2;
+        } else if (this.isAtLeast(IBP_0_9_0)) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    public short offsetForLeaderEpochRequestVersion() {
+        if (this.isAtLeast(IBP_2_8_IV0)) {
+            return 4;
+        } else if (this.isAtLeast(IBP_2_3_IV1)) {
+            return 3;
+        } else if (this.isAtLeast(IBP_2_1_IV1)) {
+            return 2;
+        } else if (this.isAtLeast(IBP_2_0_IV0)) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    public short listOffsetRequestVersion() {
+        if (this.isAtLeast(IBP_3_0_IV1)) {
+            return 7;
+        } else if (this.isAtLeast(IBP_2_8_IV0)) {
+            return 6;
+        } else if (this.isAtLeast(IBP_2_2_IV1)) {
+            return 5;
+        } else if (this.isAtLeast(IBP_2_1_IV1)) {
+            return 4;
+        } else if (this.isAtLeast(IBP_2_0_IV1)) {
+            return 3;
+        } else if (this.isAtLeast(IBP_0_11_0_IV0)) {
+            return 2;
+        } else if (this.isAtLeast(IBP_0_10_1_IV2)) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    private static final Map<String, MetadataVersion> IBP_VERSIONS;
+    static {
+        {
+            MetadataVersion[] enumValues = MetadataVersion.values();
+            VERSIONS = Arrays.copyOf(enumValues, enumValues.length);
+
+            IBP_VERSIONS = new HashMap<>();
+            Map<String, MetadataVersion> maxInterVersion = new HashMap<>();
+            for (MetadataVersion metadataVersion : VERSIONS) {
+                maxInterVersion.put(metadataVersion.release, metadataVersion);
+                IBP_VERSIONS.put(metadataVersion.ibpVersion, metadataVersion);
+            }
+            IBP_VERSIONS.putAll(maxInterVersion);
+        }
+    }
+
+    public String shortVersion() {
+        return release;
+    }
+
+    public String version() {
+        return ibpVersion;
+    }
+
+    public boolean didMetadataChange() {
+        return didMetadataChange;
+    }
+
+    Optional<MetadataVersion> previous() {
+        int idx = this.ordinal();
+        if (idx > 0) {
+            return Optional.of(VERSIONS[idx - 1]);
+        } else {
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Return an `MetadataVersion` instance for `versionString`, which can be in a variety of formats (e.g. "0.8.0", "0.8.0.x",
+     * "0.10.0", "0.10.0-IV1"). `IllegalArgumentException` is thrown if `versionString` cannot be mapped to an `MetadataVersion`.
+     * Note that 'misconfigured' values such as "1.0.1" will be parsed to `IBP_1_0_IV0` as we ignore anything after the first
+     * two digits for versions that don't start with "0."
+     */
+    public static MetadataVersion fromVersionString(String versionString) {
+        String[] versionSegments = versionString.split(Pattern.quote("."));
+        int numSegments = (versionString.startsWith("0.")) ? 3 : 2;
+        String key;
+        if (numSegments >= versionSegments.length) {
+            key = versionString;
+        } else {
+            key = String.join(".", Arrays.copyOfRange(versionSegments, 0, numSegments));
+        }
+        return Optional.ofNullable(IBP_VERSIONS.get(key)).orElseThrow(() ->
+            new IllegalArgumentException("Version " + versionString + " is not a valid version")
+        );
+    }
+
+    public static MetadataVersion fromFeatureLevel(short version) {
+        for (MetadataVersion metadataVersion: MetadataVersion.values()) {
+            if (metadataVersion.featureLevel() == version) {
+                return metadataVersion;
+            }
+        }
+        throw new IllegalArgumentException("No MetadataVersion with metadata version " + version);
+    }
+
+    /**
+     * Return the minimum `MetadataVersion` that supports `RecordVersion`.
+     */
+    public static MetadataVersion minSupportedFor(RecordVersion recordVersion) {
+        switch (recordVersion) {
+            case V0:
+                return IBP_0_8_0;
+            case V1:
+                return IBP_0_10_0_IV0;
+            case V2:
+                return IBP_0_11_0_IV0;
+            default:
+                throw new IllegalArgumentException("Invalid message format version " + recordVersion);
+        }
+    }
+
+    public static MetadataVersion latest() {
+        return VERSIONS[VERSIONS.length - 1];
+    }
+
+    public static boolean checkIfMetadataChanged(MetadataVersion sourceVersion, MetadataVersion targetVersion) {
+        if (sourceVersion == targetVersion) {
+            return false;
+        }
+
+        final MetadataVersion highVersion, lowVersion;
+        if (sourceVersion.compareTo(targetVersion) < 0) {
+            highVersion = targetVersion;
+            lowVersion = sourceVersion;
+        } else {
+            highVersion = sourceVersion;
+            lowVersion = targetVersion;
+        }
+        return checkIfMetadataChangedOrdered(highVersion, lowVersion);
+    }
+
+    private static boolean checkIfMetadataChangedOrdered(MetadataVersion highVersion, MetadataVersion lowVersion) {
+        MetadataVersion version = highVersion;
+        while (!version.didMetadataChange() && version != lowVersion) {
+            Optional<MetadataVersion> prev = version.previous();
+            if (prev.isPresent()) {
+                version = prev.get();
+            } else {
+                break;
+            }
+        }
+        return version != lowVersion;
+    }
+
+    public boolean isAtLeast(MetadataVersion otherVersion) {
+        return this.compareTo(otherVersion) >= 0;
+    }
+
+    public boolean isLessThan(MetadataVersion otherVersion) {
+        return this.compareTo(otherVersion) < 0;
+    }
+
+    @Override
+    public String toString() {
+        return ibpVersion;
+    }
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersionValidator.java b/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersionValidator.java
new file mode 100644
index 0000000000000..072f956eb8b4d
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/common/MetadataVersionValidator.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.server.common;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+import org.apache.kafka.common.config.ConfigDef.Validator;
+import org.apache.kafka.common.config.ConfigException;
+
+public class MetadataVersionValidator implements Validator {
+
+    @Override
+    public void ensureValid(String name, Object value) {
+        try {
+            MetadataVersion.fromVersionString(value.toString());
+        } catch (IllegalArgumentException e) {
+            throw new ConfigException(name, value.toString(), e.getMessage());
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "[" + Arrays.stream(MetadataVersion.VERSIONS).map(MetadataVersion::version).collect(
+             Collectors.joining(", ")) + "]";
+    }
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandler.java b/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandler.java
new file mode 100644
index 0000000000000..5efc145ea9443
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandler.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+
+/**
+ * Handle a server fault.
+ */
+public interface FaultHandler {
+    /**
+     * Handle a fault.
+     *
+     * @param failureMessage        The failure message to log.
+     *
+     * @return                      The fault exception.
+     */
+    default RuntimeException handleFault(String failureMessage) {
+        return handleFault(failureMessage, null);
+    }
+
+    /**
+     * Handle a fault.
+     *
+     * @param failureMessage        The failure message to log.
+     * @param cause                 The exception that caused the problem, or null.
+     *
+     * @return                      The fault exception.
+     */
+    RuntimeException handleFault(String failureMessage, Throwable cause);
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandlerException.java b/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandlerException.java
new file mode 100644
index 0000000000000..ec3b7dc4b0c19
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/fault/FaultHandlerException.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+
+/**
+ * An exception thrown by a fault handler.
+ */
+public class FaultHandlerException extends RuntimeException {
+    public FaultHandlerException(String failureMessage, Throwable cause) {
+        super(failureMessage, cause);
+        // If a cause exception was provided, set our the stack trace its stack trace. This is
+        // useful in junit tests where a limited number of stack frames are printed, and usually
+        // the stack frames of cause exceptions get trimmed.
+        if (cause != null) {
+            setStackTrace(cause.getStackTrace());
+        }
+    }
+
+    public FaultHandlerException(String failureMessage) {
+        this(failureMessage, null);
+    }
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/fault/LoggingFaultHandler.java b/server-common/src/main/java/org/apache/kafka/server/fault/LoggingFaultHandler.java
new file mode 100644
index 0000000000000..9242cef4eb9dc
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/fault/LoggingFaultHandler.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * A fault handler which logs an error message and executes a runnable.
+ */
+public class LoggingFaultHandler implements FaultHandler {
+    private static final Logger log = LoggerFactory.getLogger(LoggingFaultHandler.class);
+    private final String type;
+    private final Runnable action;
+
+    public LoggingFaultHandler(
+        String type,
+        Runnable action
+    ) {
+        this.type = type;
+        this.action = action;
+    }
+
+    @Override
+    public RuntimeException handleFault(String failureMessage, Throwable cause) {
+        if (cause == null) {
+            log.error("Encountered {} fault: {}", type, failureMessage);
+        } else {
+            log.error("Encountered {} fault: {}", type, failureMessage, cause);
+        }
+        try {
+            action.run();
+        } catch (Throwable e) {
+            log.error("Failed to run LoggingFaultHandler action.", e);
+        }
+        return new FaultHandlerException(failureMessage, cause);
+    }
+}
diff --git a/server-common/src/main/java/org/apache/kafka/server/fault/ProcessExitingFaultHandler.java b/server-common/src/main/java/org/apache/kafka/server/fault/ProcessExitingFaultHandler.java
new file mode 100644
index 0000000000000..b7c0d241a2ad3
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/fault/ProcessExitingFaultHandler.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.kafka.common.utils.Exit;
+
+
+/**
+ * This is a fault handler which exits the Java process.
+ */
+public class ProcessExitingFaultHandler implements FaultHandler {
+    private static final Logger log = LoggerFactory.getLogger(ProcessExitingFaultHandler.class);
+
+    @Override
+    public RuntimeException handleFault(String failureMessage, Throwable cause) {
+        if (cause == null) {
+            log.error("Encountered fatal fault: {}", failureMessage);
+        } else {
+            log.error("Encountered fatal fault: {}", failureMessage, cause);
+        }
+        Exit.exit(1);
+        return null;
+    }
+}
diff --git a/core/src/main/java/kafka/metrics/FilteringJmxReporter.java b/server-common/src/main/java/org/apache/kafka/server/metrics/FilteringJmxReporter.java
similarity index 97%
rename from core/src/main/java/kafka/metrics/FilteringJmxReporter.java
rename to server-common/src/main/java/org/apache/kafka/server/metrics/FilteringJmxReporter.java
index 3794448a78ed6..93a27c5d0f752 100644
--- a/core/src/main/java/kafka/metrics/FilteringJmxReporter.java
+++ b/server-common/src/main/java/org/apache/kafka/server/metrics/FilteringJmxReporter.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package kafka.metrics;
+package org.apache.kafka.server.metrics;
 
 import com.yammer.metrics.core.Metric;
 import com.yammer.metrics.core.MetricName;
diff --git a/server-common/src/main/java/org/apache/kafka/server/metrics/KafkaYammerMetrics.java b/server-common/src/main/java/org/apache/kafka/server/metrics/KafkaYammerMetrics.java
new file mode 100644
index 0000000000000..329083350cdc8
--- /dev/null
+++ b/server-common/src/main/java/org/apache/kafka/server/metrics/KafkaYammerMetrics.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.metrics;
+
+import com.yammer.metrics.core.MetricName;
+import com.yammer.metrics.core.MetricsRegistry;
+import org.apache.kafka.common.Reconfigurable;
+import org.apache.kafka.common.config.ConfigException;
+import org.apache.kafka.common.metrics.JmxReporter;
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Sanitizer;
+
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.function.Predicate;
+import java.util.function.Supplier;
+
+/**
+ * This class encapsulates the default yammer metrics registry for Kafka server,
+ * and configures the set of exported JMX metrics for Yammer metrics.
+ *
+ * KafkaYammerMetrics.defaultRegistry() should always be used instead of Metrics.defaultRegistry()
+ */
+public class KafkaYammerMetrics implements Reconfigurable {
+
+    public static final KafkaYammerMetrics INSTANCE = new KafkaYammerMetrics();
+
+    /**
+     * convenience method to replace {@link com.yammer.metrics.Metrics#defaultRegistry()}
+     */
+    public static MetricsRegistry defaultRegistry() {
+        return INSTANCE.metricsRegistry;
+    }
+
+    private final MetricsRegistry metricsRegistry = new MetricsRegistry();
+    private final FilteringJmxReporter jmxReporter = new FilteringJmxReporter(metricsRegistry,
+        metricName -> true);
+
+    private KafkaYammerMetrics() {
+        jmxReporter.start();
+        Exit.addShutdownHook("kafka-jmx-shutdown-hook", jmxReporter::shutdown);
+    }
+
+    @Override
+    public void configure(Map<String, ?> configs) {
+        reconfigure(configs);
+    }
+
+    @Override
+    public Set<String> reconfigurableConfigs() {
+        return JmxReporter.RECONFIGURABLE_CONFIGS;
+    }
+
+    @Override
+    public void validateReconfiguration(Map<String, ?> configs) throws ConfigException {
+        JmxReporter.compilePredicate(configs);
+    }
+
+    @Override
+    public void reconfigure(Map<String, ?> configs) {
+        Predicate<String> mBeanPredicate = JmxReporter.compilePredicate(configs);
+        jmxReporter.updatePredicate(metricName -> mBeanPredicate.test(metricName.getMBeanName()));
+    }
+
+    public static MetricName getMetricName(
+        String group,
+        String typeName,
+        String name
+    ) {
+        return getMetricName(
+            group,
+            typeName,
+            name,
+            null
+        );
+    }
+
+    public static MetricName getMetricName(
+        String group,
+        String typeName,
+        String name,
+        LinkedHashMap<String, String> tags
+    ) {
+        StringBuilder nameBuilder = new StringBuilder();
+        nameBuilder.append(group);
+        nameBuilder.append(":type=");
+        nameBuilder.append(typeName);
+
+        if (name.length() > 0) {
+            nameBuilder.append(",name=");
+            nameBuilder.append(name);
+        }
+
+        String scope = toScope(tags).orElse(null);
+        Optional<String> tagsName = toMBeanName(tags);
+        tagsName.ifPresent(nameBuilder::append);
+
+        return new MetricName(group, typeName, name, scope, nameBuilder.toString());
+    }
+
+    private static Optional<String> toMBeanName(LinkedHashMap<String, String> tags) {
+        if (tags == null) {
+            return Optional.empty();
+        }
+
+        LinkedHashMap<String, String> nonEmptyTags = collectNonEmptyTags(tags, LinkedHashMap::new);
+        if (nonEmptyTags.isEmpty()) {
+            return Optional.empty();
+        } else {
+            StringBuilder tagsString = new StringBuilder();
+            for (Map.Entry<String, String> tagEntry : nonEmptyTags.entrySet()) {
+                String sanitizedValue = Sanitizer.jmxSanitize(tagEntry.getValue());
+                tagsString.append(",");
+                tagsString.append(tagEntry.getKey());
+                tagsString.append("=");
+                tagsString.append(sanitizedValue);
+            }
+            return Optional.of(tagsString.toString());
+        }
+    }
+
+    private static <T extends Map<String, String>> T collectNonEmptyTags(
+        Map<String, String> tags,
+        Supplier<T> mapSupplier
+    ) {
+        T result = mapSupplier.get();
+        for (Map.Entry<String, String> tagEntry : tags.entrySet()) {
+            String tagValue = tagEntry.getValue();
+            if (!"".equals(tagValue)) {
+                result.put(tagEntry.getKey(), tagValue);
+            }
+        }
+        return result;
+    }
+
+    private static Optional<String> toScope(Map<String, String> tags) {
+        if (tags == null) {
+            return Optional.empty();
+        }
+
+        SortedMap<String, String> nonEmptyTags = collectNonEmptyTags(tags, TreeMap::new);
+        if (nonEmptyTags.isEmpty()) {
+            return Optional.empty();
+        } else {
+            StringBuilder tagsString = new StringBuilder();
+
+            for (Iterator<Map.Entry<String, String>> iterator = nonEmptyTags.entrySet().iterator(); iterator.hasNext();) {
+                // convert dot to _ since reporters like Graphite typically use dot to represent hierarchy
+                Map.Entry<String, String> tagEntry = iterator.next();
+                String convertedValue = tagEntry.getValue().replaceAll("\\.", "_");
+                tagsString.append(tagEntry.getKey());
+                tagsString.append(".");
+                tagsString.append(convertedValue);
+
+                if (iterator.hasNext()) {
+                    tagsString.append(".");
+                }
+            }
+            return Optional.of(tagsString.toString());
+        }
+    }
+}
diff --git a/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionTest.java b/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionTest.java
new file mode 100644
index 0000000000000..99f9cc3515ce3
--- /dev/null
+++ b/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionTest.java
@@ -0,0 +1,342 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.common;
+
+import org.apache.kafka.common.record.RecordVersion;
+
+import java.util.Arrays;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.EnumSource;
+
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_0_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_0_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_1_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_1_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_1_IV2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_10_2_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_11_0_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_11_0_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_11_0_IV2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_8_0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_8_1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_8_2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_0_9_0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_1_0_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_1_1_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_0_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_0_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_1_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_1_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_1_IV2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_2_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_2_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_3_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_3_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_4_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_4_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_5_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_6_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_7_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_7_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_7_IV2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_8_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_2_8_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_0_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_0_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_1_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_2_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_3_IV0;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_3_IV1;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_3_IV2;
+import static org.apache.kafka.server.common.MetadataVersion.IBP_3_3_IV3;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class MetadataVersionTest {
+
+    @Test
+    public void testFeatureLevel() {
+        MetadataVersion[] metadataVersions = MetadataVersion.VERSIONS;
+        int firstFeatureLevelIndex = Arrays.asList(metadataVersions).indexOf(MetadataVersion.MINIMUM_KRAFT_VERSION);
+        for (int i = 0; i < firstFeatureLevelIndex; i++) {
+            assertTrue(metadataVersions[i].featureLevel() < 0);
+        }
+        short expectedFeatureLevel = 1;
+        for (int i = firstFeatureLevelIndex; i < metadataVersions.length; i++) {
+            MetadataVersion metadataVersion = metadataVersions[i];
+            assertEquals(expectedFeatureLevel, metadataVersion.featureLevel(),
+                    String.format("Metadata version %s should have feature level %s", metadataVersion.featureLevel(), expectedFeatureLevel));
+            expectedFeatureLevel += 1;
+        }
+    }
+
+    @Test
+    public void testFromVersionString() {
+        assertEquals(IBP_0_8_0, MetadataVersion.fromVersionString("0.8.0"));
+        assertEquals(IBP_0_8_0, MetadataVersion.fromVersionString("0.8.0.0"));
+        assertEquals(IBP_0_8_0, MetadataVersion.fromVersionString("0.8.0.1"));
+        // should throw an exception as long as IBP_8_0_IV0 is not defined
+        assertThrows(IllegalArgumentException.class, () -> MetadataVersion.fromVersionString("8.0"));
+
+        assertEquals(IBP_0_8_1, MetadataVersion.fromVersionString("0.8.1"));
+        assertEquals(IBP_0_8_1, MetadataVersion.fromVersionString("0.8.1.0"));
+        assertEquals(IBP_0_8_1, MetadataVersion.fromVersionString("0.8.1.1"));
+
+        assertEquals(IBP_0_8_2, MetadataVersion.fromVersionString("0.8.2"));
+        assertEquals(IBP_0_8_2, MetadataVersion.fromVersionString("0.8.2.0"));
+        assertEquals(IBP_0_8_2, MetadataVersion.fromVersionString("0.8.2.1"));
+
+        assertEquals(IBP_0_9_0, MetadataVersion.fromVersionString("0.9.0"));
+        assertEquals(IBP_0_9_0, MetadataVersion.fromVersionString("0.9.0.0"));
+        assertEquals(IBP_0_9_0, MetadataVersion.fromVersionString("0.9.0.1"));
+
+        assertEquals(IBP_0_10_0_IV0, MetadataVersion.fromVersionString("0.10.0-IV0"));
+
+        assertEquals(IBP_0_10_0_IV1, MetadataVersion.fromVersionString("0.10.0"));
+        assertEquals(IBP_0_10_0_IV1, MetadataVersion.fromVersionString("0.10.0.0"));
+        assertEquals(IBP_0_10_0_IV1, MetadataVersion.fromVersionString("0.10.0.0-IV0"));
+        assertEquals(IBP_0_10_0_IV1, MetadataVersion.fromVersionString("0.10.0.1"));
+
+        assertEquals(IBP_0_10_1_IV0, MetadataVersion.fromVersionString("0.10.1-IV0"));
+        assertEquals(IBP_0_10_1_IV1, MetadataVersion.fromVersionString("0.10.1-IV1"));
+
+        assertEquals(IBP_0_10_1_IV2, MetadataVersion.fromVersionString("0.10.1"));
+        assertEquals(IBP_0_10_1_IV2, MetadataVersion.fromVersionString("0.10.1.0"));
+        assertEquals(IBP_0_10_1_IV2, MetadataVersion.fromVersionString("0.10.1-IV2"));
+        assertEquals(IBP_0_10_1_IV2, MetadataVersion.fromVersionString("0.10.1.1"));
+
+        assertEquals(IBP_0_10_2_IV0, MetadataVersion.fromVersionString("0.10.2"));
+        assertEquals(IBP_0_10_2_IV0, MetadataVersion.fromVersionString("0.10.2.0"));
+        assertEquals(IBP_0_10_2_IV0, MetadataVersion.fromVersionString("0.10.2-IV0"));
+        assertEquals(IBP_0_10_2_IV0, MetadataVersion.fromVersionString("0.10.2.1"));
+
+        assertEquals(IBP_0_11_0_IV0, MetadataVersion.fromVersionString("0.11.0-IV0"));
+        assertEquals(IBP_0_11_0_IV1, MetadataVersion.fromVersionString("0.11.0-IV1"));
+
+        assertEquals(IBP_0_11_0_IV2, MetadataVersion.fromVersionString("0.11.0"));
+        assertEquals(IBP_0_11_0_IV2, MetadataVersion.fromVersionString("0.11.0.0"));
+        assertEquals(IBP_0_11_0_IV2, MetadataVersion.fromVersionString("0.11.0-IV2"));
+        assertEquals(IBP_0_11_0_IV2, MetadataVersion.fromVersionString("0.11.0.1"));
+
+        assertEquals(IBP_1_0_IV0, MetadataVersion.fromVersionString("1.0"));
+        assertEquals(IBP_1_0_IV0, MetadataVersion.fromVersionString("1.0.0"));
+        assertEquals(IBP_1_0_IV0, MetadataVersion.fromVersionString("1.0.0-IV0"));
+        assertEquals(IBP_1_0_IV0, MetadataVersion.fromVersionString("1.0.1"));
+        assertThrows(IllegalArgumentException.class, () -> MetadataVersion.fromVersionString("0.1.0"));
+        assertThrows(IllegalArgumentException.class, () -> MetadataVersion.fromVersionString("0.1.0.0"));
+        assertThrows(IllegalArgumentException.class, () -> MetadataVersion.fromVersionString("0.1.0-IV0"));
+        assertThrows(IllegalArgumentException.class, () -> MetadataVersion.fromVersionString("0.1.0.0-IV0"));
+
+        assertEquals(IBP_1_1_IV0, MetadataVersion.fromVersionString("1.1-IV0"));
+
+        assertEquals(IBP_2_0_IV1, MetadataVersion.fromVersionString("2.0"));
+        assertEquals(IBP_2_0_IV0, MetadataVersion.fromVersionString("2.0-IV0"));
+        assertEquals(IBP_2_0_IV1, MetadataVersion.fromVersionString("2.0-IV1"));
+
+        assertEquals(IBP_2_1_IV2, MetadataVersion.fromVersionString("2.1"));
+        assertEquals(IBP_2_1_IV0, MetadataVersion.fromVersionString("2.1-IV0"));
+        assertEquals(IBP_2_1_IV1, MetadataVersion.fromVersionString("2.1-IV1"));
+        assertEquals(IBP_2_1_IV2, MetadataVersion.fromVersionString("2.1-IV2"));
+
+        assertEquals(IBP_2_2_IV1, MetadataVersion.fromVersionString("2.2"));
+        assertEquals(IBP_2_2_IV0, MetadataVersion.fromVersionString("2.2-IV0"));
+        assertEquals(IBP_2_2_IV1, MetadataVersion.fromVersionString("2.2-IV1"));
+
+        assertEquals(IBP_2_3_IV1, MetadataVersion.fromVersionString("2.3"));
+        assertEquals(IBP_2_3_IV0, MetadataVersion.fromVersionString("2.3-IV0"));
+        assertEquals(IBP_2_3_IV1, MetadataVersion.fromVersionString("2.3-IV1"));
+
+        assertEquals(IBP_2_4_IV1, MetadataVersion.fromVersionString("2.4"));
+        assertEquals(IBP_2_4_IV0, MetadataVersion.fromVersionString("2.4-IV0"));
+        assertEquals(IBP_2_4_IV1, MetadataVersion.fromVersionString("2.4-IV1"));
+
+        assertEquals(IBP_2_5_IV0, MetadataVersion.fromVersionString("2.5"));
+        assertEquals(IBP_2_5_IV0, MetadataVersion.fromVersionString("2.5-IV0"));
+
+        assertEquals(IBP_2_6_IV0, MetadataVersion.fromVersionString("2.6"));
+        assertEquals(IBP_2_6_IV0, MetadataVersion.fromVersionString("2.6-IV0"));
+
+        assertEquals(IBP_2_7_IV0, MetadataVersion.fromVersionString("2.7-IV0"));
+        assertEquals(IBP_2_7_IV1, MetadataVersion.fromVersionString("2.7-IV1"));
+        assertEquals(IBP_2_7_IV2, MetadataVersion.fromVersionString("2.7-IV2"));
+
+        assertEquals(IBP_2_8_IV1, MetadataVersion.fromVersionString("2.8"));
+        assertEquals(IBP_2_8_IV0, MetadataVersion.fromVersionString("2.8-IV0"));
+        assertEquals(IBP_2_8_IV1, MetadataVersion.fromVersionString("2.8-IV1"));
+
+        assertEquals(IBP_3_0_IV1, MetadataVersion.fromVersionString("3.0"));
+        assertEquals(IBP_3_0_IV0, MetadataVersion.fromVersionString("3.0-IV0"));
+        assertEquals(IBP_3_0_IV1, MetadataVersion.fromVersionString("3.0-IV1"));
+
+        assertEquals(IBP_3_1_IV0, MetadataVersion.fromVersionString("3.1"));
+        assertEquals(IBP_3_1_IV0, MetadataVersion.fromVersionString("3.1-IV0"));
+
+        assertEquals(IBP_3_2_IV0, MetadataVersion.fromVersionString("3.2"));
+        assertEquals(IBP_3_2_IV0, MetadataVersion.fromVersionString("3.2-IV0"));
+
+        assertEquals(IBP_3_3_IV3, MetadataVersion.fromVersionString("3.3"));
+        assertEquals(IBP_3_3_IV0, MetadataVersion.fromVersionString("3.3-IV0"));
+        assertEquals(IBP_3_3_IV1, MetadataVersion.fromVersionString("3.3-IV1"));
+        assertEquals(IBP_3_3_IV2, MetadataVersion.fromVersionString("3.3-IV2"));
+        assertEquals(IBP_3_3_IV3, MetadataVersion.fromVersionString("3.3-IV3"));
+    }
+
+    @Test
+    public void testMinSupportedVersionFor() {
+        assertEquals(IBP_0_8_0, MetadataVersion.minSupportedFor(RecordVersion.V0));
+        assertEquals(IBP_0_10_0_IV0, MetadataVersion.minSupportedFor(RecordVersion.V1));
+        assertEquals(IBP_0_11_0_IV0, MetadataVersion.minSupportedFor(RecordVersion.V2));
+
+        // Ensure that all record versions have a defined min version so that we remember to update the method
+        for (RecordVersion recordVersion : RecordVersion.values()) {
+            assertNotNull(MetadataVersion.minSupportedFor(recordVersion));
+        }
+    }
+
+    @Test
+    public void testShortVersion() {
+        assertEquals("0.8.0", IBP_0_8_0.shortVersion());
+        assertEquals("0.10.0", IBP_0_10_0_IV0.shortVersion());
+        assertEquals("0.10.0", IBP_0_10_0_IV1.shortVersion());
+        assertEquals("0.11.0", IBP_0_11_0_IV0.shortVersion());
+        assertEquals("0.11.0", IBP_0_11_0_IV1.shortVersion());
+        assertEquals("0.11.0", IBP_0_11_0_IV2.shortVersion());
+        assertEquals("1.0", IBP_1_0_IV0.shortVersion());
+        assertEquals("1.1", IBP_1_1_IV0.shortVersion());
+        assertEquals("2.0", IBP_2_0_IV0.shortVersion());
+        assertEquals("2.0", IBP_2_0_IV1.shortVersion());
+        assertEquals("2.1", IBP_2_1_IV0.shortVersion());
+        assertEquals("2.1", IBP_2_1_IV1.shortVersion());
+        assertEquals("2.1", IBP_2_1_IV2.shortVersion());
+        assertEquals("2.2", IBP_2_2_IV0.shortVersion());
+        assertEquals("2.2", IBP_2_2_IV1.shortVersion());
+        assertEquals("2.3", IBP_2_3_IV0.shortVersion());
+        assertEquals("2.3", IBP_2_3_IV1.shortVersion());
+        assertEquals("2.4", IBP_2_4_IV0.shortVersion());
+        assertEquals("2.5", IBP_2_5_IV0.shortVersion());
+        assertEquals("2.6", IBP_2_6_IV0.shortVersion());
+        assertEquals("2.7", IBP_2_7_IV2.shortVersion());
+        assertEquals("2.8", IBP_2_8_IV0.shortVersion());
+        assertEquals("2.8", IBP_2_8_IV1.shortVersion());
+        assertEquals("3.0", IBP_3_0_IV0.shortVersion());
+        assertEquals("3.0", IBP_3_0_IV1.shortVersion());
+        assertEquals("3.1", IBP_3_1_IV0.shortVersion());
+        assertEquals("3.2", IBP_3_2_IV0.shortVersion());
+        assertEquals("3.3", IBP_3_3_IV0.shortVersion());
+        assertEquals("3.3", IBP_3_3_IV1.shortVersion());
+        assertEquals("3.3", IBP_3_3_IV2.shortVersion());
+        assertEquals("3.3", IBP_3_3_IV3.shortVersion());
+    }
+
+    @Test
+    public void testVersion() {
+        assertEquals("0.8.0", IBP_0_8_0.version());
+        assertEquals("0.8.2", IBP_0_8_2.version());
+        assertEquals("0.10.0-IV0", IBP_0_10_0_IV0.version());
+        assertEquals("0.10.0-IV1", IBP_0_10_0_IV1.version());
+        assertEquals("0.11.0-IV0", IBP_0_11_0_IV0.version());
+        assertEquals("0.11.0-IV1", IBP_0_11_0_IV1.version());
+        assertEquals("0.11.0-IV2", IBP_0_11_0_IV2.version());
+        assertEquals("1.0-IV0", IBP_1_0_IV0.version());
+        assertEquals("1.1-IV0", IBP_1_1_IV0.version());
+        assertEquals("2.0-IV0", IBP_2_0_IV0.version());
+        assertEquals("2.0-IV1", IBP_2_0_IV1.version());
+        assertEquals("2.1-IV0", IBP_2_1_IV0.version());
+        assertEquals("2.1-IV1", IBP_2_1_IV1.version());
+        assertEquals("2.1-IV2", IBP_2_1_IV2.version());
+        assertEquals("2.2-IV0", IBP_2_2_IV0.version());
+        assertEquals("2.2-IV1", IBP_2_2_IV1.version());
+        assertEquals("2.3-IV0", IBP_2_3_IV0.version());
+        assertEquals("2.3-IV1", IBP_2_3_IV1.version());
+        assertEquals("2.4-IV0", IBP_2_4_IV0.version());
+        assertEquals("2.5-IV0", IBP_2_5_IV0.version());
+        assertEquals("2.6-IV0", IBP_2_6_IV0.version());
+        assertEquals("2.7-IV2", IBP_2_7_IV2.version());
+        assertEquals("2.8-IV0", IBP_2_8_IV0.version());
+        assertEquals("2.8-IV1", IBP_2_8_IV1.version());
+        assertEquals("3.0-IV0", IBP_3_0_IV0.version());
+        assertEquals("3.0-IV1", IBP_3_0_IV1.version());
+        assertEquals("3.1-IV0", IBP_3_1_IV0.version());
+        assertEquals("3.2-IV0", IBP_3_2_IV0.version());
+        assertEquals("3.3-IV0", IBP_3_3_IV0.version());
+        assertEquals("3.3-IV1", IBP_3_3_IV1.version());
+        assertEquals("3.3-IV2", IBP_3_3_IV2.version());
+        assertEquals("3.3-IV3", IBP_3_3_IV3.version());
+    }
+
+    @Test
+    public void testPrevious() {
+        for (int i = 1; i < MetadataVersion.VERSIONS.length - 2; i++) {
+            MetadataVersion version = MetadataVersion.VERSIONS[i];
+            assertTrue(version.previous().isPresent(), version.toString());
+            assertEquals(MetadataVersion.VERSIONS[i - 1], version.previous().get());
+        }
+    }
+
+    @Test
+    public void testMetadataChanged() {
+        assertFalse(MetadataVersion.checkIfMetadataChanged(IBP_3_2_IV0, IBP_3_2_IV0));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_2_IV0, IBP_3_1_IV0));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_2_IV0, IBP_3_0_IV1));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_2_IV0, IBP_3_0_IV0));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_2_IV0, IBP_2_8_IV1));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_3_IV1, IBP_3_3_IV0));
+
+        // Check that argument order doesn't matter
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_3_0_IV0, IBP_3_2_IV0));
+        assertTrue(MetadataVersion.checkIfMetadataChanged(IBP_2_8_IV1, IBP_3_2_IV0));
+    }
+
+    @Test
+    public void testKRaftVersions() {
+        for (MetadataVersion metadataVersion : MetadataVersion.VERSIONS) {
+            if (metadataVersion.isKRaftSupported()) {
+                assertTrue(metadataVersion.featureLevel() > 0);
+            } else {
+                assertEquals(-1, metadataVersion.featureLevel());
+            }
+        }
+
+        for (MetadataVersion metadataVersion : MetadataVersion.VERSIONS) {
+            if (metadataVersion.isAtLeast(IBP_3_0_IV1)) {
+                assertTrue(metadataVersion.isKRaftSupported(), metadataVersion.toString());
+            } else {
+                assertFalse(metadataVersion.isKRaftSupported());
+            }
+        }
+    }
+
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class)
+    public void testIsInControlledShutdownStateSupported(MetadataVersion metadataVersion) {
+        assertEquals(metadataVersion.isAtLeast(IBP_3_3_IV3),
+            metadataVersion.isInControlledShutdownStateSupported());
+    }
+
+    @ParameterizedTest
+    @EnumSource(value = MetadataVersion.class)
+    public void testRegisterBrokerRecordVersion(MetadataVersion metadataVersion) {
+        short expectedVersion = metadataVersion.isAtLeast(IBP_3_3_IV3) ?
+            (short) 1 : (short) 0;
+        assertEquals(expectedVersion, metadataVersion.registerBrokerRecordVersion());
+    }
+}
diff --git a/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionValidatorTest.java b/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionValidatorTest.java
new file mode 100644
index 0000000000000..707d0d11d3e0d
--- /dev/null
+++ b/server-common/src/test/java/org/apache/kafka/server/common/MetadataVersionValidatorTest.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.common;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+public class MetadataVersionValidatorTest {
+
+    @Test
+    public void testMetadataVersionValidator() {
+        String str = new MetadataVersionValidator().toString();
+        String[] apiVersions = str.substring(1).split(",");
+        assertEquals(MetadataVersion.VERSIONS.length, apiVersions.length);
+    }
+
+}
diff --git a/server-common/src/test/java/org/apache/kafka/server/fault/LoggingFaultHandlerTest.java b/server-common/src/test/java/org/apache/kafka/server/fault/LoggingFaultHandlerTest.java
new file mode 100644
index 0000000000000..1a11098a21b47
--- /dev/null
+++ b/server-common/src/test/java/org/apache/kafka/server/fault/LoggingFaultHandlerTest.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+/**
+ * Tests LoggingFaultHandler
+ */
+public class LoggingFaultHandlerTest {
+    /**
+     * Test handling faults with and without exceptions.
+     */
+    @Test
+    public void testHandleFault() {
+        AtomicInteger counter = new AtomicInteger(0);
+        LoggingFaultHandler handler = new LoggingFaultHandler("test", () -> {
+            counter.incrementAndGet();
+        });
+        handler.handleFault("uh oh");
+        assertEquals(1, counter.get());
+        handler.handleFault("uh oh", new RuntimeException("yikes"));
+        assertEquals(2, counter.get());
+    }
+
+    /**
+     * Test handling an exception in the action callback.
+     */
+    @Test
+    public void testHandleExceptionInAction() {
+        LoggingFaultHandler handler = new LoggingFaultHandler("test", () -> {
+            throw new RuntimeException("action failed");
+        });
+        handler.handleFault("uh oh"); // should not throw
+        handler.handleFault("uh oh", new RuntimeException("yikes")); // should not throw
+    }
+}
diff --git a/server-common/src/test/java/org/apache/kafka/server/fault/MockFaultHandler.java b/server-common/src/test/java/org/apache/kafka/server/fault/MockFaultHandler.java
new file mode 100644
index 0000000000000..e49f2bdc6c25b
--- /dev/null
+++ b/server-common/src/test/java/org/apache/kafka/server/fault/MockFaultHandler.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.fault;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * This is a fault handler suitable for use in JUnit tests. It will store the result of the first
+ * call to handleFault that was made.
+ */
+public class MockFaultHandler implements FaultHandler {
+    private static final Logger log = LoggerFactory.getLogger(MockFaultHandler.class);
+
+    private final String name;
+    private FaultHandlerException firstException = null;
+    private boolean ignore = false;
+
+    public MockFaultHandler(String name) {
+        this.name = name;
+    }
+
+    @Override
+    public synchronized RuntimeException handleFault(String failureMessage, Throwable cause) {
+        if (cause == null) {
+            log.error("Encountered {} fault: {}", name, failureMessage);
+        } else {
+            log.error("Encountered {} fault: {}", name, failureMessage, cause);
+        }
+        FaultHandlerException e = (cause == null) ?
+                new FaultHandlerException(name + ": " + failureMessage) :
+                new FaultHandlerException(name + ": " + failureMessage +
+                        ": " + cause.getMessage(), cause);
+        if (firstException == null) {
+            firstException = e;
+        }
+        return firstException;
+    }
+
+    public synchronized void maybeRethrowFirstException() {
+        if (firstException != null && !ignore) {
+            throw firstException;
+        }
+    }
+
+    public synchronized FaultHandlerException firstException() {
+        return firstException;
+    }
+
+    public synchronized void setIgnore(boolean ignore) {
+        this.ignore = ignore;
+    }
+}
diff --git a/server-common/src/test/java/org/apache/kafka/server/metrics/KafkaYammerMetricsTest.java b/server-common/src/test/java/org/apache/kafka/server/metrics/KafkaYammerMetricsTest.java
new file mode 100644
index 0000000000000..dfe0aaf416e5b
--- /dev/null
+++ b/server-common/src/test/java/org/apache/kafka/server/metrics/KafkaYammerMetricsTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.server.metrics;
+
+import com.yammer.metrics.core.MetricName;
+import org.junit.jupiter.api.Test;
+
+import java.util.LinkedHashMap;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+class KafkaYammerMetricsTest {
+
+    @Test
+    public void testUntaggedMetric() {
+        MetricName metricName = KafkaYammerMetrics.getMetricName(
+            "kafka.metrics",
+            "TestMetrics",
+            "UntaggedMetric"
+        );
+
+        assertEquals("kafka.metrics", metricName.getGroup());
+        assertEquals("TestMetrics", metricName.getType());
+        assertEquals("UntaggedMetric", metricName.getName());
+        assertEquals("kafka.metrics:type=TestMetrics,name=UntaggedMetric",
+            metricName.getMBeanName());
+        assertNull(metricName.getScope());
+    }
+
+    @Test
+    public void testTaggedMetricName() {
+        LinkedHashMap<String, String> tags = new LinkedHashMap<>();
+        tags.put("foo", "bar");
+        tags.put("bar", "baz");
+        tags.put("baz", "raz.taz");
+
+        MetricName metricName = KafkaYammerMetrics.getMetricName(
+            "kafka.metrics",
+            "TestMetrics",
+            "TaggedMetric",
+            tags
+        );
+
+        assertEquals("kafka.metrics", metricName.getGroup());
+        assertEquals("TestMetrics", metricName.getType());
+        assertEquals("TaggedMetric", metricName.getName());
+
+        // MBean name should preserve initial ordering
+        assertEquals("kafka.metrics:type=TestMetrics,name=TaggedMetric,foo=bar,bar=baz,baz=raz.taz",
+            metricName.getMBeanName());
+
+        // Scope should be sorted by key
+        assertEquals("bar.baz.baz.raz_taz.foo.bar", metricName.getScope());
+    }
+
+    @Test
+    public void testTaggedMetricNameWithEmptyValue() {
+        LinkedHashMap<String, String> tags = new LinkedHashMap<>();
+        tags.put("foo", "bar");
+        tags.put("bar", "");
+        tags.put("baz", "raz.taz");
+
+        MetricName metricName = KafkaYammerMetrics.getMetricName(
+            "kafka.metrics",
+            "TestMetrics",
+            "TaggedMetric",
+            tags
+        );
+
+        assertEquals("kafka.metrics", metricName.getGroup());
+        assertEquals("TestMetrics", metricName.getType());
+        assertEquals("TaggedMetric", metricName.getName());
+
+        // MBean name should preserve initial ordering (with empty key value removed)
+        assertEquals("kafka.metrics:type=TestMetrics,name=TaggedMetric,foo=bar,baz=raz.taz",
+            metricName.getMBeanName());
+
+        // Scope should be sorted by key (with empty key value removed)
+        assertEquals("baz.raz_taz.foo.bar", metricName.getScope());
+    }
+
+}
\ No newline at end of file
diff --git a/settings.gradle b/settings.gradle
index 050096387a22b..5c0b8d1944b93 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -54,5 +54,6 @@ include 'clients',
     'streams:upgrade-system-tests-28',
     'streams:upgrade-system-tests-30',
     'streams:upgrade-system-tests-31',
+    'streams:upgrade-system-tests-32',
     'tools',
     'trogdor'
diff --git a/shell/src/main/java/org/apache/kafka/shell/MetadataNodeManager.java b/shell/src/main/java/org/apache/kafka/shell/MetadataNodeManager.java
index f7b867a6b01c3..9d4941f8020b6 100644
--- a/shell/src/main/java/org/apache/kafka/shell/MetadataNodeManager.java
+++ b/shell/src/main/java/org/apache/kafka/shell/MetadataNodeManager.java
@@ -59,6 +59,8 @@
 import java.util.concurrent.CompletableFuture;
 import java.util.function.Consumer;
 
+import static org.apache.kafka.metadata.LeaderRecoveryState.NO_CHANGE;
+
 /**
  * Maintains the in-memory metadata for the metadata tool.
  */
@@ -280,6 +282,9 @@ private void handleCommitImpl(MetadataRecordType type, ApiMessage message)
                     partition.setLeader(record.leader());
                     partition.setLeaderEpoch(partition.leaderEpoch() + 1);
                 }
+                if (record.leaderRecoveryState() != NO_CHANGE) {
+                    partition.setLeaderRecoveryState(record.leaderRecoveryState());
+                }
                 partition.setPartitionEpoch(partition.partitionEpoch() + 1);
                 file.setContents(PartitionRecordJsonConverter.write(partition,
                     PartitionRecord.HIGHEST_SUPPORTED_VERSION).toPrettyString());
diff --git a/shell/src/main/java/org/apache/kafka/shell/MetadataShell.java b/shell/src/main/java/org/apache/kafka/shell/MetadataShell.java
index 1d99623044e03..58acf28b7084d 100644
--- a/shell/src/main/java/org/apache/kafka/shell/MetadataShell.java
+++ b/shell/src/main/java/org/apache/kafka/shell/MetadataShell.java
@@ -24,6 +24,7 @@
 import net.sourceforge.argparse4j.inf.Namespace;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.metadata.util.SnapshotFileReader;
 import org.apache.kafka.server.common.ApiMessageAndVersion;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git a/shell/src/test/java/org/apache/kafka/shell/MetadataNodeManagerTest.java b/shell/src/test/java/org/apache/kafka/shell/MetadataNodeManagerTest.java
index f0cfffb28178b..c580e1d5c27de 100644
--- a/shell/src/test/java/org/apache/kafka/shell/MetadataNodeManagerTest.java
+++ b/shell/src/test/java/org/apache/kafka/shell/MetadataNodeManagerTest.java
@@ -31,6 +31,7 @@
 import org.apache.kafka.common.metadata.TopicRecord;
 import org.apache.kafka.common.metadata.UnfenceBrokerRecord;
 import org.apache.kafka.common.metadata.UnregisterBrokerRecord;
+import org.apache.kafka.metadata.LeaderRecoveryState;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -209,6 +210,12 @@ public void testPartitionChangeRecord() {
             partitionChangeRecord.duplicate().setLeader(1),
             newPartitionRecord.duplicate().setLeader(1).setLeaderEpoch(1)
         );
+
+        // Change leader recovery state
+        checkPartitionChangeRecord(
+            oldPartitionRecord,
+            partitionChangeRecord.duplicate().setLeaderRecoveryState(LeaderRecoveryState.RECOVERING.value()),
+            newPartitionRecord.duplicate().setLeaderRecoveryState(LeaderRecoveryState.RECOVERING.value()));
     }
 
     private void checkPartitionChangeRecord(PartitionRecord oldPartitionRecord,
diff --git a/storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataSnapshotFile.java b/storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataSnapshotFile.java
index cee77ee81db6b..db49bb9cb8a8f 100644
--- a/storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataSnapshotFile.java
+++ b/storage/src/main/java/org/apache/kafka/server/log/remote/metadata/storage/RemoteLogMetadataSnapshotFile.java
@@ -29,6 +29,7 @@
 import java.nio.channels.Channels;
 import java.nio.channels.FileChannel;
 import java.nio.channels.ReadableByteChannel;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
@@ -70,8 +71,11 @@ public class RemoteLogMetadataSnapshotFile {
 
         // Create an empty file if it does not exist.
         try {
-            boolean newFileCreated = metadataStoreFile.createNewFile();
-            log.info("Remote log metadata snapshot file: [{}], newFileCreated: [{}]", metadataStoreFile, newFileCreated);
+            final boolean fileExists = Files.exists(metadataStoreFile.toPath());
+            if (!fileExists) {
+                Files.createFile(metadataStoreFile.toPath());
+            }
+            log.info("Remote log metadata snapshot file: [{}], newFileCreated: [{}]", metadataStoreFile, !fileExists);
         } catch (IOException e) {
             throw new KafkaException(e);
         }
diff --git a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerDemo.java b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerDemo.java
index 028d317df5b4d..617e491e11193 100644
--- a/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerDemo.java
+++ b/streams/examples/src/main/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerDemo.java
@@ -25,8 +25,11 @@
 import org.apache.kafka.streams.kstream.Transformer;
 import org.apache.kafka.streams.kstream.TransformerSupplier;
 import org.apache.kafka.streams.processor.ConnectedStoreProvider;
-import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.PunctuationType;
+import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.ProcessorSupplier;
+import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.StoreBuilder;
@@ -63,15 +66,15 @@
  */
 public final class WordCountTransformerDemo {
 
-    static class MyTransformerSupplier implements TransformerSupplier<String, String, KeyValue<String, String>> {
+    static class MyProcessorSupplier implements ProcessorSupplier<String, String, String, String> {
 
         @Override
-        public Transformer<String, String, KeyValue<String, String>> get() {
-            return new Transformer<String, String, KeyValue<String, String>>() {
+        public Processor<String, String, String, String> get() {
+            return new Processor<String, String, String, String>() {
                 private KeyValueStore<String, Integer> kvStore;
 
                 @Override
-                public void init(final ProcessorContext context) {
+                public void init(final ProcessorContext<String, String> context) {
                     context.schedule(Duration.ofSeconds(1), PunctuationType.STREAM_TIME, timestamp -> {
                         try (final KeyValueIterator<String, Integer> iter = kvStore.all()) {
                             System.out.println("----------- " + timestamp + " ----------- ");
@@ -80,8 +83,7 @@ public void init(final ProcessorContext context) {
                                 final KeyValue<String, Integer> entry = iter.next();
 
                                 System.out.println("[" + entry.key + ", " + entry.value + "]");
-
-                                context.forward(entry.key, entry.value.toString());
+                                context.forward(new Record<>(entry.key, entry.value.toString(), timestamp));
                             }
                         }
                     });
@@ -89,8 +91,8 @@ public void init(final ProcessorContext context) {
                 }
 
                 @Override
-                public KeyValue<String, String> transform(final String dummy, final String line) {
-                    final String[] words = line.toLowerCase(Locale.getDefault()).split("\\W+");
+                public void process(final Record<String, String> record) {
+                    final String[] words = record.value().toLowerCase(Locale.getDefault()).split("\\W+");
 
                     for (final String word : words) {
                         final Integer oldValue = this.kvStore.get(word);
@@ -101,8 +103,6 @@ public KeyValue<String, String> transform(final String dummy, final String line)
                             this.kvStore.put(word, oldValue + 1);
                         }
                     }
-
-                    return null;
                 }
 
                 @Override
@@ -141,8 +141,8 @@ public static void main(final String[] args) throws IOException {
         final StreamsBuilder builder = new StreamsBuilder();
 
         builder.<String, String>stream("streams-plaintext-input")
-            .transform(new MyTransformerSupplier())
-            .to("streams-wordcount-processor-output");
+                .process(new MyProcessorSupplier())
+                .to("streams-wordcount-processor-output");
 
         final KafkaStreams streams = new KafkaStreams(builder.build(), props);
         final CountDownLatch latch = new CountDownLatch(1);
diff --git a/streams/examples/src/test/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerTest.java b/streams/examples/src/test/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerTest.java
index 95a63916d8726..27d32ee9332d8 100644
--- a/streams/examples/src/test/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerTest.java
+++ b/streams/examples/src/test/java/org/apache/kafka/streams/examples/wordcount/WordCountTransformerTest.java
@@ -16,13 +16,13 @@
  */
 package org.apache.kafka.streams.examples.wordcount;
 
-import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.kstream.Transformer;
 import org.apache.kafka.streams.processor.Cancellable;
 import org.apache.kafka.streams.processor.PunctuationType;
 import org.apache.kafka.streams.processor.Punctuator;
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.api.MockProcessorContext;
+import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.junit.jupiter.api.Test;
@@ -44,7 +44,7 @@ public void test() {
         final MockProcessorContext<String, String> context = new MockProcessorContext<>();
 
         // Create and initialize the transformer under test; including its provided store
-        final WordCountTransformerDemo.MyTransformerSupplier supplier = new WordCountTransformerDemo.MyTransformerSupplier();
+        final WordCountTransformerDemo.MyProcessorSupplier supplier = new WordCountTransformerDemo.MyProcessorSupplier();
         for (final StoreBuilder<?> storeBuilder : supplier.stores()) {
             final StateStore store = storeBuilder
                 .withLoggingDisabled() // Changelog is not supported by MockProcessorContext.
@@ -53,16 +53,16 @@ public void test() {
             store.init(context.getStateStoreContext(), store);
             context.getStateStoreContext().register(store, null);
         }
-        final Transformer<String, String, KeyValue<String, String>> transformer = supplier.get();
-        transformer.init(new org.apache.kafka.streams.processor.MockProcessorContext() {
+        final Processor<String, String, String, String> processor = supplier.get();
+        processor.init(new org.apache.kafka.streams.processor.api.MockProcessorContext<String, String>() {
             @Override
             public <S extends StateStore> S getStateStore(final String name) {
                 return context.getStateStore(name);
             }
 
             @Override
-            public <K, V> void forward(final K key, final V value) {
-                context.forward(new Record<>((String) key, (String) value, 0L));
+            public <K extends String, V extends String> void forward(final Record<K, V> record) {
+                context.forward(record);
             }
 
             @Override
@@ -72,7 +72,8 @@ public Cancellable schedule(final Duration interval, final PunctuationType type,
         });
 
         // send a record to the transformer
-        transformer.transform("key", "alpha beta\tgamma\n\talpha");
+        final Record<String, String> record = new Record<>("key", "alpha beta\tgamma\n\talpha", 0L);
+        processor.process(record);
 
         // note that the transformer does not forward during transform()
         assertTrue(context.forwarded().isEmpty());
diff --git a/streams/quickstart/java/pom.xml b/streams/quickstart/java/pom.xml
index ba32c037259ca..9ed863dc8c5f4 100644
--- a/streams/quickstart/java/pom.xml
+++ b/streams/quickstart/java/pom.xml
@@ -26,7 +26,7 @@
     <parent>
         <groupId>org.apache.kafka</groupId>
         <artifactId>streams-quickstart</artifactId>
-        <version>3.2.0-SNAPSHOT</version>
+        <version>3.4.0-SNAPSHOT</version>
         <relativePath>..</relativePath>
     </parent>
 
diff --git a/streams/quickstart/java/src/main/resources/archetype-resources/pom.xml b/streams/quickstart/java/src/main/resources/archetype-resources/pom.xml
index b9fa16c5bb55e..d763689a514b8 100644
--- a/streams/quickstart/java/src/main/resources/archetype-resources/pom.xml
+++ b/streams/quickstart/java/src/main/resources/archetype-resources/pom.xml
@@ -29,7 +29,7 @@
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-        <kafka.version>3.2.0-SNAPSHOT</kafka.version>
+        <kafka.version>3.4.0-SNAPSHOT</kafka.version>
         <slf4j.version>1.7.7</slf4j.version>
         <log4j.version>1.2.17</log4j.version>
     </properties>
diff --git a/streams/quickstart/pom.xml b/streams/quickstart/pom.xml
index 2804d0ad7144e..56bd2305fc25c 100644
--- a/streams/quickstart/pom.xml
+++ b/streams/quickstart/pom.xml
@@ -22,7 +22,7 @@
     <groupId>org.apache.kafka</groupId>
     <artifactId>streams-quickstart</artifactId>
     <packaging>pom</packaging>
-    <version>3.2.0-SNAPSHOT</version>
+    <version>3.4.0-SNAPSHOT</version>
 
     <name>Kafka Streams :: Quickstart</name>
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
index 039fdaf2ec346..02b576c186b0c 100644
--- a/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
+++ b/streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
@@ -56,7 +56,6 @@
 import org.apache.kafka.streams.processor.internals.ClientUtils;
 import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier;
 import org.apache.kafka.streams.processor.internals.GlobalStreamThread;
-import org.apache.kafka.streams.processor.internals.GlobalStreamThread.State;
 import org.apache.kafka.streams.processor.internals.StateDirectory;
 import org.apache.kafka.streams.processor.internals.StreamThread;
 import org.apache.kafka.streams.processor.internals.StreamsMetadataState;
@@ -65,6 +64,7 @@
 import org.apache.kafka.streams.processor.internals.TopologyMetadata;
 import org.apache.kafka.streams.processor.internals.assignment.AssignorError;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
+import org.apache.kafka.streams.processor.internals.namedtopology.NamedTopology;
 import org.apache.kafka.streams.query.FailureReason;
 import org.apache.kafka.streams.query.PositionBound;
 import org.apache.kafka.streams.query.QueryConfig;
@@ -75,13 +75,11 @@
 import org.apache.kafka.streams.state.internals.GlobalStateStoreProvider;
 import org.apache.kafka.streams.state.internals.QueryableStoreProvider;
 import org.apache.kafka.streams.state.internals.StreamThreadStateStoreProvider;
-
 import org.slf4j.Logger;
 
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.function.BiConsumer;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -102,6 +100,7 @@
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
 import java.util.function.Consumer;
 import java.util.stream.Collectors;
 
@@ -110,6 +109,7 @@
 import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
 import static org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration;
 import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsets;
+import static org.apache.kafka.streams.processor.internals.TopologyMetadata.UNNAMED_TOPOLOGY;
 
 /**
  * A Kafka client that allows for performing continuous computation on input coming from one or more input topics and
@@ -154,9 +154,6 @@ public class KafkaStreams implements AutoCloseable {
 
     private static final String JMX_PREFIX = "kafka.streams";
 
-    private static final Set<Class<? extends Throwable>> EXCEPTIONS_NOT_TO_BE_HANDLED_BY_USERS =
-        new HashSet<>(Arrays.asList(IllegalStateException.class, IllegalArgumentException.class));
-
     // processId is expected to be unique across JVMs and to be used
     // in userData of the subscription request to allow assignor be aware
     // of the co-location of stream thread's consumers. It is for internal
@@ -473,6 +470,13 @@ public void setUncaughtExceptionHandler(final StreamsUncaughtExceptionHandler us
                         exception -> handleStreamsUncaughtException(exception, userStreamsUncaughtExceptionHandler, false)
                     );
                 }
+                processStreamThread(thread -> thread.setUncaughtExceptionHandler((t, e) -> { }
+                ));
+
+                if (globalStreamThread != null) {
+                    globalStreamThread.setUncaughtExceptionHandler((t, e) -> { }
+                    );
+                }
             } else {
                 throw new IllegalStateException("Can only set UncaughtExceptionHandler before calling start(). " +
                     "Current state is: " + state);
@@ -514,25 +518,10 @@ private void replaceStreamThread(final Throwable throwable) {
         }
     }
 
-    private boolean wrappedExceptionIsIn(final Throwable throwable, final Set<Class<? extends Throwable>> exceptionsOfInterest) {
-        return throwable.getCause() != null && exceptionsOfInterest.contains(throwable.getCause().getClass());
-    }
-
-    private StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse getActionForThrowable(final Throwable throwable,
-                                                                                                final StreamsUncaughtExceptionHandler streamsUncaughtExceptionHandler) {
-        final StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse action;
-        if (wrappedExceptionIsIn(throwable, EXCEPTIONS_NOT_TO_BE_HANDLED_BY_USERS)) {
-            action = SHUTDOWN_CLIENT;
-        } else {
-            action = streamsUncaughtExceptionHandler.handle(throwable);
-        }
-        return action;
-    }
-
     private void handleStreamsUncaughtException(final Throwable throwable,
                                                 final StreamsUncaughtExceptionHandler streamsUncaughtExceptionHandler,
                                                 final boolean skipThreadReplacement) {
-        final StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse action = getActionForThrowable(throwable, streamsUncaughtExceptionHandler);
+        final StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse action = streamsUncaughtExceptionHandler.handle(throwable);
         if (oldHandler) {
             log.warn("Stream's new uncaught exception handler is set as well as the deprecated old handler." +
                     "The old handler will be ignored as long as a new handler is set.");
@@ -548,7 +537,7 @@ private void handleStreamsUncaughtException(final Throwable throwable,
                 break;
             case SHUTDOWN_CLIENT:
                 log.error("Encountered the following exception during processing " +
-                        "and Kafka Streams opted to " + action + "." +
+                        "and the registered exception handler opted to " + action + "." +
                         " The streams client is going to shut down now. ", throwable);
                 closeToError();
                 break;
@@ -990,7 +979,6 @@ private StreamThread createAndAddStreamThread(final long cacheSizePerThread, fin
             time,
             streamsMetadataState,
             cacheSizePerThread,
-
             stateDirectory,
             delegatingStateRestoreListener,
             threadIdx,
@@ -1338,15 +1326,33 @@ public synchronized void start() throws IllegalStateException, StreamsException
         }
     }
 
+    /**
+     * Class that handles options passed in case of {@code KafkaStreams} instance scale down
+     */
+    public static class CloseOptions {
+        private Duration timeout = Duration.ofMillis(Long.MAX_VALUE);
+        private boolean leaveGroup = false;
+
+        public CloseOptions timeout(final Duration timeout) {
+            this.timeout = timeout;
+            return this;
+        }
+
+        public CloseOptions leaveGroup(final boolean leaveGroup) {
+            this.leaveGroup = leaveGroup;
+            return this;
+        }
+    }
+
     /**
      * Shutdown this {@code KafkaStreams} instance by signaling all the threads to stop, and then wait for them to join.
      * This will block until all threads have stopped.
      */
     public void close() {
-        close(Long.MAX_VALUE);
+        close(Long.MAX_VALUE, false);
     }
 
-    private Thread shutdownHelper(final boolean error) {
+    private Thread shutdownHelper(final boolean error, final long timeoutMs, final boolean leaveGroup) {
         stateDirCleaner.shutdownNow();
         if (rocksDBMetricsRecordingService != null) {
             rocksDBMetricsRecordingService.shutdownNow();
@@ -1378,6 +1384,10 @@ private Thread shutdownHelper(final boolean error) {
                 }
             });
 
+            if (leaveGroup) {
+                processStreamThread(streamThreadLeaveConsumerGroup(timeoutMs));
+            }
+
             log.info("Shutdown {} stream threads complete", numStreamThreads);
 
             if (globalStreamThread != null) {
@@ -1412,7 +1422,7 @@ private Thread shutdownHelper(final boolean error) {
         }, clientId + "-CloseThread");
     }
 
-    private boolean close(final long timeoutMs) {
+    private boolean close(final long timeoutMs, final boolean leaveGroup) {
         if (state.hasCompletedShutdown()) {
             log.info("Streams client is already in the terminal {} state, all resources are closed and the client has stopped.", state);
             return true;
@@ -1437,7 +1447,8 @@ private boolean close(final long timeoutMs) {
             log.error("Failed to transition to PENDING_SHUTDOWN, current state is {}", state);
             throw new StreamsException("Failed to shut down while in state " + state);
         } else {
-            final Thread shutdownThread = shutdownHelper(false);
+
+            final Thread shutdownThread = shutdownHelper(false, timeoutMs, leaveGroup);
 
             shutdownThread.setDaemon(true);
             shutdownThread.start();
@@ -1456,7 +1467,7 @@ private void closeToError() {
         if (!setState(State.PENDING_ERROR)) {
             log.info("Skipping shutdown since we are already in " + state());
         } else {
-            final Thread shutdownThread = shutdownHelper(true);
+            final Thread shutdownThread = shutdownHelper(true, -1, false);
 
             shutdownThread.setDaemon(true);
             shutdownThread.start();
@@ -1484,7 +1495,55 @@ public synchronized boolean close(final Duration timeout) throws IllegalArgument
 
         log.debug("Stopping Streams client with timeoutMillis = {} ms.", timeoutMs);
 
-        return close(timeoutMs);
+        return close(timeoutMs, false);
+    }
+
+    /**
+     * Shutdown this {@code KafkaStreams} by signaling all the threads to stop, and then wait up to the timeout for the
+     * threads to join.
+     * @param options  contains timeout to specify how long to wait for the threads to shutdown, and a flag leaveGroup to
+     *                 trigger consumer leave call
+     * @return {@code true} if all threads were successfully stopped&mdash;{@code false} if the timeout was reached
+     * before all threads stopped
+     * Note that this method must not be called in the {@link StateListener#onChange(KafkaStreams.State, KafkaStreams.State)} callback of {@link StateListener}.
+     * @throws IllegalArgumentException if {@code timeout} can't be represented as {@code long milliseconds}
+     */
+    public synchronized boolean close(final CloseOptions options) throws IllegalArgumentException {
+        Objects.requireNonNull(options, "options cannot be null");
+        final String msgPrefix = prepareMillisCheckFailMsgPrefix(options.timeout, "timeout");
+        final long timeoutMs = validateMillisecondDuration(options.timeout, msgPrefix);
+        if (timeoutMs < 0) {
+            throw new IllegalArgumentException("Timeout can't be negative.");
+        }
+        log.debug("Stopping Streams client with timeoutMillis = {} ms.", timeoutMs);
+        return close(timeoutMs, options.leaveGroup);
+    }
+
+    private Consumer<StreamThread> streamThreadLeaveConsumerGroup(final long remainingTimeMs) {
+        return thread -> {
+            final Optional<String> groupInstanceId = thread.getGroupInstanceID();
+            if (groupInstanceId.isPresent()) {
+                log.debug("Sending leave group trigger to removing instance from consumer group: {}.",
+                    groupInstanceId.get());
+                final MemberToRemove memberToRemove = new MemberToRemove(groupInstanceId.get());
+                final Collection<MemberToRemove> membersToRemove = Collections.singletonList(memberToRemove);
+
+                final RemoveMembersFromConsumerGroupResult removeMembersFromConsumerGroupResult = adminClient
+                    .removeMembersFromConsumerGroup(
+                        applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG),
+                        new RemoveMembersFromConsumerGroupOptions(membersToRemove)
+                    );
+
+                try {
+                    removeMembersFromConsumerGroupResult.memberResult(memberToRemove)
+                        .get(remainingTimeMs, TimeUnit.MILLISECONDS);
+                } catch (final Exception e) {
+                    log.error("Could not remove static member {} from consumer group {} due to a: {}",
+                        groupInstanceId.get(),
+                        applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG), e);
+                }
+            }
+        };
     }
 
     /**
@@ -1651,6 +1710,51 @@ public <T> T store(final StoreQueryParameters<T> storeQueryParameters) {
         return queryableStoreProvider.getStore(storeQueryParameters);
     }
 
+    /**
+     *  This method pauses processing for the KafkaStreams instance.
+     *
+     *  Paused topologies will only skip over a) processing, b) punctuation, and c) standby tasks.
+     *  Notably, paused topologies will still poll Kafka consumers, and commit offsets.
+     *  This method sets transient state that is not maintained or managed among instances.
+     *  Note that pause() can be called before start() in order to start a KafkaStreams instance
+     *  in a manner where the processing is paused as described, but the consumers are started up.
+     */
+    public void pause() {
+        if (topologyMetadata.hasNamedTopologies()) {
+            for (final NamedTopology namedTopology : topologyMetadata.getAllNamedTopologies()) {
+                topologyMetadata.pauseTopology(namedTopology.name());
+            }
+        } else {
+            topologyMetadata.pauseTopology(UNNAMED_TOPOLOGY);
+        }
+    }
+
+    /**
+     * @return true when the KafkaStreams instance has its processing paused.
+     */
+    public boolean isPaused() {
+        if (topologyMetadata.hasNamedTopologies()) {
+            return topologyMetadata.getAllNamedTopologies().stream()
+                .map(NamedTopology::name)
+                .allMatch(topologyMetadata::isPaused);
+        } else {
+            return topologyMetadata.isPaused(UNNAMED_TOPOLOGY);
+        }
+    }
+
+    /**
+     * This method resumes processing for the KafkaStreams instance.
+     */
+    public void resume() {
+        if (topologyMetadata.hasNamedTopologies()) {
+            for (final NamedTopology namedTopology : topologyMetadata.getAllNamedTopologies()) {
+                topologyMetadata.resumeTopology(namedTopology.name());
+            }
+        } else {
+            topologyMetadata.resumeTopology(UNNAMED_TOPOLOGY);
+        }
+    }
+
     /**
      * handle each stream thread in a snapshot of threads.
      * noted: iteration over SynchronizedList is not thread safe so it must be manually synchronized. However, we may
@@ -1772,6 +1876,9 @@ protected Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLags(final Li
      * This method allows callers outside of the Streams runtime to access the internal state of
      * stateful processors. See https://kafka.apache.org/documentation/streams/developer-guide/interactive-queries.html
      * for more information.
+     * <p>
+     * NOTICE: This functionality is {@link Evolving} and subject to change in minor versions.
+     * Once it is stabilized, this notice and the evolving annotation will be removed.
      *
      * @param <R> The result type specified by the query.
      * @throws StreamsNotStartedException If Streams has not yet been started. Just call {@link
diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsBuilder.java b/streams/src/main/java/org/apache/kafka/streams/StreamsBuilder.java
index 5f5d0b7853aac..e913728984e18 100644
--- a/streams/src/main/java/org/apache/kafka/streams/StreamsBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/StreamsBuilder.java
@@ -38,7 +38,6 @@
 import org.apache.kafka.streams.processor.internals.ProcessorAdapter;
 import org.apache.kafka.streams.processor.internals.ProcessorNode;
 import org.apache.kafka.streams.processor.internals.SourceNode;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 import org.apache.kafka.streams.state.StoreBuilder;
@@ -81,14 +80,19 @@ public StreamsBuilder() {
         internalStreamsBuilder = new InternalStreamsBuilder(internalTopologyBuilder);
     }
 
-    protected StreamsBuilder(final TopologyConfig topologyConfigs) {
+    /**
+     * Create a {@code StreamsBuilder} instance.
+     *
+     * @param topologyConfigs    the streams configs that apply at the topology level. Please refer to {@link TopologyConfig} for more detail
+     */
+    public StreamsBuilder(final TopologyConfig topologyConfigs) {
         topology = getNewTopology(topologyConfigs);
         internalTopologyBuilder = topology.internalTopologyBuilder;
         internalStreamsBuilder = new InternalStreamsBuilder(internalTopologyBuilder);
     }
 
     protected Topology getNewTopology(final TopologyConfig topologyConfigs) {
-        return new Topology();
+        return new Topology(topologyConfigs);
     }
 
     /**
diff --git a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java
index 62e5e82e24da1..8976a0983fa9d 100644
--- a/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java
+++ b/streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java
@@ -31,7 +31,9 @@
 import org.apache.kafka.common.config.TopicConfig;
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
 import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.errors.DefaultProductionExceptionHandler;
 import org.apache.kafka.streams.errors.DeserializationExceptionHandler;
 import org.apache.kafka.streams.errors.LogAndFailExceptionHandler;
@@ -48,12 +50,16 @@
 import java.time.Duration;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Properties;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import static org.apache.kafka.common.IsolationLevel.READ_COMMITTED;
+import static org.apache.kafka.common.config.ConfigDef.ListSize.atMostOfSize;
 import static org.apache.kafka.common.config.ConfigDef.Range.atLeast;
 import static org.apache.kafka.common.config.ConfigDef.Range.between;
 import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
@@ -148,6 +154,12 @@ public class StreamsConfig extends AbstractConfig {
     public static final int DUMMY_THREAD_INDEX = 1;
     public static final long MAX_TASK_IDLE_MS_DISABLED = -1;
 
+    // We impose these limitations because client tags are encoded into the subscription info,
+    // which is part of the group metadata message that is persisted into the internal topic.
+    public static final int MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE = 5;
+    public static final int MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH = 20;
+    public static final int MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH = 30;
+
     /**
      * Prefix used to provide default topic configs to be applied when creating internal topics.
      * These should be valid properties from {@link org.apache.kafka.common.config.TopicConfig TopicConfig}.
@@ -212,6 +224,15 @@ public class StreamsConfig extends AbstractConfig {
     @SuppressWarnings("WeakerAccess")
     public static final String ADMIN_CLIENT_PREFIX = "admin.";
 
+    /**
+     * Prefix used to add arbitrary tags to a Kafka Stream's instance as key-value pairs.
+     * Example:
+     * client.tag.zone=zone1
+     * client.tag.cluster=cluster1
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String CLIENT_TAG_PREFIX = "client.tag.";
+
     /**
      * Config value for parameter {@link #TOPOLOGY_OPTIMIZATION_CONFIG "topology.optimization"} for disabling topology optimization
      */
@@ -282,6 +303,54 @@ public class StreamsConfig extends AbstractConfig {
     @SuppressWarnings("WeakerAccess")
     public static final String UPGRADE_FROM_23 = "2.3";
 
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 2.4.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_24 = "2.4";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 2.5.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_25 = "2.5";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 2.6.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_26 = "2.6";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 2.7.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_27 = "2.7";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 2.8.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_28 = "2.8";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 3.0.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_30 = "3.0";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 3.1.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_31 = "3.1";
+
+    /**
+     * Config value for parameter {@link #UPGRADE_FROM_CONFIG "upgrade.from"} for upgrading an application from version {@code 3.2.x}.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public static final String UPGRADE_FROM_32 = "3.2";
+
     /**
      * Config value for parameter {@link #PROCESSING_GUARANTEE_CONFIG "processing.guarantee"} for at-least-once processing guarantees.
      */
@@ -371,10 +440,19 @@ public class StreamsConfig extends AbstractConfig {
     /** {@code commit.interval.ms} */
     @SuppressWarnings("WeakerAccess")
     public static final String COMMIT_INTERVAL_MS_CONFIG = "commit.interval.ms";
-    private static final String COMMIT_INTERVAL_MS_DOC = "The frequency in milliseconds with which to save the position of the processor." +
+    private static final String COMMIT_INTERVAL_MS_DOC = "The frequency in milliseconds with which to commit processing progress." +
+        " For at-least-once processing, committing means to save the position (ie, offsets) of the processor." +
+        " For exactly-once processing, it means to commit the transaction which includes to save the position and to make the committed data in the output topic visible to consumers with isolation level read_committed." +
         " (Note, if <code>processing.guarantee</code> is set to <code>" + EXACTLY_ONCE_V2 + "</code>, <code>" + EXACTLY_ONCE + "</code>,the default value is <code>" + EOS_DEFAULT_COMMIT_INTERVAL_MS + "</code>," +
         " otherwise the default value is <code>" + DEFAULT_COMMIT_INTERVAL_MS + "</code>.";
 
+    /** {@code repartition.purge.interval.ms} */
+    @SuppressWarnings("WeakerAccess")
+    public static final String REPARTITION_PURGE_INTERVAL_MS_CONFIG = "repartition.purge.interval.ms";
+    private static final String REPARTITION_PURGE_INTERVAL_MS_DOC = "The frequency in milliseconds with which to delete fully consumed records from repartition topics." +
+            " Purging will occur after at least this value since the last purge, but may be delayed until later." +
+            " (Note, unlike <code>commit.interval.ms</code>, the default for this value remains unchanged when <code>processing.guarantee</code> is set to <code>" + EXACTLY_ONCE_V2 + "</code>).";
+
     /** {@code connections.max.idle.ms} */
     @SuppressWarnings("WeakerAccess")
     public static final String CONNECTIONS_MAX_IDLE_MS_CONFIG = CommonClientConfigs.CONNECTIONS_MAX_IDLE_MS_CONFIG;
@@ -389,6 +467,14 @@ public class StreamsConfig extends AbstractConfig {
     public static final String DEFAULT_PRODUCTION_EXCEPTION_HANDLER_CLASS_CONFIG = "default.production.exception.handler";
     private static final String DEFAULT_PRODUCTION_EXCEPTION_HANDLER_CLASS_DOC = "Exception handling class that implements the <code>org.apache.kafka.streams.errors.ProductionExceptionHandler</code> interface.";
 
+    /** {@code default.dsl.store} */
+    @SuppressWarnings("WeakerAccess")
+    public static final String DEFAULT_DSL_STORE_CONFIG = "default.dsl.store";
+    public static final String DEFAULT_DSL_STORE_DOC = "The default state store type used by DSL operators.";
+
+    public static final String ROCKS_DB = "rocksDB";
+    public static final String IN_MEMORY = "in_memory";
+
     /** {@code default.windowed.key.serde.inner} */
     @SuppressWarnings("WeakerAccess")
     @Deprecated
@@ -404,8 +490,8 @@ public class StreamsConfig extends AbstractConfig {
         "<code>org.apache.kafka.common.serialization.Serde</code> interface.";
 
     public static final String WINDOWED_INNER_CLASS_SERDE = "windowed.inner.class.serde";
-    private static final String WINDOWED_INNER_CLASS_SERDE_DOC = " Default serializer / deserializer for the inner class of a windowed record. Must implement the \" +\n" +
-        "        \"<code>org.apache.kafka.common.serialization.Serde</code> interface.. Note that setting this config in KafkaStreams application would result " +
+    private static final String WINDOWED_INNER_CLASS_SERDE_DOC = " Default serializer / deserializer for the inner class of a windowed record. Must implement the " +
+        "<code>org.apache.kafka.common.serialization.Serde</code> interface. Note that setting this config in KafkaStreams application would result " +
         "in an error as it is meant to be used only from Plain consumer client.";
 
     /** {@code default key.serde} */
@@ -504,6 +590,13 @@ public class StreamsConfig extends AbstractConfig {
     @SuppressWarnings("WeakerAccess")
     public static final String RECEIVE_BUFFER_CONFIG = CommonClientConfigs.RECEIVE_BUFFER_CONFIG;
 
+    /** {@code rack.aware.assignment.tags} */
+    @SuppressWarnings("WeakerAccess")
+    public static final String RACK_AWARE_ASSIGNMENT_TAGS_CONFIG = "rack.aware.assignment.tags";
+    private static final String RACK_AWARE_ASSIGNMENT_TAGS_DOC = "List of client tag keys used to distribute standby replicas across Kafka Streams instances." +
+                                                                 " When configured, Kafka Streams will make a best-effort to distribute" +
+                                                                 " the standby tasks over each client tag dimension.";
+
     /** {@code reconnect.backoff.ms} */
     @SuppressWarnings("WeakerAccess")
     public static final String RECONNECT_BACKOFF_MS_CONFIG = CommonClientConfigs.RECONNECT_BACKOFF_MS_CONFIG;
@@ -579,11 +672,14 @@ public class StreamsConfig extends AbstractConfig {
     public static final String UPGRADE_FROM_CONFIG = "upgrade.from";
     private static final String UPGRADE_FROM_DOC = "Allows upgrading in a backward compatible way. " +
         "This is needed when upgrading from [0.10.0, 1.1] to 2.0+, or when upgrading from [2.0, 2.3] to 2.4+. " +
-        "When upgrading from 2.4 to a newer version it is not required to specify this config. Default is `null`. " +
+        "When upgrading from 3.3 to a newer version it is not required to specify this config. Default is `null`. " +
         "Accepted values are \"" + UPGRADE_FROM_0100 + "\", \"" + UPGRADE_FROM_0101 + "\", \"" +
         UPGRADE_FROM_0102 + "\", \"" + UPGRADE_FROM_0110 + "\", \"" + UPGRADE_FROM_10 + "\", \"" +
         UPGRADE_FROM_11 + "\", \"" + UPGRADE_FROM_20 + "\", \"" + UPGRADE_FROM_21 + "\", \"" +
-        UPGRADE_FROM_22 + "\", \"" + UPGRADE_FROM_23 + "\" (for upgrading from the corresponding old version).";
+        UPGRADE_FROM_22 + "\", \"" + UPGRADE_FROM_23 + "\", \"" + UPGRADE_FROM_24 + "\", \"" +
+        UPGRADE_FROM_25 + "\", \"" + UPGRADE_FROM_26 + "\", \"" + UPGRADE_FROM_27 + "\", \"" +
+        UPGRADE_FROM_28 + "\", \"" + UPGRADE_FROM_30 + "\", \"" + UPGRADE_FROM_31 + "\", \"" +
+        UPGRADE_FROM_32 + "\" (for upgrading from the corresponding old version).";
 
     /** {@code windowstore.changelog.additional.retention.ms} */
     @SuppressWarnings("WeakerAccess")
@@ -719,6 +815,12 @@ public class StreamsConfig extends AbstractConfig {
                     in(AT_LEAST_ONCE, EXACTLY_ONCE, EXACTLY_ONCE_BETA, EXACTLY_ONCE_V2),
                     Importance.MEDIUM,
                     PROCESSING_GUARANTEE_DOC)
+            .define(RACK_AWARE_ASSIGNMENT_TAGS_CONFIG,
+                    Type.LIST,
+                    Collections.emptyList(),
+                    atMostOfSize(MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE),
+                    Importance.MEDIUM,
+                    RACK_AWARE_ASSIGNMENT_TAGS_DOC)
             .define(REPLICATION_FACTOR_CONFIG,
                     Type.INT,
                     -1,
@@ -727,6 +829,7 @@ public class StreamsConfig extends AbstractConfig {
             .define(SECURITY_PROTOCOL_CONFIG,
                     Type.STRING,
                     CommonClientConfigs.DEFAULT_SECURITY_PROTOCOL,
+                    in(Utils.enumOptions(SecurityProtocol.class)),
                     Importance.MEDIUM,
                     CommonClientConfigs.SECURITY_PROTOCOL_DOC)
             .define(TASK_TIMEOUT_MS_CONFIG,
@@ -768,11 +871,23 @@ public class StreamsConfig extends AbstractConfig {
                     atLeast(0),
                     Importance.LOW,
                     COMMIT_INTERVAL_MS_DOC)
+            .define(REPARTITION_PURGE_INTERVAL_MS_CONFIG,
+                    Type.LONG,
+                    DEFAULT_COMMIT_INTERVAL_MS,
+                    atLeast(0),
+                    Importance.LOW,
+                    REPARTITION_PURGE_INTERVAL_MS_DOC)
             .define(CONNECTIONS_MAX_IDLE_MS_CONFIG,
                     ConfigDef.Type.LONG,
                     9 * 60 * 1000L,
                     ConfigDef.Importance.LOW,
                     CommonClientConfigs.CONNECTIONS_MAX_IDLE_MS_DOC)
+            .define(DEFAULT_DSL_STORE_CONFIG,
+                    Type.STRING,
+                    ROCKS_DB,
+                    in(ROCKS_DB, IN_MEMORY),
+                    Importance.LOW,
+                    DEFAULT_DSL_STORE_DOC)
             .define(METADATA_MAX_AGE_CONFIG,
                     ConfigDef.Type.LONG,
                     5 * 60 * 1000L,
@@ -878,7 +993,15 @@ public class StreamsConfig extends AbstractConfig {
                        UPGRADE_FROM_20,
                        UPGRADE_FROM_21,
                        UPGRADE_FROM_22,
-                       UPGRADE_FROM_23),
+                       UPGRADE_FROM_23,
+                       UPGRADE_FROM_24,
+                       UPGRADE_FROM_25,
+                       UPGRADE_FROM_26,
+                       UPGRADE_FROM_27,
+                       UPGRADE_FROM_28,
+                       UPGRADE_FROM_30,
+                       UPGRADE_FROM_31,
+                       UPGRADE_FROM_32),
                     Importance.LOW,
                     UPGRADE_FROM_DOC)
             .define(WINDOWED_INNER_CLASS_SERDE,
@@ -949,6 +1072,9 @@ public static class InternalConfig {
         // Private API used to control the emit latency for left/outer join results (https://issues.apache.org/jira/browse/KAFKA-10847)
         public static final String EMIT_INTERVAL_MS_KSTREAMS_OUTER_JOIN_SPURIOUS_RESULTS_FIX = "__emit.interval.ms.kstreams.outer.join.spurious.results.fix__";
 
+        // Private API used to control the emit latency for windowed aggregation results for ON_WINDOW_CLOSE emit strategy
+        public static final String EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION = "__emit.interval.ms.kstreams.windowed.aggregation__";
+
         // Private API used to control the usage of consistency offset vectors
         public static final String IQ_CONSISTENCY_OFFSET_VECTOR_ENABLED = "__iq.consistency.offset"
             + ".vector.enabled__";
@@ -956,6 +1082,9 @@ public static class InternalConfig {
         // Private API used to control the prefix of the auto created topics
         public static final String TOPIC_PREFIX_ALTERNATIVE = "__internal.override.topic.prefix__";
 
+        // Private API to enable the state updater (i.e. state updating on a dedicated thread)
+        public static final String STATE_UPDATER_ENABLED = "__state.updater.enabled__";
+
         public static boolean getBoolean(final Map<String, Object> configs, final String key, final boolean defaultValue) {
             final Object value = configs.getOrDefault(key, defaultValue);
             if (value instanceof Boolean) {
@@ -1027,6 +1156,16 @@ public static String restoreConsumerPrefix(final String consumerProp) {
         return RESTORE_CONSUMER_PREFIX + consumerProp;
     }
 
+    /**
+     * Prefix a client tag key with {@link #CLIENT_TAG_PREFIX}.
+     *
+     * @param clientTagKey client tag key
+     * @return {@link #CLIENT_TAG_PREFIX} + {@code clientTagKey}
+     */
+    public static String clientTagPrefix(final String clientTagKey) {
+        return CLIENT_TAG_PREFIX + clientTagKey;
+    }
+
     /**
      * Prefix a property with {@link #GLOBAL_CONSUMER_PREFIX}. This is used to isolate {@link ConsumerConfig global consumer configs}
      * from other client configs.
@@ -1146,9 +1285,43 @@ protected Map<String, Object> postProcessParsedConfig(final Map<String, Object>
             configUpdates.put(COMMIT_INTERVAL_MS_CONFIG, EOS_DEFAULT_COMMIT_INTERVAL_MS);
         }
 
+        validateRackAwarenessConfiguration();
+
         return configUpdates;
     }
 
+    private void validateRackAwarenessConfiguration() {
+        final List<String> rackAwareAssignmentTags = getList(RACK_AWARE_ASSIGNMENT_TAGS_CONFIG);
+        final Map<String, String> clientTags = getClientTags();
+
+        if (clientTags.size() > MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE) {
+            throw new ConfigException("At most " + MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE + " client tags " +
+                                      "can be specified using " + CLIENT_TAG_PREFIX + " prefix.");
+        }
+
+        for (final String rackAwareAssignmentTag : rackAwareAssignmentTags) {
+            if (!clientTags.containsKey(rackAwareAssignmentTag)) {
+                throw new ConfigException(RACK_AWARE_ASSIGNMENT_TAGS_CONFIG,
+                                          rackAwareAssignmentTags,
+                                          "Contains invalid value [" + rackAwareAssignmentTag + "] " +
+                                          "which doesn't have corresponding tag set via [" + CLIENT_TAG_PREFIX + "] prefix.");
+            }
+        }
+
+        clientTags.forEach((tagKey, tagValue) -> {
+            if (tagKey.length() > MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH) {
+                throw new ConfigException(CLIENT_TAG_PREFIX,
+                                          tagKey,
+                                          "Tag key exceeds maximum length of " + MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH + ".");
+            }
+            if (tagValue.length() > MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH) {
+                throw new ConfigException(CLIENT_TAG_PREFIX,
+                                          tagValue,
+                                          "Tag value exceeds maximum length of " + MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH + ".");
+            }
+        });
+    }
+
     private Map<String, Object> getCommonConsumerConfigs() {
         final Map<String, Object> clientProvidedProps = getClientPropsWithPrefix(CONSUMER_PREFIX, ConsumerConfig.configNames());
 
@@ -1282,6 +1455,7 @@ public Map<String, Object> getMainConsumerConfigs(final String groupId, final St
         consumerProps.put(PROBING_REBALANCE_INTERVAL_MS_CONFIG, getLong(PROBING_REBALANCE_INTERVAL_MS_CONFIG));
         consumerProps.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, StreamsPartitionAssignor.class.getName());
         consumerProps.put(WINDOW_STORE_CHANGE_LOG_ADDITIONAL_RETENTION_MS_CONFIG, getLong(WINDOW_STORE_CHANGE_LOG_ADDITIONAL_RETENTION_MS_CONFIG));
+        consumerProps.put(RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, getList(RACK_AWARE_ASSIGNMENT_TAGS_CONFIG));
 
         // disable auto topic creation
         consumerProps.put(ConsumerConfig.ALLOW_AUTO_CREATE_TOPICS_CONFIG, "false");
@@ -1428,6 +1602,21 @@ public Map<String, Object> getAdminConfigs(final String clientId) {
         return props;
     }
 
+    /**
+     * Get the configured client tags set with {@link #CLIENT_TAG_PREFIX} prefix.
+     *
+     * @return Map of the client tags.
+     */
+    @SuppressWarnings("WeakerAccess")
+    public Map<String, String> getClientTags() {
+        return originalsWithPrefix(CLIENT_TAG_PREFIX).entrySet().stream().collect(
+            Collectors.toMap(
+                Map.Entry::getKey,
+                tagEntry -> Objects.toString(tagEntry.getValue())
+            )
+        );
+    }
+
     private Map<String, Object> getClientPropsWithPrefix(final String prefix,
                                                          final Set<String> configNames) {
         final Map<String, Object> props = clientProps(configNames, originals());
diff --git a/streams/src/main/java/org/apache/kafka/streams/Topology.java b/streams/src/main/java/org/apache/kafka/streams/Topology.java
index 0eb5e2b3861b4..314d2190f666c 100644
--- a/streams/src/main/java/org/apache/kafka/streams/Topology.java
+++ b/streams/src/main/java/org/apache/kafka/streams/Topology.java
@@ -60,6 +60,10 @@ public Topology() {
         this(new InternalTopologyBuilder());
     }
 
+    public Topology(final TopologyConfig topologyConfigs) {
+        this(new InternalTopologyBuilder(topologyConfigs));
+    }
+
     protected Topology(final InternalTopologyBuilder internalTopologyBuilder) {
         this.internalTopologyBuilder = internalTopologyBuilder;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/TopologyConfig.java b/streams/src/main/java/org/apache/kafka/streams/TopologyConfig.java
similarity index 77%
rename from streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/TopologyConfig.java
rename to streams/src/main/java/org/apache/kafka/streams/TopologyConfig.java
index 2587a0ab5bc2e..c4bc85656eb05 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/TopologyConfig.java
+++ b/streams/src/main/java/org/apache/kafka/streams/TopologyConfig.java
@@ -14,22 +14,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.kafka.streams.processor.internals.namedtopology;
+package org.apache.kafka.streams;
 
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.ConfigDef;
 import org.apache.kafka.common.config.ConfigDef.Importance;
 import org.apache.kafka.common.config.ConfigDef.Type;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.DeserializationExceptionHandler;
+import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.internals.StreamsConfigUtils;
 import org.apache.kafka.streams.processor.TimestampExtractor;
 
+import org.apache.kafka.streams.processor.internals.namedtopology.KafkaStreamsNamedTopologyWrapper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.util.Properties;
 import java.util.function.Supplier;
 
+import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
 import static org.apache.kafka.streams.StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG;
 import static org.apache.kafka.streams.StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_DOC;
 import static org.apache.kafka.streams.StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG;
@@ -42,6 +44,10 @@
 import static org.apache.kafka.streams.StreamsConfig.MAX_TASK_IDLE_MS_DOC;
 import static org.apache.kafka.streams.StreamsConfig.TASK_TIMEOUT_MS_CONFIG;
 import static org.apache.kafka.streams.StreamsConfig.TASK_TIMEOUT_MS_DOC;
+import static org.apache.kafka.streams.StreamsConfig.DEFAULT_DSL_STORE_CONFIG;
+import static org.apache.kafka.streams.StreamsConfig.DEFAULT_DSL_STORE_DOC;
+import static org.apache.kafka.streams.StreamsConfig.ROCKS_DB;
+import static org.apache.kafka.streams.StreamsConfig.IN_MEMORY;
 
 /**
  * Streams configs that apply at the topology level. The values in the {@link StreamsConfig} parameter of the
@@ -53,36 +59,42 @@ public class TopologyConfig extends AbstractConfig {
     private static final ConfigDef CONFIG;
     static {
         CONFIG = new ConfigDef()
-             .define(BUFFERED_RECORDS_PER_PARTITION_CONFIG,
-                     Type.INT,
-                     null,
-                     Importance.LOW,
-                     BUFFERED_RECORDS_PER_PARTITION_DOC)
+            .define(BUFFERED_RECORDS_PER_PARTITION_CONFIG,
+                Type.INT,
+                null,
+                Importance.LOW,
+                BUFFERED_RECORDS_PER_PARTITION_DOC)
             .define(CACHE_MAX_BYTES_BUFFERING_CONFIG,
-                    Type.LONG,
-                    null,
-                    Importance.MEDIUM,
-                    CACHE_MAX_BYTES_BUFFERING_DOC)
-             .define(DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
-                    Type.CLASS,
-                    null,
-                    Importance.MEDIUM,
-                    DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_DOC)
-             .define(DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG,
-                     Type.CLASS,
-                     null,
-                     Importance.MEDIUM,
-                     DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_DOC)
-             .define(MAX_TASK_IDLE_MS_CONFIG,
-                     Type.LONG,
-                     null,
-                     Importance.MEDIUM,
-                     MAX_TASK_IDLE_MS_DOC)
-             .define(TASK_TIMEOUT_MS_CONFIG,
-                     Type.LONG,
-                     null,
-                     Importance.MEDIUM,
-                     TASK_TIMEOUT_MS_DOC);
+                Type.LONG,
+                null,
+                Importance.MEDIUM,
+                CACHE_MAX_BYTES_BUFFERING_DOC)
+            .define(DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
+                Type.CLASS,
+                null,
+                Importance.MEDIUM,
+                DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_DOC)
+            .define(DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG,
+                Type.CLASS,
+                null,
+                Importance.MEDIUM,
+                DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_DOC)
+            .define(MAX_TASK_IDLE_MS_CONFIG,
+                Type.LONG,
+                null,
+                Importance.MEDIUM,
+                MAX_TASK_IDLE_MS_DOC)
+            .define(TASK_TIMEOUT_MS_CONFIG,
+                Type.LONG,
+                null,
+                Importance.MEDIUM,
+                TASK_TIMEOUT_MS_DOC)
+            .define(DEFAULT_DSL_STORE_CONFIG,
+                Type.STRING,
+                ROCKS_DB,
+                in(ROCKS_DB, IN_MEMORY),
+                Importance.LOW,
+                DEFAULT_DSL_STORE_DOC);
     }
     private final Logger log = LoggerFactory.getLogger(TopologyConfig.class);
 
@@ -96,6 +108,7 @@ public class TopologyConfig extends AbstractConfig {
     public final long cacheSize;
     public final long maxTaskIdleMs;
     public final long taskTimeoutMs;
+    public final String storeType;
     public final Supplier<TimestampExtractor> timestampExtractorSupplier;
     public final Supplier<DeserializationExceptionHandler> deserializationExceptionHandlerSupplier;
 
@@ -128,7 +141,7 @@ public TopologyConfig(final String topologyName, final StreamsConfig globalAppCo
 
         if (isTopologyOverride(MAX_TASK_IDLE_MS_CONFIG, topologyOverrides)) {
             maxTaskIdleMs = getLong(MAX_TASK_IDLE_MS_CONFIG);
-            log.info("Topology {} is overridding {} to {}", topologyName, MAX_TASK_IDLE_MS_CONFIG, maxTaskIdleMs);
+            log.info("Topology {} is overriding {} to {}", topologyName, MAX_TASK_IDLE_MS_CONFIG, maxTaskIdleMs);
         } else {
             maxTaskIdleMs = globalAppConfigs.getLong(MAX_TASK_IDLE_MS_CONFIG);
         }
@@ -153,6 +166,20 @@ public TopologyConfig(final String topologyName, final StreamsConfig globalAppCo
         } else {
             deserializationExceptionHandlerSupplier = () -> globalAppConfigs.getConfiguredInstance(DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG, DeserializationExceptionHandler.class);
         }
+
+        if (isTopologyOverride(DEFAULT_DSL_STORE_CONFIG, topologyOverrides)) {
+            storeType = getString(DEFAULT_DSL_STORE_CONFIG);
+            log.info("Topology {} is overriding {} to {}", topologyName, DEFAULT_DSL_STORE_CONFIG, storeType);
+        } else {
+            storeType = globalAppConfigs.getString(DEFAULT_DSL_STORE_CONFIG);
+        }
+    }
+
+    public Materialized.StoreType parseStoreType() {
+        if (storeType.equals(IN_MEMORY)) {
+            return Materialized.StoreType.IN_MEMORY;
+        }
+        return Materialized.StoreType.ROCKS_DB;
     }
 
     public boolean isNamedTopology() {
diff --git a/streams/src/main/java/org/apache/kafka/streams/TopologyDescription.java b/streams/src/main/java/org/apache/kafka/streams/TopologyDescription.java
index 6f26779c96fb2..deea3a465cd46 100644
--- a/streams/src/main/java/org/apache/kafka/streams/TopologyDescription.java
+++ b/streams/src/main/java/org/apache/kafka/streams/TopologyDescription.java
@@ -30,6 +30,7 @@
  * In contrast, two sub-topologies are not connected but can be linked to each other via topics, i.e., if one
  * sub-topology {@link Topology#addSink(String, String, String...) writes} into a topic and another sub-topology
  * {@link Topology#addSource(String, String...) reads} from the same topic.
+ * Message {@link ProcessorContext#forward(Object, Object) forwards} using custom Processors and Transformers are not considered in the topology graph.
  * <p>
  * When {@link KafkaStreams#start()} is called, different sub-topologies will be constructed and executed as independent
  * {@link StreamTask tasks}.
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/EmitStrategy.java b/streams/src/main/java/org/apache/kafka/streams/kstream/EmitStrategy.java
new file mode 100644
index 0000000000000..365a19c1426f5
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/EmitStrategy.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream;
+
+import org.apache.kafka.streams.kstream.internals.UnlimitedWindow;
+import org.apache.kafka.streams.kstream.internals.emitstrategy.WindowCloseStrategy;
+import org.apache.kafka.streams.kstream.internals.emitstrategy.WindowUpdateStrategy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * This interface controls the strategy that can be used to control how we emit results in a processor.
+ */
+public interface EmitStrategy {
+
+    Logger log = LoggerFactory.getLogger(EmitStrategy.class);
+
+    enum StrategyType {
+        ON_WINDOW_UPDATE(0, new WindowUpdateStrategy()),
+        ON_WINDOW_CLOSE(1, new WindowCloseStrategy());
+
+        private final short code;
+        private final EmitStrategy strategy;
+
+        private short code() {
+            return this.code;
+        }
+
+        private EmitStrategy strategy() {
+            return this.strategy;
+        }
+
+        StrategyType(final int code, final EmitStrategy strategy) {
+            this.code = (short) code;
+            this.strategy = strategy;
+        }
+
+        private final static Map<Short, EmitStrategy> TYPE_TO_STRATEGY = new HashMap<>();
+
+        static {
+            for (final StrategyType type : StrategyType.values()) {
+                if (TYPE_TO_STRATEGY.put(type.code(), type.strategy()) != null)
+                    throw new IllegalStateException("Code " + type.code() + " for type " +
+                            type + " has already been used");
+            }
+        }
+
+        public static EmitStrategy forType(final StrategyType type) {
+            return TYPE_TO_STRATEGY.get(type.code());
+        }
+    }
+
+    /**
+     * Returns the strategy type
+     * @return Emit strategy type
+     */
+    StrategyType type();
+
+    /**
+     * This strategy indicates that the aggregated result for a window will only be emitted when the
+     * window closes instead of when there's an update to the window. Window close means that current
+     * event time is larger than (window end time + grace period).
+     *
+     * <p>This strategy should only be used for windows which can close. An exception will be thrown
+     * if it's used with {@link UnlimitedWindow}.
+     *
+     * @see TimeWindows
+     * @see SlidingWindows
+     * @see SessionWindows
+     * @see UnlimitedWindows
+     * @see WindowUpdateStrategy
+     *
+     * @return WindowCloseStrategy instance
+     */
+    static EmitStrategy onWindowClose() {
+        return new WindowCloseStrategy();
+    }
+
+    /**
+     * This strategy indicates that the aggregated result for a window will be emitted every time
+     * when there's an update to the window instead of when the window closes.
+     *
+     * @see TimeWindows
+     * @see SlidingWindows
+     * @see SessionWindows
+     * @see UnlimitedWindows
+     * @see WindowCloseStrategy
+     *
+     * @return WindowCloseStrategy instance
+     */
+    static EmitStrategy onWindowUpdate() {
+        return new WindowUpdateStrategy();
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
index c2ec7574d80a7..33f0892a6f479 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.streams.kstream;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
@@ -25,6 +24,9 @@
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.Topology;
 import org.apache.kafka.streams.processor.ConnectedStoreProvider;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorContext;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
 import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
@@ -3205,7 +3207,9 @@ <GK, GV, RV> KStream<K, RV> leftJoin(final GlobalKTable<GK, GV> globalTable,
      * @see #transformValues(ValueTransformerSupplier, String...)
      * @see #transformValues(ValueTransformerWithKeySupplier, String...)
      * @see #process(ProcessorSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#process(ProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <K1, V1> KStream<K1, V1> transform(final TransformerSupplier<? super K, ? super V, KeyValue<K1, V1>> transformerSupplier,
                                        final String... stateStoreNames);
 
@@ -3334,7 +3338,9 @@ <K1, V1> KStream<K1, V1> transform(final TransformerSupplier<? super K, ? super
      * @see #transformValues(ValueTransformerSupplier, String...)
      * @see #transformValues(ValueTransformerWithKeySupplier, String...)
      * @see #process(ProcessorSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#process(ProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <K1, V1> KStream<K1, V1> transform(final TransformerSupplier<? super K, ? super V, KeyValue<K1, V1>> transformerSupplier,
                                        final Named named,
                                        final String... stateStoreNames);
@@ -3463,7 +3469,9 @@ <K1, V1> KStream<K1, V1> transform(final TransformerSupplier<? super K, ? super
      * @see #transformValues(ValueTransformerSupplier, String...)
      * @see #transformValues(ValueTransformerWithKeySupplier, String...)
      * @see #process(ProcessorSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#process(ProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? super V, Iterable<KeyValue<K1, V1>>> transformerSupplier,
                                            final String... stateStoreNames);
 
@@ -3592,7 +3600,9 @@ <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? su
      * @see #transformValues(ValueTransformerSupplier, String...)
      * @see #transformValues(ValueTransformerWithKeySupplier, String...)
      * @see #process(ProcessorSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#process(ProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? super V, Iterable<KeyValue<K1, V1>>> transformerSupplier,
                                            final Named named,
                                            final String... stateStoreNames);
@@ -3702,7 +3712,9 @@ <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? su
      * @see #mapValues(ValueMapper)
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ? extends VR> valueTransformerSupplier,
                                         final String... stateStoreNames);
     /**
@@ -3812,7 +3824,9 @@ <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ?
      * @see #mapValues(ValueMapper)
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ? extends VR> valueTransformerSupplier,
                                         final Named named,
                                         final String... stateStoreNames);
@@ -3926,7 +3940,9 @@ <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ?
      * @see #mapValues(ValueMapper)
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, ? extends VR> valueTransformerSupplier,
                                         final String... stateStoreNames);
 
@@ -4040,7 +4056,9 @@ <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? supe
      * @see #mapValues(ValueMapper)
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, ? extends VR> valueTransformerSupplier,
                                         final Named named,
                                         final String... stateStoreNames);
@@ -4163,7 +4181,9 @@ <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? supe
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
      * @see #flatTransform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V, Iterable<VR>> valueTransformerSupplier,
                                             final String... stateStoreNames);
 
@@ -4287,7 +4307,9 @@ <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
      * @see #flatTransform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V, Iterable<VR>> valueTransformerSupplier,
                                             final Named named,
                                             final String... stateStoreNames);
@@ -4412,7 +4434,9 @@ <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
      * @see #flatTransform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, Iterable<VR>> valueTransformerSupplier,
                                             final String... stateStoreNames);
 
@@ -4537,7 +4561,9 @@ <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<?
      * @see #mapValues(ValueMapperWithKey)
      * @see #transform(TransformerSupplier, String...)
      * @see #flatTransform(TransformerSupplier, String...)
+     * @deprecated Since 3.3. Use {@link KStream#processValues(FixedKeyProcessorSupplier, Named, String...)} instead.
      */
+    @Deprecated
     <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, Iterable<VR>> valueTransformerSupplier,
                                             final Named named,
                                             final String... stateStoreNames);
@@ -4644,10 +4670,10 @@ <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<?
     void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super K, ? super V> processorSupplier,
                  final String... stateStoreNames);
 
-
     /**
-     * Process all records in this stream, one record at a time, by applying a {@link Processor} (provided by the given
-     * {@link ProcessorSupplier}).
+     * Process all records in this stream, one record at a time, by applying a
+     * {@link org.apache.kafka.streams.processor.Processor} (provided by the given
+     * {@link org.apache.kafka.streams.processor.ProcessorSupplier}).
      * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #foreach(ForeachAction)}).
      * If you choose not to attach one, this operation is similar to the stateless {@link #foreach(ForeachAction)}
      * but allows access to the {@code ProcessorContext} and record metadata.
@@ -4677,7 +4703,8 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      *     }
      * }, "myProcessorState");
      * }</pre>
-     * The second strategy is for the given {@link ProcessorSupplier} to implement {@link ConnectedStoreProvider#stores()},
+     * The second strategy is for the given {@link org.apache.kafka.streams.processor.ProcessorSupplier}
+     * to implement {@link ConnectedStoreProvider#stores()},
      * which provides the {@link StoreBuilder}s to be automatically added to the topology and connected to the processor.
      * <pre>{@code
      * class MyProcessorSupplier implements ProcessorSupplier {
@@ -4702,7 +4729,8 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      * KStream outputStream = inputStream.process(new MyProcessorSupplier());
      * }</pre>
      * <p>
-     * With either strategy, within the {@link Processor}, the state is obtained via the {@link ProcessorContext}.
+     * With either strategy, within the {@link org.apache.kafka.streams.processor.Processor},
+     * the state is obtained via the {@link org.apache.kafka.streams.processor.ProcessorContext}.
      * To trigger periodic actions via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long) punctuate()},
      * a schedule must be registered.
      * <pre>{@code
@@ -4727,29 +4755,34 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      * Even if any upstream operation was key-changing, no auto-repartition is triggered.
      * If repartitioning is required, a call to {@link #repartition()} should be performed before {@code process()}.
      *
-     * @param processorSupplier an instance of {@link ProcessorSupplier} that generates a newly constructed {@link Processor}
-     *                          The supplier should always generate a new instance. Creating a single {@link Processor} object
-     *                          and returning the same object reference in {@link ProcessorSupplier#get()} is a
+     * @param processorSupplier an instance of {@link org.apache.kafka.streams.processor.ProcessorSupplier}
+     *                          that generates a newly constructed {@link org.apache.kafka.streams.processor.Processor}
+     *                          The supplier should always generate a new instance. Creating a single
+     *                          {@link org.apache.kafka.streams.processor.Processor} object
+     *                          and returning the same object reference in
+     *                          {@link org.apache.kafka.streams.processor.ProcessorSupplier#get()} is a
      *                          violation of the supplier pattern and leads to runtime exceptions.
-     * @param stateStoreNames     the names of the state stores used by the processor; not required if the supplier
-     *                            implements {@link ConnectedStoreProvider#stores()}
+     * @param named             a {@link Named} config used to name the processor in the topology
+     * @param stateStoreNames   the names of the state store used by the processor
      * @see #foreach(ForeachAction)
      * @see #transform(TransformerSupplier, String...)
+     * @deprecated Since 3.0. Use {@link KStream#process(org.apache.kafka.streams.processor.api.ProcessorSupplier, org.apache.kafka.streams.kstream.Named, java.lang.String...)} instead.
      */
-    void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processorSupplier,
+    @Deprecated
+    void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super K, ? super V> processorSupplier,
+                 final Named named,
                  final String... stateStoreNames);
 
     /**
-     * Process all records in this stream, one record at a time, by applying a
-     * {@link org.apache.kafka.streams.processor.Processor} (provided by the given
-     * {@link org.apache.kafka.streams.processor.ProcessorSupplier}).
-     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #foreach(ForeachAction)}).
-     * If you choose not to attach one, this operation is similar to the stateless {@link #foreach(ForeachAction)}
-     * but allows access to the {@code ProcessorContext} and record metadata.
+     * Process all records in this stream, one record at a time, by applying a {@link Processor} (provided by the given
+     * {@link ProcessorSupplier}).
+     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #map(KeyValueMapper)}).
+     * If you choose not to attach one, this operation is similar to the stateless {@link #map(KeyValueMapper)}
+     * but allows access to the {@link org.apache.kafka.streams.processor.api.ProcessorContext}
+     * and {@link org.apache.kafka.streams.processor.api.Record} metadata.
      * This is essentially mixing the Processor API into the DSL, and provides all the functionality of the PAPI.
      * Furthermore, via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long)} the processing progress
      * can be observed and additional periodic actions can be performed.
-     * Note that this is a terminal operation that returns void.
      * <p>
      * In order for the processor to use state stores, the stores must be added to the topology and connected to the
      * processor using at least one of two strategies (though it's not required to connect global state stores; read-only
@@ -4766,14 +4799,13 @@ void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processor
      * // add store
      * builder.addStateStore(keyValueStoreBuilder);
      *
-     * KStream outputStream = inputStream.processor(new ProcessorSupplier() {
+     * KStream outputStream = inputStream.process(new ProcessorSupplier() {
      *     public Processor get() {
      *         return new MyProcessor();
      *     }
      * }, "myProcessorState");
      * }</pre>
-     * The second strategy is for the given {@link org.apache.kafka.streams.processor.ProcessorSupplier}
-     * to implement {@link ConnectedStoreProvider#stores()},
+     * The second strategy is for the given {@link ProcessorSupplier} to implement {@link ConnectedStoreProvider#stores()},
      * which provides the {@link StoreBuilder}s to be automatically added to the topology and connected to the processor.
      * <pre>{@code
      * class MyProcessorSupplier implements ProcessorSupplier {
@@ -4798,8 +4830,7 @@ void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processor
      * KStream outputStream = inputStream.process(new MyProcessorSupplier());
      * }</pre>
      * <p>
-     * With either strategy, within the {@link org.apache.kafka.streams.processor.Processor},
-     * the state is obtained via the {@link org.apache.kafka.streams.processor.ProcessorContext}.
+     * With either strategy, within the {@link Processor}, the state is obtained via the {@link ProcessorContext}.
      * To trigger periodic actions via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long) punctuate()},
      * a schedule must be registered.
      * <pre>{@code
@@ -4812,7 +4843,7 @@ void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processor
      *         context.schedule(Duration.ofSeconds(1), PunctuationType.WALL_CLOCK_TIME, new Punctuator(..));
      *     }
      *
-     *     void process(K key, V value) {
+     *     void process(Record<K, V> record) {
      *         // can access this.state
      *     }
      *
@@ -4823,35 +4854,35 @@ void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processor
      * }</pre>
      * Even if any upstream operation was key-changing, no auto-repartition is triggered.
      * If repartitioning is required, a call to {@link #repartition()} should be performed before {@code process()}.
+     * <p>
+     * Processing records might result in an internal data redistribution if a key based operator (like an aggregation
+     * or join) is applied to the result {@code KStream}.
+     * (cf. {@link #processValues(FixedKeyProcessorSupplier, String...)})
      *
-     * @param processorSupplier an instance of {@link org.apache.kafka.streams.processor.ProcessorSupplier}
-     *                          that generates a newly constructed {@link org.apache.kafka.streams.processor.Processor}
-     *                          The supplier should always generate a new instance. Creating a single
-     *                          {@link org.apache.kafka.streams.processor.Processor} object
-     *                          and returning the same object reference in
-     *                          {@link org.apache.kafka.streams.processor.ProcessorSupplier#get()} is a
+     * @param processorSupplier an instance of {@link ProcessorSupplier} that generates a newly constructed {@link Processor}
+     *                          The supplier should always generate a new instance. Creating a single {@link Processor} object
+     *                          and returning the same object reference in {@link ProcessorSupplier#get()} is a
      *                          violation of the supplier pattern and leads to runtime exceptions.
-     * @param named             a {@link Named} config used to name the processor in the topology
-     * @param stateStoreNames   the names of the state store used by the processor
-     * @see #foreach(ForeachAction)
+     * @param stateStoreNames     the names of the state stores used by the processor; not required if the supplier
+     *                            implements {@link ConnectedStoreProvider#stores()}
+     * @see #map(KeyValueMapper)
      * @see #transform(TransformerSupplier, String...)
-     * @deprecated Since 3.0. Use {@link KStream#process(org.apache.kafka.streams.processor.api.ProcessorSupplier, org.apache.kafka.streams.kstream.Named, java.lang.String...)} instead.
      */
-    @Deprecated
-    void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super K, ? super V> processorSupplier,
-                 final Named named,
-                 final String... stateStoreNames);
+    <KOut, VOut> KStream<KOut, VOut> process(
+        final ProcessorSupplier<? super K, ? super V, KOut, VOut> processorSupplier,
+        final String... stateStoreNames
+    );
 
     /**
      * Process all records in this stream, one record at a time, by applying a {@link Processor} (provided by the given
      * {@link ProcessorSupplier}).
-     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #foreach(ForeachAction)}).
-     * If you choose not to attach one, this operation is similar to the stateless {@link #foreach(ForeachAction)}
-     * but allows access to the {@code ProcessorContext} and record metadata.
+     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #map(KeyValueMapper)}).
+     * If you choose not to attach one, this operation is similar to the stateless {@link #map(KeyValueMapper)}
+     * but allows access to the {@link org.apache.kafka.streams.processor.api.ProcessorContext}
+     * and {@link org.apache.kafka.streams.processor.api.Record} metadata.
      * This is essentially mixing the Processor API into the DSL, and provides all the functionality of the PAPI.
      * Furthermore, via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long)} the processing progress
      * can be observed and additional periodic actions can be performed.
-     * Note that this is a terminal operation that returns void.
      * <p>
      * In order for the processor to use state stores, the stores must be added to the topology and connected to the
      * processor using at least one of two strategies (though it's not required to connect global state stores; read-only
@@ -4868,7 +4899,7 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      * // add store
      * builder.addStateStore(keyValueStoreBuilder);
      *
-     * KStream outputStream = inputStream.processor(new ProcessorSupplier() {
+     * KStream outputStream = inputStream.process(new ProcessorSupplier() {
      *     public Processor get() {
      *         return new MyProcessor();
      *     }
@@ -4912,7 +4943,7 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      *         context.schedule(Duration.ofSeconds(1), PunctuationType.WALL_CLOCK_TIME, new Punctuator(..));
      *     }
      *
-     *     void process(K key, V value) {
+     *     void process(Record<K, V> record) {
      *         // can access this.state
      *     }
      *
@@ -4923,6 +4954,10 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      * }</pre>
      * Even if any upstream operation was key-changing, no auto-repartition is triggered.
      * If repartitioning is required, a call to {@link #repartition()} should be performed before {@code process()}.
+     * <p>
+     * Processing records might result in an internal data redistribution if a key based operator (like an aggregation
+     * or join) is applied to the result {@code KStream}.
+     * (cf. {@link #processValues(FixedKeyProcessorSupplier, Named, String...)})
      *
      * @param processorSupplier an instance of {@link ProcessorSupplier} that generates a newly constructed {@link Processor}
      *                          The supplier should always generate a new instance. Creating a single {@link Processor} object
@@ -4930,10 +4965,212 @@ void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super
      *                          violation of the supplier pattern and leads to runtime exceptions.
      * @param named             a {@link Named} config used to name the processor in the topology
      * @param stateStoreNames   the names of the state store used by the processor
-     * @see #foreach(ForeachAction)
-     * @see #transform(TransformerSupplier, String...)
+     * @see #map(KeyValueMapper)
+     * @see #processValues(FixedKeyProcessorSupplier, Named, String...)
      */
-    void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processorSupplier,
-                 final Named named,
-                 final String... stateStoreNames);
+    <KOut, VOut> KStream<KOut, VOut> process(
+        final ProcessorSupplier<? super K, ? super V, KOut, VOut> processorSupplier,
+        final Named named,
+        final String... stateStoreNames
+    );
+
+    /**
+     * Process all records in this stream, one record at a time, by applying a {@link FixedKeyProcessor} (provided by the given
+     * {@link FixedKeyProcessorSupplier}).
+     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #mapValues(ValueMapper)}).
+     * If you choose not to attach one, this operation is similar to the stateless {@link #mapValues(ValueMapper)}
+     * but allows access to the {@link org.apache.kafka.streams.processor.api.ProcessorContext}
+     * and {@link org.apache.kafka.streams.processor.api.Record} metadata.
+     * This is essentially mixing the Processor API into the DSL, and provides all the functionality of the PAPI.
+     * Furthermore, via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long)} the processing progress
+     * can be observed and additional periodic actions can be performed.
+     * <p>
+     * In order for the processor to use state stores, the stores must be added to the topology and connected to the
+     * processor using at least one of two strategies (though it's not required to connect global state stores; read-only
+     * access to global state stores is available by default).
+     * <p>
+     * The first strategy is to manually add the {@link StoreBuilder}s via {@link Topology#addStateStore(StoreBuilder, String...)},
+     * and specify the store names via {@code stateStoreNames} so they will be connected to the processor.
+     * <pre>{@code
+     * // create store
+     * StoreBuilder<KeyValueStore<String,String>> keyValueStoreBuilder =
+     *         Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("myProcessorState"),
+     *                 Serdes.String(),
+     *                 Serdes.String());
+     * // add store
+     * builder.addStateStore(keyValueStoreBuilder);
+     *
+     * KStream outputStream = inputStream.processValues(new ProcessorSupplier() {
+     *     public Processor get() {
+     *         return new MyProcessor();
+     *     }
+     * }, "myProcessorState");
+     * }</pre>
+     * The second strategy is for the given {@link ProcessorSupplier} to implement {@link ConnectedStoreProvider#stores()},
+     * which provides the {@link StoreBuilder}s to be automatically added to the topology and connected to the processor.
+     * <pre>{@code
+     * class MyProcessorSupplier implements FixedKeyProcessorSupplier {
+     *     // supply processor
+     *     FixedKeyProcessor get() {
+     *         return new MyProcessor();
+     *     }
+     *
+     *     // provide store(s) that will be added and connected to the associated processor
+     *     // the store name from the builder ("myProcessorState") is used to access the store later via the ProcessorContext
+     *     Set<StoreBuilder> stores() {
+     *         StoreBuilder<KeyValueStore<String, String>> keyValueStoreBuilder =
+     *                   Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("myProcessorState"),
+     *                   Serdes.String(),
+     *                   Serdes.String());
+     *         return Collections.singleton(keyValueStoreBuilder);
+     *     }
+     * }
+     *
+     * ...
+     *
+     * KStream outputStream = inputStream.processValues(new MyProcessorSupplier());
+     * }</pre>
+     * <p>
+     * With either strategy, within the {@link FixedKeyProcessor}, the state is obtained via the {@link FixedKeyProcessorContext}.
+     * To trigger periodic actions via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long) punctuate()},
+     * a schedule must be registered.
+     * <pre>{@code
+     * class MyProcessor implements FixedKeyProcessor {
+     *     private StateStore state;
+     *
+     *     void init(ProcessorContext context) {
+     *         this.state = context.getStateStore("myProcessorState");
+     *         // punctuate each second, can access this.state
+     *         context.schedule(Duration.ofSeconds(1), PunctuationType.WALL_CLOCK_TIME, new Punctuator(..));
+     *     }
+     *
+     *     void process(FixedKeyRecord<K, V> record) {
+     *         // can access this.state
+     *     }
+     *
+     *     void close() {
+     *         // can access this.state
+     *     }
+     * }
+     * }</pre>
+     * Even if any upstream operation was key-changing, no auto-repartition is triggered.
+     * If repartitioning is required, a call to {@link #repartition()} should be performed before {@code process()}.
+     * <p>
+     * Setting a new value preserves data co-location with respect to the key.
+     * Thus, <em>no</em> internal data redistribution is required if a key based operator (like an aggregation or join)
+     * is applied to the result {@code KStream}. (cf. {@link #process(ProcessorSupplier, String...)})
+     *
+     * @param processorSupplier an instance of {@link FixedKeyProcessorSupplier} that generates a newly constructed {@link FixedKeyProcessor}
+     *                          The supplier should always generate a new instance. Creating a single {@link FixedKeyProcessor} object
+     *                          and returning the same object reference in {@link FixedKeyProcessorSupplier#get()} is a
+     *                          violation of the supplier pattern and leads to runtime exceptions.
+     * @param stateStoreNames   the names of the state store used by the processor
+     * @see #mapValues(ValueMapper)
+     * @see #process(ProcessorSupplier, Named, String...)
+     */
+    <VOut> KStream<K, VOut> processValues(
+        final FixedKeyProcessorSupplier<? super K, ? super V, VOut> processorSupplier,
+        final String... stateStoreNames
+    );
+
+    /**
+     * Process all records in this stream, one record at a time, by applying a {@link FixedKeyProcessor} (provided by the given
+     * {@link FixedKeyProcessorSupplier}).
+     * Attaching a state store makes this a stateful record-by-record operation (cf. {@link #mapValues(ValueMapper)}).
+     * If you choose not to attach one, this operation is similar to the stateless {@link #mapValues(ValueMapper)}
+     * but allows access to the {@link org.apache.kafka.streams.processor.api.ProcessorContext}
+     * and {@link org.apache.kafka.streams.processor.api.Record} metadata.
+     * This is essentially mixing the Processor API into the DSL, and provides all the functionality of the PAPI.
+     * Furthermore, via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long)} the processing progress
+     * can be observed and additional periodic actions can be performed.
+     * <p>
+     * In order for the processor to use state stores, the stores must be added to the topology and connected to the
+     * processor using at least one of two strategies (though it's not required to connect global state stores; read-only
+     * access to global state stores is available by default).
+     * <p>
+     * The first strategy is to manually add the {@link StoreBuilder}s via {@link Topology#addStateStore(StoreBuilder, String...)},
+     * and specify the store names via {@code stateStoreNames} so they will be connected to the processor.
+     * <pre>{@code
+     * // create store
+     * StoreBuilder<KeyValueStore<String,String>> keyValueStoreBuilder =
+     *         Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("myProcessorState"),
+     *                 Serdes.String(),
+     *                 Serdes.String());
+     * // add store
+     * builder.addStateStore(keyValueStoreBuilder);
+     *
+     * KStream outputStream = inputStream.processValues(new ProcessorSupplier() {
+     *     public Processor get() {
+     *         return new MyProcessor();
+     *     }
+     * }, "myProcessorState");
+     * }</pre>
+     * The second strategy is for the given {@link ProcessorSupplier} to implement {@link ConnectedStoreProvider#stores()},
+     * which provides the {@link StoreBuilder}s to be automatically added to the topology and connected to the processor.
+     * <pre>{@code
+     * class MyProcessorSupplier implements FixedKeyProcessorSupplier {
+     *     // supply processor
+     *     FixedKeyProcessor get() {
+     *         return new MyProcessor();
+     *     }
+     *
+     *     // provide store(s) that will be added and connected to the associated processor
+     *     // the store name from the builder ("myProcessorState") is used to access the store later via the ProcessorContext
+     *     Set<StoreBuilder> stores() {
+     *         StoreBuilder<KeyValueStore<String, String>> keyValueStoreBuilder =
+     *                   Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("myProcessorState"),
+     *                   Serdes.String(),
+     *                   Serdes.String());
+     *         return Collections.singleton(keyValueStoreBuilder);
+     *     }
+     * }
+     *
+     * ...
+     *
+     * KStream outputStream = inputStream.processValues(new MyProcessorSupplier());
+     * }</pre>
+     * <p>
+     * With either strategy, within the {@link FixedKeyProcessor}, the state is obtained via the {@link FixedKeyProcessorContext}.
+     * To trigger periodic actions via {@link org.apache.kafka.streams.processor.Punctuator#punctuate(long) punctuate()},
+     * a schedule must be registered.
+     * <pre>{@code
+     * class MyProcessor implements FixedKeyProcessor {
+     *     private StateStore state;
+     *
+     *     void init(ProcessorContext context) {
+     *         this.state = context.getStateStore("myProcessorState");
+     *         // punctuate each second, can access this.state
+     *         context.schedule(Duration.ofSeconds(1), PunctuationType.WALL_CLOCK_TIME, new Punctuator(..));
+     *     }
+     *
+     *     void process(FixedKeyRecord<K, V> record) {
+     *         // can access this.state
+     *     }
+     *
+     *     void close() {
+     *         // can access this.state
+     *     }
+     * }
+     * }</pre>
+     * Even if any upstream operation was key-changing, no auto-repartition is triggered.
+     * If repartitioning is required, a call to {@link #repartition()} should be performed before {@code process()}.
+     * <p>
+     * Setting a new value preserves data co-location with respect to the key.
+     * Thus, <em>no</em> internal data redistribution is required if a key based operator (like an aggregation or join)
+     * is applied to the result {@code KStream}. (cf. {@link #process(ProcessorSupplier, String...)})
+     *
+     * @param processorSupplier an instance of {@link FixedKeyProcessorSupplier} that generates a newly constructed {@link FixedKeyProcessor}
+     *                          The supplier should always generate a new instance. Creating a single {@link FixedKeyProcessor} object
+     *                          and returning the same object reference in {@link FixedKeyProcessorSupplier#get()} is a
+     *                          violation of the supplier pattern and leads to runtime exceptions.
+     * @param named             a {@link Named} config used to name the processor in the topology
+     * @param stateStoreNames   the names of the state store used by the processor
+     * @see #mapValues(ValueMapper)
+     * @see #process(ProcessorSupplier, Named, String...)
+     */
+    <VOut> KStream<K, VOut> processValues(
+        final FixedKeyProcessorSupplier<? super K, ? super V, VOut> processorSupplier,
+        final Named named,
+        final String... stateStoreNames
+    );
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Materialized.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Materialized.java
index 82b38000e09b5..f63b6b9773df6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/Materialized.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Materialized.java
@@ -64,6 +64,13 @@ public class Materialized<K, V, S extends StateStore> {
     protected boolean cachingEnabled = true;
     protected Map<String, String> topicConfig = new HashMap<>();
     protected Duration retention;
+    public StoreType storeType;
+
+    // the built-in state store types
+    public enum StoreType {
+        ROCKS_DB,
+        IN_MEMORY
+    }
 
     private Materialized(final StoreSupplier<S> storeSupplier) {
         this.storeSupplier = storeSupplier;
@@ -73,6 +80,10 @@ private Materialized(final String storeName) {
         this.storeName = storeName;
     }
 
+    private Materialized(final StoreType storeType) {
+        this.storeType = storeType;
+    }
+
     /**
      * Copy constructor.
      * @param materialized  the {@link Materialized} instance to copy.
@@ -86,6 +97,21 @@ protected Materialized(final Materialized<K, V, S> materialized) {
         this.cachingEnabled = materialized.cachingEnabled;
         this.topicConfig = materialized.topicConfig;
         this.retention = materialized.retention;
+        this.storeType = materialized.storeType;
+    }
+
+    /**
+     * Materialize a {@link StateStore} with the given {@link StoreType}.
+     *
+     * @param storeType  the type of the state store
+     * @param <K>       key type of the store
+     * @param <V>       value type of the store
+     * @param <S>       type of the {@link StateStore}
+     * @return a new {@link Materialized} instance with the given storeName
+     */
+    public static <K, V, S extends StateStore> Materialized<K, V, S> as(final StoreType storeType) {
+        Objects.requireNonNull(storeType, "store type can't be null");
+        return new Materialized<>(storeType);
     }
 
     /**
@@ -251,12 +277,28 @@ public Materialized<K, V, S> withCachingDisabled() {
      */
     public Materialized<K, V, S> withRetention(final Duration retention) throws IllegalArgumentException {
         final String msgPrefix = prepareMillisCheckFailMsgPrefix(retention, "retention");
-        final long retenationMs = validateMillisecondDuration(retention, msgPrefix);
+        final long retentionMs = validateMillisecondDuration(retention, msgPrefix);
 
-        if (retenationMs < 0) {
+        if (retentionMs < 0) {
             throw new IllegalArgumentException("Retention must not be negative.");
         }
         this.retention = retention;
         return this;
     }
+
+    /**
+     * Set the type of the materialized {@link StateStore}.
+     *
+     * @param storeType  the store type {@link StoreType} to use.
+     * @return itself
+     * @throws IllegalArgumentException if store supplier is also pre-configured
+     */
+    public Materialized<K, V, S> withStoreType(final StoreType storeType) throws IllegalArgumentException {
+        Objects.requireNonNull(storeType, "store type can't be null");
+        if (storeSupplier != null) {
+            throw new IllegalArgumentException("Cannot set store type when store supplier is pre-configured.");
+        }
+        this.storeType = storeType;
+        return this;
+    }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Produced.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Produced.java
index b14c846925f40..7848c2db8749f 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/Produced.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Produced.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.streams.kstream;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
 import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner;
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/Repartitioned.java b/streams/src/main/java/org/apache/kafka/streams/kstream/Repartitioned.java
index 40f66f036c603..929b14ab29072 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/Repartitioned.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/Repartitioned.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.streams.kstream;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
 import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner;
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/SessionWindowedKStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/SessionWindowedKStream.java
index 1b7a363d3c201..a9eaddbee483d 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/SessionWindowedKStream.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/SessionWindowedKStream.java
@@ -39,7 +39,7 @@
  * materialized view) that can be queried using the name provided in the {@link Materialized} instance.
  * Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
  * "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
- * New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
+ * New events are added to sessions until their grace period ends (see {@link SessionWindows#ofInactivityGapAndGrace(Duration, Duration)}).
  * <p>
  * A {@code SessionWindowedKStream} must be obtained from a {@link KGroupedStream} via
  * {@link KGroupedStream#windowedBy(SessionWindows)}.
@@ -643,4 +643,7 @@ KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
     KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
                                   final Named named,
                                   final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
+
+    // TODO: add javadoc
+    SessionWindowedKStream<K, V> emitStrategy(final EmitStrategy emitStrategy);
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindowedKStream.java b/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindowedKStream.java
index c015e79032a2c..122f73f7a709a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindowedKStream.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/TimeWindowedKStream.java
@@ -648,4 +648,7 @@ KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
     KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
                                   final Named named,
                                   final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
+
+    // TODO: add javadoc
+    TimeWindowedKStream<K, V> emitStrategy(final EmitStrategy emitStrategy);
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractKStreamTimeWindowAggregateProcessor.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractKStreamTimeWindowAggregateProcessor.java
new file mode 100644
index 0000000000000..a081a280bafb8
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/AbstractKStreamTimeWindowAggregateProcessor.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals;
+
+import static org.apache.kafka.streams.StreamsConfig.InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION;
+import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emitFinalLatencySensor;
+import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emittedRecordsSensor;
+import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensor;
+
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
+import org.apache.kafka.streams.kstream.Window;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.internals.KStreamImplJoin.TimeTracker;
+import org.apache.kafka.streams.processor.api.ContextualProcessor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.api.RecordMetadata;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.TimestampedWindowStore;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+import org.slf4j.Logger;
+
+public abstract class AbstractKStreamTimeWindowAggregateProcessor<KIn, VIn, VAgg> extends ContextualProcessor<KIn, VIn, Windowed<KIn>, Change<VAgg>> {
+
+    private final Time time = Time.SYSTEM;
+    private final String storeName;
+    private final EmitStrategy emitStrategy;
+    private final boolean sendOldValues;
+    protected final TimeTracker timeTracker = new TimeTracker();
+
+    private TimestampedTupleForwarder<Windowed<KIn>, VAgg> tupleForwarder;
+    protected TimestampedWindowStore<KIn, VAgg> windowStore;
+    protected Sensor droppedRecordsSensor;
+    protected Sensor emittedRecordsSensor;
+    protected Sensor emitFinalLatencySensor;
+    protected long lastEmitWindowCloseTime = ConsumerRecord.NO_TIMESTAMP;
+    protected long observedStreamTime = ConsumerRecord.NO_TIMESTAMP;
+    protected InternalProcessorContext<Windowed<KIn>, Change<VAgg>> internalProcessorContext;
+
+    protected AbstractKStreamTimeWindowAggregateProcessor(final String storeName,
+                                                          final EmitStrategy emitStrategy,
+                                                          final boolean sendOldValues) {
+        this.storeName = storeName;
+        this.emitStrategy = emitStrategy;
+        this.sendOldValues = sendOldValues;
+    }
+
+    @Override
+    public void init(final ProcessorContext<Windowed<KIn>, Change<VAgg>> context) {
+        super.init(context);
+        internalProcessorContext = (InternalProcessorContext<Windowed<KIn>, Change<VAgg>>) context;
+        final StreamsMetricsImpl metrics = internalProcessorContext.metrics();
+        final String threadId = Thread.currentThread().getName();
+        final String processorName = internalProcessorContext.currentNode().name();
+        droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(), metrics);
+        emittedRecordsSensor = emittedRecordsSensor(threadId, context.taskId().toString(), processorName, metrics);
+        emitFinalLatencySensor = emitFinalLatencySensor(threadId, context.taskId().toString(), processorName, metrics);
+        windowStore = context.getStateStore(storeName);
+
+        if (emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE) {
+            // Restore last emit close time for ON_WINDOW_CLOSE strategy
+            final Long lastEmitWindowCloseTime = internalProcessorContext.processorMetadataForKey(storeName);
+            if (lastEmitWindowCloseTime != null) {
+                this.lastEmitWindowCloseTime = lastEmitWindowCloseTime;
+            }
+            final long emitInterval = StreamsConfig.InternalConfig.getLong(
+                context.appConfigs(),
+                EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION,
+                1000L
+            );
+            timeTracker.setEmitInterval(emitInterval);
+
+            tupleForwarder = new TimestampedTupleForwarder<>(context, sendOldValues);
+        } else {
+            tupleForwarder = new TimestampedTupleForwarder<>(
+                windowStore,
+                context,
+                new TimestampedCacheFlushListener<>(context),
+                sendOldValues);
+        }
+    }
+
+    protected void maybeForwardUpdate(final Record<KIn, VIn> record,
+                                      final Window window,
+                                      final VAgg oldAgg,
+                                      final VAgg newAgg,
+                                      final long newTimestamp) {
+        if (emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE) {
+            return;
+        }
+
+        tupleForwarder.maybeForward(
+            record.withKey(new Windowed<>(record.key(), window))
+                .withValue(new Change<>(newAgg, sendOldValues ? oldAgg : null))
+                .withTimestamp(newTimestamp));
+    }
+
+    protected void maybeForwardFinalResult(final Record<KIn, VIn> record, final long windowCloseTime) {
+        if (shouldEmitFinal(windowCloseTime)) {
+            final long emitRangeUpperBound = emitRangeUpperBound(windowCloseTime);
+
+            // if the upper bound is smaller than 0, then there's no window closed ever;
+            // and we can skip range fetching
+            if (emitRangeUpperBound >= 0) {
+                final long emitRangeLowerBound = emitRangeLowerBound(windowCloseTime);
+
+                if (shouldRangeFetch(emitRangeLowerBound, emitRangeUpperBound)) {
+                    fetchAndEmit(record, windowCloseTime, emitRangeLowerBound, emitRangeUpperBound);
+                }
+            }
+        }
+    }
+
+    protected void logSkippedRecordForExpiredWindow(final Logger log,
+                                                    final long timestamp,
+                                                    final long windowExpire,
+                                                    final String window) {
+        if (context().recordMetadata().isPresent()) {
+            final RecordMetadata recordMetadata = context().recordMetadata().get();
+            log.warn("Skipping record for expired window. " +
+                "topic=[{}] " +
+                "partition=[{}] " +
+                "offset=[{}] " +
+                "timestamp=[{}] " +
+                "window={} " +
+                "expiration=[{}] " +
+                "streamTime=[{}]",
+                recordMetadata.topic(),
+                recordMetadata.partition(),
+                recordMetadata.offset(),
+                timestamp,
+                window,
+                windowExpire,
+                observedStreamTime
+            );
+        } else {
+            log.warn("Skipping record for expired window. Topic, partition, and offset not known. " +
+                "timestamp=[{}] " +
+                "window={} " +
+                "expiration=[{}] " +
+                "streamTime=[{}]",
+                timestamp,
+                window,
+                windowExpire,
+                observedStreamTime
+            );
+        }
+        droppedRecordsSensor.record();
+    }
+
+    protected void updateObservedStreamTime(final long timestamp) {
+        observedStreamTime = Math.max(observedStreamTime, timestamp);
+    }
+
+    private boolean shouldEmitFinal(final long windowCloseTime) {
+        if (emitStrategy.type() != StrategyType.ON_WINDOW_CLOSE) {
+            return false;
+        }
+
+        final long now = internalProcessorContext.currentSystemTimeMs();
+        // Throttle emit frequency
+        if (now < timeTracker.nextTimeToEmit) {
+            return false;
+        }
+
+        // Schedule next emit time based on now to avoid the case that if system time jumps a lot,
+        // this can be triggered every time
+        timeTracker.nextTimeToEmit = now;
+        timeTracker.advanceNextTimeToEmit();
+
+        // Only EMIT if the window close time does progress
+        return lastEmitWindowCloseTime == ConsumerRecord.NO_TIMESTAMP || lastEmitWindowCloseTime < windowCloseTime;
+    }
+
+    private void fetchAndEmit(final Record<KIn, VIn> record,
+                              final long windowCloseTime,
+                              final long emitRangeLowerBound,
+                              final long emitRangeUpperBound) {
+        final long startMs = time.milliseconds();
+
+        final KeyValueIterator<Windowed<KIn>, ValueAndTimestamp<VAgg>> windowToEmit = windowStore
+            .fetchAll(emitRangeLowerBound, emitRangeUpperBound);
+
+        int emittedCount = 0;
+        while (windowToEmit.hasNext()) {
+            emittedCount++;
+            final KeyValue<Windowed<KIn>, ValueAndTimestamp<VAgg>> kv = windowToEmit.next();
+
+            tupleForwarder.maybeForward(
+                record.withKey(kv.key)
+                    .withValue(new Change<>(kv.value.value(), null))
+                    .withTimestamp(kv.value.timestamp())
+                    .withHeaders(record.headers()));
+        }
+        emittedRecordsSensor.record(emittedCount);
+        emitFinalLatencySensor.record(time.milliseconds() - startMs);
+
+        lastEmitWindowCloseTime = windowCloseTime;
+        internalProcessorContext.addProcessorMetadataKeyValue(storeName, windowCloseTime);
+    }
+
+    // upper and lower bound are inclusive; the bounds could be negative in which case we would skip range fetching and emitting
+    abstract protected long emitRangeLowerBound(final long windowCloseTime);
+
+    abstract protected long emitRangeUpperBound(final long windowCloseTime);
+
+    abstract protected boolean shouldRangeFetch(final long emitRangeLowerBound, final long emitRangeUpperBound);
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/CogroupedStreamAggregateBuilder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/CogroupedStreamAggregateBuilder.java
index c8ff1bd786cc6..3adc8beec8cd2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/CogroupedStreamAggregateBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/CogroupedStreamAggregateBuilder.java
@@ -26,6 +26,7 @@
 import java.util.Map.Entry;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Merger;
@@ -99,6 +100,7 @@ <KR, W extends Window> KTable<KR, VOut> build(final Map<KGroupedStreamImpl<K, ?>
                 (KStreamAggProcessorSupplier<K, ?, K, ?>) new KStreamWindowAggregate<K, K, VOut, W>(
                     windows,
                     storeBuilder.name(),
+                    EmitStrategy.onWindowUpdate(),
                     initializer,
                     kGroupedStream.getValue());
             parentProcessors.add(parentProcessor);
@@ -137,6 +139,7 @@ <KR> KTable<KR, VOut> build(final Map<KGroupedStreamImpl<K, ?>, Aggregator<? sup
                 (KStreamAggProcessorSupplier<K, ?, K, ?>) new KStreamSessionWindowAggregate<K, K, VOut>(
                     sessionWindows,
                     storeBuilder.name(),
+                    EmitStrategy.onWindowUpdate(),
                     initializer,
                     kGroupedStream.getValue(),
                     sessionMerger);
@@ -175,6 +178,8 @@ <KR> KTable<KR, VOut> build(final Map<KGroupedStreamImpl<K, ?>, Aggregator<? sup
                 (KStreamAggProcessorSupplier<K, ?, K, ?>) new KStreamSlidingWindowAggregate<K, K, VOut>(
                     slidingWindows,
                     storeBuilder.name(),
+                    // TODO: We do not have other emit policies for co-group yet
+                    EmitStrategy.onWindowUpdate(),
                     initializer,
                     kGroupedStream.getValue());
             parentProcessors.add(parentProcessor);
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/InternalStreamsBuilder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/InternalStreamsBuilder.java
index f33d599fba320..ac9d281be4e9c 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/InternalStreamsBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/InternalStreamsBuilder.java
@@ -328,21 +328,27 @@ private void mergeDuplicateSourceNodes() {
                         root.removeChild(graphNode);
                     }
                 } else {
-                    for (final String topic : currentSourceNode.topicNames().get()) {
-                        if (!topicsToSourceNodes.containsKey(topic)) {
-                            topicsToSourceNodes.put(topic, currentSourceNode);
-                        } else {
-                            final StreamSourceNode<?, ?> mainSourceNode = topicsToSourceNodes.get(topic);
-                            // TODO we only merge source nodes if the subscribed topic(s) are an exact match, so it's still not
-                            // possible to subscribe to topicA in one KStream and topicA + topicB in another. We could achieve
-                            // this by splitting these source nodes into one topic per node and routing to the subscribed children
-                            if (!mainSourceNode.topicNames().equals(currentSourceNode.topicNames())) {
-                                LOG.error("Topic {} was found in  subscription for non-equal source nodes {} and {}",
-                                          topic, mainSourceNode, currentSourceNode);
-                                throw new TopologyException("Two source nodes are subscribed to overlapping but not equal input topics");
+                    if (currentSourceNode.topicNames().isPresent()) {
+                        for (final String topic : currentSourceNode.topicNames().get()) {
+                            if (!topicsToSourceNodes.containsKey(topic)) {
+                                topicsToSourceNodes.put(topic, currentSourceNode);
+                            } else {
+                                final StreamSourceNode<?, ?> mainSourceNode = topicsToSourceNodes.get(
+                                    topic);
+                                // TODO we only merge source nodes if the subscribed topic(s) are an exact match, so it's still not
+                                // possible to subscribe to topicA in one KStream and topicA + topicB in another. We could achieve
+                                // this by splitting these source nodes into one topic per node and routing to the subscribed children
+                                if (!mainSourceNode.topicNames()
+                                    .equals(currentSourceNode.topicNames())) {
+                                    LOG.error(
+                                        "Topic {} was found in  subscription for non-equal source nodes {} and {}",
+                                        topic, mainSourceNode, currentSourceNode);
+                                    throw new TopologyException(
+                                        "Two source nodes are subscribed to overlapping but not equal input topics");
+                                }
+                                mainSourceNode.merge(currentSourceNode);
+                                root.removeChild(graphNode);
                             }
-                            mainSourceNode.merge(currentSourceNode);
-                            root.removeChild(graphNode);
                         }
                     }
                 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java
index ffafd10b460c4..2fc08608f08f0 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFilter.java
@@ -17,12 +17,12 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.streams.kstream.Predicate;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
-import org.apache.kafka.streams.processor.api.Processor;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
-import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 
-class KStreamFilter<K, V> implements ProcessorSupplier<K, V, K, V> {
+class KStreamFilter<K, V> implements FixedKeyProcessorSupplier<K, V, V> {
 
     private final Predicate<K, V> predicate;
     private final boolean filterNot;
@@ -33,13 +33,13 @@ public KStreamFilter(final Predicate<K, V> predicate, final boolean filterNot) {
     }
 
     @Override
-    public Processor<K, V, K, V> get() {
+    public FixedKeyProcessor<K, V, V> get() {
         return new KStreamFilterProcessor();
     }
 
-    private class KStreamFilterProcessor extends ContextualProcessor<K, V, K, V> {
+    private class KStreamFilterProcessor extends ContextualFixedKeyProcessor<K, V, V> {
         @Override
-        public void process(final Record<K, V> record) {
+        public void process(final FixedKeyRecord<K, V> record) {
             if (filterNot ^ predicate.test(record.key(), record.value())) {
                 context().forward(record);
             }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFlatMapValues.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFlatMapValues.java
index 1008b297b3d3c..d9e64e314d4b9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFlatMapValues.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamFlatMapValues.java
@@ -17,12 +17,12 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.streams.kstream.ValueMapperWithKey;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
-import org.apache.kafka.streams.processor.api.Processor;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
-import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 
-class KStreamFlatMapValues<KIn, VIn, VOut> implements ProcessorSupplier<KIn, VIn, KIn, VOut> {
+class KStreamFlatMapValues<KIn, VIn, VOut> implements FixedKeyProcessorSupplier<KIn, VIn, VOut> {
 
     private final ValueMapperWithKey<? super KIn, ? super VIn, ? extends Iterable<? extends VOut>> mapper;
 
@@ -31,13 +31,14 @@ class KStreamFlatMapValues<KIn, VIn, VOut> implements ProcessorSupplier<KIn, VIn
     }
 
     @Override
-    public Processor<KIn, VIn, KIn, VOut> get() {
+    public FixedKeyProcessor<KIn, VIn, VOut> get() {
         return new KStreamFlatMapValuesProcessor();
     }
 
-    private class KStreamFlatMapValuesProcessor extends ContextualProcessor<KIn, VIn, KIn, VOut> {
+    private class KStreamFlatMapValuesProcessor extends
+        ContextualFixedKeyProcessor<KIn, VIn, VOut> {
         @Override
-        public void process(final Record<KIn, VIn> record) {
+        public void process(final FixedKeyRecord<KIn, VIn> record) {
             final Iterable<? extends VOut> newValues = mapper.apply(record.key(), record.value());
             for (final VOut v : newValues) {
                 context().forward(record.withValue(v));
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
index 81faa42c86b65..05f8ebccf4136 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamImpl.java
@@ -22,6 +22,7 @@
 import org.apache.kafka.streams.internals.ApiUtils;
 import org.apache.kafka.streams.kstream.BranchedKStream;
 import org.apache.kafka.streams.kstream.ForeachAction;
+import org.apache.kafka.streams.kstream.ForeachProcessor;
 import org.apache.kafka.streams.kstream.GlobalKTable;
 import org.apache.kafka.streams.kstream.Grouped;
 import org.apache.kafka.streams.kstream.JoinWindows;
@@ -46,6 +47,7 @@
 import org.apache.kafka.streams.kstream.ValueTransformerWithKeySupplier;
 import org.apache.kafka.streams.kstream.internals.graph.BaseRepartitionNode;
 import org.apache.kafka.streams.kstream.internals.graph.BaseRepartitionNode.BaseRepartitionNodeBuilder;
+import org.apache.kafka.streams.kstream.internals.graph.GraphNode;
 import org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode;
 import org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode.OptimizableRepartitionNodeBuilder;
 import org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode;
@@ -54,14 +56,13 @@
 import org.apache.kafka.streams.kstream.internals.graph.StreamSinkNode;
 import org.apache.kafka.streams.kstream.internals.graph.StreamTableJoinNode;
 import org.apache.kafka.streams.kstream.internals.graph.StreamToTableNode;
-import org.apache.kafka.streams.kstream.internals.graph.GraphNode;
 import org.apache.kafka.streams.kstream.internals.graph.UnoptimizableRepartitionNode;
 import org.apache.kafka.streams.kstream.internals.graph.UnoptimizableRepartitionNode.UnoptimizableRepartitionNodeBuilder;
 import org.apache.kafka.streams.processor.FailOnInvalidTimestamp;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.StreamPartitioner;
 import org.apache.kafka.streams.processor.TopicNameExtractor;
-import org.apache.kafka.streams.kstream.ForeachProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.internals.InternalTopicProperties;
 import org.apache.kafka.streams.processor.internals.StaticTopicNameExtractor;
 import org.apache.kafka.streams.state.KeyValueStore;
@@ -119,6 +120,8 @@ public class KStreamImpl<K, V> extends AbstractStream<K, V> implements KStream<K
 
     private static final String PROCESSOR_NAME = "KSTREAM-PROCESSOR-";
 
+    private static final String PROCESSVALUES_NAME = "KSTREAM-PROCESSVALUES-";
+
     private static final String PRINTING_NAME = "KSTREAM-PRINTER-";
 
     private static final String KEY_SELECT_NAME = "KSTREAM-KEY-SELECT-";
@@ -1278,6 +1281,7 @@ private <VO, VR> KStream<K, VR> doStreamTableJoin(final KTable<K, VO> table,
     }
 
     @Override
+    @Deprecated
     public <KR, VR> KStream<KR, VR> transform(final TransformerSupplier<? super K, ? super V, KeyValue<KR, VR>> transformerSupplier,
                                               final String... stateStoreNames) {
         Objects.requireNonNull(transformerSupplier, "transformerSupplier can't be null");
@@ -1286,6 +1290,7 @@ public <KR, VR> KStream<KR, VR> transform(final TransformerSupplier<? super K, ?
     }
 
     @Override
+    @Deprecated
     public <KR, VR> KStream<KR, VR> transform(final TransformerSupplier<? super K, ? super V, KeyValue<KR, VR>> transformerSupplier,
                                               final Named named,
                                               final String... stateStoreNames) {
@@ -1294,6 +1299,7 @@ public <KR, VR> KStream<KR, VR> transform(final TransformerSupplier<? super K, ?
     }
 
     @Override
+    @Deprecated
     public <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? super V, Iterable<KeyValue<K1, V1>>> transformerSupplier,
                                                   final String... stateStoreNames) {
         Objects.requireNonNull(transformerSupplier, "transformerSupplier can't be null");
@@ -1302,6 +1308,7 @@ public <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super
     }
 
     @Override
+    @Deprecated
     public <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super K, ? super V, Iterable<KeyValue<K1, V1>>> transformerSupplier,
                                                   final Named named,
                                                   final String... stateStoreNames) {
@@ -1334,6 +1341,7 @@ public <K1, V1> KStream<K1, V1> flatTransform(final TransformerSupplier<? super
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ? extends VR> valueTransformerSupplier,
                                                final String... stateStoreNames) {
         Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
@@ -1344,6 +1352,7 @@ public <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? supe
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? super V, ? extends VR> valueTransformerSupplier,
                                                final Named named,
                                                final String... stateStoreNames) {
@@ -1356,6 +1365,7 @@ public <VR> KStream<K, VR> transformValues(final ValueTransformerSupplier<? supe
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, ? extends VR> valueTransformerSupplier,
                                                final String... stateStoreNames) {
         Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
@@ -1363,6 +1373,7 @@ public <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> transformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, ? extends VR> valueTransformerSupplier,
                                                final Named named,
                                                final String... stateStoreNames) {
@@ -1401,6 +1412,7 @@ private <VR> KStream<K, VR> doTransformValues(final ValueTransformerWithKeySuppl
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V, Iterable<VR>> valueTransformerSupplier,
                                                    final String... stateStoreNames) {
         Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
@@ -1411,6 +1423,7 @@ public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<?
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<? super V, Iterable<VR>> valueTransformerSupplier,
                                                    final Named named,
                                                    final String... stateStoreNames) {
@@ -1422,6 +1435,7 @@ public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerSupplier<?
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, Iterable<VR>> valueTransformerSupplier,
                                                    final String... stateStoreNames) {
         Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
@@ -1429,6 +1443,7 @@ public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupp
     }
 
     @Override
+    @Deprecated
     public <VR> KStream<K, VR> flatTransformValues(final ValueTransformerWithKeySupplier<? super K, ? super V, Iterable<VR>> valueTransformerSupplier,
                                                    final Named named,
                                                    final String... stateStoreNames) {
@@ -1472,12 +1487,6 @@ public void process(final org.apache.kafka.streams.processor.ProcessorSupplier<?
         process(processorSupplier, Named.as(builder.newProcessorName(PROCESSOR_NAME)), stateStoreNames);
     }
 
-    @Override
-    public void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processorSupplier,
-                        final String... stateStoreNames) {
-        process(processorSupplier, Named.as(builder.newProcessorName(PROCESSOR_NAME)), stateStoreNames);
-    }
-
     @Override
     @Deprecated
     public void process(final org.apache.kafka.streams.processor.ProcessorSupplier<? super K, ? super V> processorSupplier,
@@ -1501,9 +1510,23 @@ public void process(final org.apache.kafka.streams.processor.ProcessorSupplier<?
     }
 
     @Override
-    public void process(final ProcessorSupplier<? super K, ? super V, Void, Void> processorSupplier,
-                        final Named named,
-                        final String... stateStoreNames) {
+    public <KOut, VOut> KStream<KOut, VOut> process(
+        final ProcessorSupplier<? super K, ? super V, KOut, VOut> processorSupplier,
+        final String... stateStoreNames
+    ) {
+        return process(
+            processorSupplier,
+            Named.as(builder.newProcessorName(PROCESSOR_NAME)),
+            stateStoreNames
+        );
+    }
+
+    @Override
+    public <KOut, VOut> KStream<KOut, VOut> process(
+        final ProcessorSupplier<? super K, ? super V, KOut, VOut> processorSupplier,
+        final Named named,
+        final String... stateStoreNames
+    ) {
         Objects.requireNonNull(processorSupplier, "processorSupplier can't be null");
         Objects.requireNonNull(named, "named can't be null");
         Objects.requireNonNull(stateStoreNames, "stateStoreNames can't be a null array");
@@ -1519,5 +1542,59 @@ public void process(final ProcessorSupplier<? super K, ? super V, Void, Void> pr
             stateStoreNames);
 
         builder.addGraphNode(graphNode, processNode);
+
+        // cannot inherit key and value serde
+        return new KStreamImpl<>(
+            name,
+            null,
+            null,
+            subTopologySourceNodes,
+            true,
+            processNode,
+            builder);
+    }
+
+    @Override
+    public <VOut> KStream<K, VOut> processValues(
+        final FixedKeyProcessorSupplier<? super K, ? super V, VOut> processorSupplier,
+        final String... stateStoreNames
+    ) {
+        return processValues(
+            processorSupplier,
+            Named.as(builder.newProcessorName(PROCESSVALUES_NAME)),
+            stateStoreNames
+        );
+    }
+
+    @Override
+    public <VOut> KStream<K, VOut> processValues(
+        final FixedKeyProcessorSupplier<? super K, ? super V, VOut> processorSupplier,
+        final Named named,
+        final String... stateStoreNames
+    ) {
+        Objects.requireNonNull(processorSupplier, "processorSupplier can't be null");
+        Objects.requireNonNull(named, "named can't be null");
+        Objects.requireNonNull(stateStoreNames, "stateStoreNames can't be a null array");
+        ApiUtils.checkSupplier(processorSupplier);
+        for (final String stateStoreName : stateStoreNames) {
+            Objects.requireNonNull(stateStoreName, "stateStoreNames can't be null");
+        }
+
+        final String name = new NamedInternal(named).name();
+        final StatefulProcessorNode<? super K, ? super V> processNode = new StatefulProcessorNode<>(
+            name,
+            new ProcessorParameters<>(processorSupplier, name),
+            stateStoreNames);
+
+        builder.addGraphNode(graphNode, processNode);
+        // cannot inherit value serde
+        return new KStreamImpl<>(
+            name,
+            keySerde,
+            null,
+            subTopologySourceNodes,
+            repartitionRequired,
+            processNode,
+            builder);
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java
index 305cb3843c73a..ecf988d6140f1 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java
@@ -51,6 +51,7 @@ class KStreamKStreamJoin<K, V1, V2, VOut> implements ProcessorSupplier<K, V1, K,
     private final long joinAfterMs;
     private final long joinGraceMs;
     private final boolean enableSpuriousResultFix;
+    private final long joinSpuriousLookBackTimeMs;
 
     private final boolean outer;
     private final boolean isLeftSide;
@@ -71,9 +72,11 @@ class KStreamKStreamJoin<K, V1, V2, VOut> implements ProcessorSupplier<K, V1, K,
         if (isLeftSide) {
             this.joinBeforeMs = windows.beforeMs;
             this.joinAfterMs = windows.afterMs;
+            this.joinSpuriousLookBackTimeMs = windows.beforeMs;
         } else {
             this.joinBeforeMs = windows.afterMs;
             this.joinAfterMs = windows.beforeMs;
+            this.joinSpuriousLookBackTimeMs = windows.afterMs;
         }
         this.joinGraceMs = windows.gracePeriodMs();
         this.enableSpuriousResultFix = windows.spuriousResultFixEnabled();
@@ -152,7 +155,7 @@ public void process(final Record<K, V1> record) {
 
             // Emit all non-joined records which window has closed
             if (inputRecordTimestamp == sharedTimeTracker.streamTime) {
-                outerJoinStore.ifPresent(this::emitNonJoinedOuterRecords);
+                outerJoinStore.ifPresent(store -> emitNonJoinedOuterRecords(store, record));
             }
 
             try (final WindowStoreIterator<V2> iter = otherWindowStore.fetch(record.key(), timeFrom, timeTo)) {
@@ -206,12 +209,15 @@ public void process(final Record<K, V1> record) {
         }
 
         @SuppressWarnings("unchecked")
-        private void emitNonJoinedOuterRecords(final KeyValueStore<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> store) {
+        private void emitNonJoinedOuterRecords(
+            final KeyValueStore<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> store,
+            final Record<K, V1> record) {
+
             // calling `store.all()` creates an iterator what is an expensive operation on RocksDB;
             // to reduce runtime cost, we try to avoid paying those cost
 
             // only try to emit left/outer join results if there _might_ be any result records
-            if (sharedTimeTracker.minTime >= sharedTimeTracker.streamTime - joinAfterMs - joinGraceMs) {
+            if (sharedTimeTracker.minTime >= sharedTimeTracker.streamTime - joinSpuriousLookBackTimeMs - joinGraceMs) {
                 return;
             }
             // throttle the emit frequency to a (configurable) interval;
@@ -232,10 +238,9 @@ private void emitNonJoinedOuterRecords(final KeyValueStore<TimestampedKeyAndJoin
                 TimestampedKeyAndJoinSide<K> prevKey = null;
 
                 while (it.hasNext()) {
-                    final KeyValue<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> record = it.next();
-
-                    final TimestampedKeyAndJoinSide<K> timestampedKeyAndJoinSide = record.key;
-                    final LeftOrRightValue<V1, V2> value = record.value;
+                    final KeyValue<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> next = it.next();
+                    final TimestampedKeyAndJoinSide<K> timestampedKeyAndJoinSide = next.key;
+                    final LeftOrRightValue<V1, V2> value = next.value;
                     final K key = timestampedKeyAndJoinSide.getKey();
                     final long timestamp = timestampedKeyAndJoinSide.getTimestamp();
                     sharedTimeTracker.minTime = timestamp;
@@ -256,7 +261,9 @@ private void emitNonJoinedOuterRecords(final KeyValueStore<TimestampedKeyAndJoin
                                 (V2) value.getLeftValue());
                     }
 
-                    context().forward(new Record<>(key, nullJoinedValue, timestamp));
+                    context().forward(
+                        record.withKey(key).withValue(nullJoinedValue).withTimestamp(timestamp)
+                    );
 
                     if (prevKey != null && !prevKey.equals(timestampedKeyAndJoinSide)) {
                         // blind-delete the previous key from the outer window store now it is emitted;
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamMapValues.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamMapValues.java
index f73bfdd53bc33..b51e3568521fe 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamMapValues.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamMapValues.java
@@ -17,12 +17,12 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.streams.kstream.ValueMapperWithKey;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
-import org.apache.kafka.streams.processor.api.Processor;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
-import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 
-class KStreamMapValues<KIn, VIn, VOut> implements ProcessorSupplier<KIn, VIn, KIn, VOut> {
+class KStreamMapValues<KIn, VIn, VOut> implements FixedKeyProcessorSupplier<KIn, VIn, VOut> {
 
     private final ValueMapperWithKey<KIn, VIn, VOut> mapper;
 
@@ -31,13 +31,13 @@ public KStreamMapValues(final ValueMapperWithKey<KIn, VIn, VOut> mapper) {
     }
 
     @Override
-    public Processor<KIn, VIn, KIn, VOut> get() {
+    public FixedKeyProcessor<KIn, VIn, VOut> get() {
         return new KStreamMapProcessor();
     }
 
-    private class KStreamMapProcessor extends ContextualProcessor<KIn, VIn, KIn, VOut> {
+    private class KStreamMapProcessor extends ContextualFixedKeyProcessor<KIn, VIn, VOut> {
         @Override
-        public void process(final Record<KIn, VIn> record) {
+        public void process(final FixedKeyRecord<KIn, VIn> record) {
             final VOut newValue = mapper.apply(record.key(), record.value());
             context().forward(record.withValue(newValue));
         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamPeek.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamPeek.java
index 69b5e7fc3316f..bb894093e54f8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamPeek.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamPeek.java
@@ -17,12 +17,12 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.streams.kstream.ForeachAction;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
-import org.apache.kafka.streams.processor.api.Processor;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
-import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 
-class KStreamPeek<K, V> implements ProcessorSupplier<K, V, K, V> {
+class KStreamPeek<K, V> implements FixedKeyProcessorSupplier<K, V, V> {
 
     private final ForeachAction<K, V> action;
 
@@ -31,13 +31,13 @@ public KStreamPeek(final ForeachAction<K, V> action) {
     }
 
     @Override
-    public Processor<K, V, K, V> get() {
+    public FixedKeyProcessor<K, V, V> get() {
         return new KStreamPeekProcessor();
     }
 
-    private class KStreamPeekProcessor extends ContextualProcessor<K, V, K, V> {
+    private class KStreamPeekProcessor extends ContextualFixedKeyProcessor<K, V, V> {
         @Override
-        public void process(final Record<K, V> record) {
+        public void process(final FixedKeyRecord<K, V> record) {
             action.apply(record.key(), record.value());
             context().forward(record);
         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregate.java
index eff7ac327a46a..f8252358b08f6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregate.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregate.java
@@ -18,8 +18,11 @@
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.Merger;
 import org.apache.kafka.streams.kstream.SessionWindows;
@@ -29,6 +32,7 @@
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.api.RecordMetadata;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
@@ -39,6 +43,9 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.apache.kafka.streams.StreamsConfig.InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION;
+import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emitFinalLatencySensor;
+import static org.apache.kafka.streams.processor.internals.metrics.ProcessorNodeMetrics.emittedRecordsSensor;
 import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensor;
 
 public class KStreamSessionWindowAggregate<KIn, VIn, VAgg> implements KStreamAggProcessorSupplier<KIn, VIn, Windowed<KIn>, VAgg> {
@@ -50,16 +57,19 @@ public class KStreamSessionWindowAggregate<KIn, VIn, VAgg> implements KStreamAgg
     private final Initializer<VAgg> initializer;
     private final Aggregator<? super KIn, ? super VIn, VAgg> aggregator;
     private final Merger<? super KIn, VAgg> sessionMerger;
+    private final EmitStrategy emitStrategy;
 
     private boolean sendOldValues = false;
 
     public KStreamSessionWindowAggregate(final SessionWindows windows,
-        final String storeName,
-        final Initializer<VAgg> initializer,
-        final Aggregator<? super KIn, ? super VIn, VAgg> aggregator,
-        final Merger<? super KIn, VAgg> sessionMerger) {
+                                         final String storeName,
+                                         final EmitStrategy emitStrategy,
+                                         final Initializer<VAgg> initializer,
+                                         final Aggregator<? super KIn, ? super VIn, VAgg> aggregator,
+                                         final Merger<? super KIn, VAgg> sessionMerger) {
         this.windows = windows;
         this.storeName = storeName;
+        this.emitStrategy = emitStrategy;
         this.initializer = initializer;
         this.aggregator = aggregator;
         this.sessionMerger = sessionMerger;
@@ -83,24 +93,50 @@ private class KStreamSessionWindowAggregateProcessor extends
         ContextualProcessor<KIn, VIn, Windowed<KIn>, Change<VAgg>> {
 
         private SessionStore<KIn, VAgg> store;
-        private SessionTupleForwarder<KIn, VAgg> tupleForwarder;
+        private TimestampedTupleForwarder<Windowed<KIn>, VAgg> tupleForwarder;
         private Sensor droppedRecordsSensor;
+        private Sensor emittedRecordsSensor;
+        private Sensor emitFinalLatencySensor;
+        private long lastEmitWindowCloseTime = ConsumerRecord.NO_TIMESTAMP;
         private long observedStreamTime = ConsumerRecord.NO_TIMESTAMP;
+        private InternalProcessorContext<Windowed<KIn>, Change<VAgg>> internalProcessorContext;
+
+        private final Time time = Time.SYSTEM;
+        protected final KStreamImplJoin.TimeTracker timeTracker = new KStreamImplJoin.TimeTracker();
 
         @Override
         public void init(final ProcessorContext<Windowed<KIn>, Change<VAgg>> context) {
             super.init(context);
+            internalProcessorContext = (InternalProcessorContext<Windowed<KIn>, Change<VAgg>>) context;
             final StreamsMetricsImpl metrics = (StreamsMetricsImpl) context.metrics();
             final String threadId = Thread.currentThread().getName();
-            droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(),
-                metrics);
+            final String processorName = internalProcessorContext.currentNode().name();
+            droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(), metrics);
+            emittedRecordsSensor = emittedRecordsSensor(threadId, context.taskId().toString(), processorName, metrics);
+            emitFinalLatencySensor = emitFinalLatencySensor(threadId, context.taskId().toString(), processorName, metrics);
             store = context.getStateStore(storeName);
-            tupleForwarder = new SessionTupleForwarder<>(
-                store,
-                context,
-                new SessionCacheFlushListener<>(context),
-                sendOldValues
-            );
+
+            if (emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
+                // Restore last emit close time for ON_WINDOW_CLOSE strategy
+                final Long lastEmitWindowCloseTime = internalProcessorContext.processorMetadataForKey(storeName);
+                if (lastEmitWindowCloseTime != null) {
+                    this.lastEmitWindowCloseTime = lastEmitWindowCloseTime;
+                }
+                final long emitInterval = StreamsConfig.InternalConfig.getLong(
+                        context.appConfigs(),
+                        EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION,
+                        1000L
+                );
+                timeTracker.setEmitInterval(emitInterval);
+
+                tupleForwarder = new TimestampedTupleForwarder<>(context, sendOldValues);
+            } else {
+                tupleForwarder = new TimestampedTupleForwarder<>(
+                    store,
+                    context,
+                    new SessionCacheFlushListener<>(context),
+                    sendOldValues);
+            }
         }
 
         @Override
@@ -108,25 +144,13 @@ public void process(final Record<KIn, VIn> record) {
             // if the key is null, we do not need proceed aggregating
             // the record with the table
             if (record.key() == null) {
-                if (context().recordMetadata().isPresent()) {
-                    final RecordMetadata recordMetadata = context().recordMetadata().get();
-                    LOG.warn(
-                        "Skipping record due to null key. "
-                            + "topic=[{}] partition=[{}] offset=[{}]",
-                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset()
-                    );
-                } else {
-                    LOG.warn(
-                        "Skipping record due to null key. Topic, partition, and offset not known."
-                    );
-                }
-                droppedRecordsSensor.record();
+                logSkippedRecordForNullKey();
                 return;
             }
 
             final long timestamp = record.timestamp();
             observedStreamTime = Math.max(observedStreamTime, timestamp);
-            final long closeTime = observedStreamTime - windows.gracePeriodMs() - windows.inactivityGap();
+            final long windowCloseTime = observedStreamTime - windows.gracePeriodMs() - windows.inactivityGap();
 
             final List<KeyValue<Windowed<KIn>, VAgg>> merged = new ArrayList<>();
             final SessionWindow newSessionWindow = new SessionWindow(timestamp, timestamp);
@@ -148,55 +172,174 @@ public void process(final Record<KIn, VIn> record) {
                 }
             }
 
-            if (mergedWindow.end() < closeTime) {
-                if (context().recordMetadata().isPresent()) {
-                    final RecordMetadata recordMetadata = context().recordMetadata().get();
-                    LOG.warn(
-                        "Skipping record for expired window. " +
-                            "topic=[{}] " +
-                            "partition=[{}] " +
-                            "offset=[{}] " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset(),
-                        timestamp,
-                        mergedWindow.start(), mergedWindow.end(),
-                        closeTime,
-                        observedStreamTime
-                    );
-                } else {
-                    LOG.warn(
-                        "Skipping record for expired window. Topic, partition, and offset not known. " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        timestamp,
-                        mergedWindow.start(), mergedWindow.end(),
-                        closeTime,
-                        observedStreamTime
-                    );
-                }
-                droppedRecordsSensor.record();
+            if (mergedWindow.end() < windowCloseTime) {
+                logSkippedRecordForExpiredWindow(timestamp, windowCloseTime, mergedWindow);
             } else {
                 if (!mergedWindow.equals(newSessionWindow)) {
                     for (final KeyValue<Windowed<KIn>, VAgg> session : merged) {
                         store.remove(session.key);
-                        tupleForwarder.maybeForward(
-                            record.withKey(session.key)
-                                .withValue(new Change<>(null, session.value)));
+
+                        maybeForwardUpdate(session.key, session.value, null);
                     }
                 }
 
                 agg = aggregator.apply(record.key(), record.value(), agg);
                 final Windowed<KIn> sessionKey = new Windowed<>(record.key(), mergedWindow);
                 store.put(sessionKey, agg);
+
+                maybeForwardUpdate(sessionKey, null, agg);
+            }
+
+            maybeForwardFinalResult(record, windowCloseTime);
+        }
+
+        private void maybeForwardUpdate(final Windowed<KIn> windowedkey,
+                                        final VAgg oldAgg,
+                                        final VAgg newAgg) {
+            if (emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
+                return;
+            }
+
+            // Update the sent record timestamp to the window end time if possible
+            final long newTimestamp = windowedkey.window().end();
+            tupleForwarder.maybeForward(new Record<>(windowedkey, new Change<>(newAgg, sendOldValues ? oldAgg : null), newTimestamp));
+        }
+
+        // TODO: consolidate SessionWindow with TimeWindow to merge common functions
+        private void maybeForwardFinalResult(final Record<KIn, VIn> record, final long windowCloseTime) {
+            if (shouldEmitFinal(windowCloseTime)) {
+                final long emitRangeUpperBound = emitRangeUpperBound(windowCloseTime);
+
+                // if the upper bound is smaller than 0, then there's no window closed ever;
+                // and we can skip range fetching
+                if (emitRangeUpperBound >= 0) {
+                    final long emitRangeLowerBound = emitRangeLowerBound();
+
+                    if (shouldRangeFetch(emitRangeLowerBound, emitRangeUpperBound)) {
+                        fetchAndEmit(record, windowCloseTime, emitRangeLowerBound, emitRangeUpperBound);
+                    }
+                }
+            }
+        }
+
+        private boolean shouldEmitFinal(final long windowCloseTime) {
+            if (emitStrategy.type() != EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
+                return false;
+            }
+
+            final long now = internalProcessorContext.currentSystemTimeMs();
+            // Throttle emit frequency
+            if (now < timeTracker.nextTimeToEmit) {
+                return false;
+            }
+
+            // Schedule next emit time based on now to avoid the case that if system time jumps a lot,
+            // this can be triggered every time
+            timeTracker.nextTimeToEmit = now;
+            timeTracker.advanceNextTimeToEmit();
+
+            // Only EMIT if the window close time does progress
+            return lastEmitWindowCloseTime == ConsumerRecord.NO_TIMESTAMP || lastEmitWindowCloseTime < windowCloseTime;
+        }
+
+        private long emitRangeLowerBound() {
+            return Math.max(0L, lastEmitWindowCloseTime);
+        }
+
+        private long emitRangeUpperBound(final long windowCloseTime) {
+            // Session window's start and end timestamps are inclusive, so
+            // we should minus 1 for the inclusive closed window-end upper bound
+            return windowCloseTime - 1;
+        }
+
+        private boolean shouldRangeFetch(final long emitRangeLowerBound, final long emitRangeUpperBound) {
+            // since a session window could be a single point (i.e. [t, t]),
+            // we need to range fetch and emit even if the upper and lower bound are the same
+            return emitRangeUpperBound >= emitRangeLowerBound;
+        }
+
+        private void fetchAndEmit(final Record<KIn, VIn> record,
+                                  final long windowCloseTime,
+                                  final long emitRangeLowerBound,
+                                  final long emitRangeUpperBound) {
+            final long startMs = time.milliseconds();
+
+            // Only time ordered (indexed) session store should have implemented
+            // this function, otherwise a not-supported exception would throw
+            final KeyValueIterator<Windowed<KIn>, VAgg> windowToEmit = store
+                .findSessions(emitRangeLowerBound, emitRangeUpperBound);
+
+            int emittedCount = 0;
+            while (windowToEmit.hasNext()) {
+                emittedCount++;
+                final KeyValue<Windowed<KIn>, VAgg> kv = windowToEmit.next();
+
                 tupleForwarder.maybeForward(
-                    record.withKey(sessionKey)
-                        .withValue(new Change<>(agg, null)));
+                    record.withKey(kv.key)
+                        .withValue(new Change<>(kv.value, null))
+                        // set the timestamp as the window end timestamp
+                        .withTimestamp(kv.key.window().end())
+                        .withHeaders(record.headers()));
             }
+            emittedRecordsSensor.record(emittedCount);
+            emitFinalLatencySensor.record(time.milliseconds() - startMs);
+
+            lastEmitWindowCloseTime = windowCloseTime;
+            internalProcessorContext.addProcessorMetadataKeyValue(storeName, windowCloseTime);
+        }
+
+        private void logSkippedRecordForNullKey() {
+            if (context().recordMetadata().isPresent()) {
+                final RecordMetadata recordMetadata = context().recordMetadata().get();
+                LOG.warn(
+                        "Skipping record due to null key. "
+                                + "topic=[{}] partition=[{}] offset=[{}]",
+                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset()
+                );
+            } else {
+                LOG.warn(
+                        "Skipping record due to null key. Topic, partition, and offset not known."
+                );
+            }
+            droppedRecordsSensor.record();
+        }
+
+        private void logSkippedRecordForExpiredWindow(final long timestamp,
+                                                      final long windowExpire,
+                                                      final SessionWindow window) {
+            final String windowString = "[" + window.start() + "," + window.end() + "]";
+
+            if (context().recordMetadata().isPresent()) {
+                final RecordMetadata recordMetadata = context().recordMetadata().get();
+                LOG.warn("Skipping record for expired window. " +
+                                "topic=[{}] " +
+                                "partition=[{}] " +
+                                "offset=[{}] " +
+                                "timestamp=[{}] " +
+                                "window={} " +
+                                "expiration=[{}] " +
+                                "streamTime=[{}]",
+                        recordMetadata.topic(),
+                        recordMetadata.partition(),
+                        recordMetadata.offset(),
+                        timestamp,
+                        windowString,
+                        windowExpire,
+                        observedStreamTime
+                );
+            } else {
+                LOG.warn("Skipping record for expired window. Topic, partition, and offset not known. " +
+                                "timestamp=[{}] " +
+                                "window={} " +
+                                "expiration=[{}] " +
+                                "streamTime=[{}]",
+                        timestamp,
+                        windowString,
+                        windowExpire,
+                        observedStreamTime
+                );
+            }
+            droppedRecordsSensor.record();
         }
     }
 
@@ -237,5 +380,4 @@ public ValueAndTimestamp<VAgg> get(final Windowed<KIn> key) {
                 key.window().end());
         }
     }
-
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregate.java
index fd0198b71c737..aa0841a38f0af 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregate.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregate.java
@@ -17,20 +17,17 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.Window;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.SlidingWindows;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
 import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.api.RecordMetadata;
-import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
-import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.TimestampedWindowStore;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
@@ -39,7 +36,6 @@
 import java.util.HashSet;
 import java.util.Set;
 
-import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensor;
 import static org.apache.kafka.streams.state.ValueAndTimestamp.getValueOrNull;
 
 public class KStreamSlidingWindowAggregate<KIn, VIn, VAgg> implements KStreamAggProcessorSupplier<KIn, VIn, Windowed<KIn>, VAgg> {
@@ -50,22 +46,25 @@ public class KStreamSlidingWindowAggregate<KIn, VIn, VAgg> implements KStreamAgg
     private final SlidingWindows windows;
     private final Initializer<VAgg> initializer;
     private final Aggregator<? super KIn, ? super VIn, VAgg> aggregator;
+    private final EmitStrategy emitStrategy;
 
     private boolean sendOldValues = false;
 
     public KStreamSlidingWindowAggregate(final SlidingWindows windows,
                                          final String storeName,
+                                         final EmitStrategy emitStrategy,
                                          final Initializer<VAgg> initializer,
                                          final Aggregator<? super KIn, ? super VIn, VAgg> aggregator) {
         this.windows = windows;
         this.storeName = storeName;
         this.initializer = initializer;
         this.aggregator = aggregator;
+        this.emitStrategy = emitStrategy;
     }
 
     @Override
     public Processor<KIn, VIn, Windowed<KIn>, Change<VAgg>> get() {
-        return new KStreamSlidingWindowAggregateProcessor();
+        return new KStreamSlidingWindowAggregateProcessor(storeName, emitStrategy, sendOldValues);
     }
 
     public SlidingWindows windows() {
@@ -77,27 +76,13 @@ public void enableSendingOldValues() {
         sendOldValues = true;
     }
 
-    private class KStreamSlidingWindowAggregateProcessor extends ContextualProcessor<KIn, VIn, Windowed<KIn>, Change<VAgg>> {
-        private TimestampedWindowStore<KIn, VAgg> windowStore;
-        private TimestampedTupleForwarder<Windowed<KIn>, VAgg> tupleForwarder;
-        private Sensor droppedRecordsSensor;
-        private long observedStreamTime = ConsumerRecord.NO_TIMESTAMP;
+    private class KStreamSlidingWindowAggregateProcessor extends AbstractKStreamTimeWindowAggregateProcessor<KIn, VIn, VAgg> {
         private Boolean reverseIteratorPossible = null;
 
-        @Override
-        public void init(final ProcessorContext<Windowed<KIn>, Change<VAgg>> context) {
-            super.init(context);
-            final InternalProcessorContext<Windowed<KIn>, Change<VAgg>> internalProcessorContext =
-                (InternalProcessorContext<Windowed<KIn>, Change<VAgg>>) context;
-            final StreamsMetricsImpl metrics = internalProcessorContext.metrics();
-            final String threadId = Thread.currentThread().getName();
-            droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(), metrics);
-            windowStore = context.getStateStore(storeName);
-            tupleForwarder = new TimestampedTupleForwarder<>(
-                windowStore,
-                context,
-                new TimestampedCacheFlushListener<>(context),
-                sendOldValues);
+        protected KStreamSlidingWindowAggregateProcessor(final String storeName,
+                                                         final EmitStrategy emitStrategy,
+                                                         final boolean sendOldValues) {
+            super(storeName, emitStrategy, sendOldValues);
         }
 
         @Override
@@ -119,46 +104,19 @@ public void process(final Record<KIn, VIn> record) {
                 return;
             }
 
-            observedStreamTime = Math.max(observedStreamTime, record.timestamp());
-            final long closeTime = observedStreamTime - windows.gracePeriodMs();
+            updateObservedStreamTime(record.timestamp());
+            final long windowCloseTime = observedStreamTime - windows.gracePeriodMs();
 
-            if (record.timestamp() + 1L + windows.timeDifferenceMs() <= closeTime) {
-                if (context().recordMetadata().isPresent()) {
-                    final RecordMetadata recordMetadata = context().recordMetadata().get();
-                    log.warn(
-                        "Skipping record for expired window. " +
-                            "topic=[{}] " +
-                            "partition=[{}] " +
-                            "offset=[{}] " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset(),
-                        record.timestamp(),
-                        record.timestamp() - windows.timeDifferenceMs(), record.timestamp(),
-                        closeTime,
-                        observedStreamTime
-                    );
-                } else {
-                    log.warn(
-                        "Skipping record for expired window. Topic, partition, and offset not known. " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        record.timestamp(),
-                        record.timestamp() - windows.timeDifferenceMs(), record.timestamp(),
-                        closeTime,
-                        observedStreamTime
-                    );
-                }
-                droppedRecordsSensor.record();
+            final long windowStart = record.timestamp();
+            final long windowEnd = record.timestamp() + windows.timeDifferenceMs();
+            if (windowEnd < windowCloseTime) {
+                final String window = "[" + windowStart + "," + windowEnd + "]";
+                logSkippedRecordForExpiredWindow(log, record.timestamp(), windowCloseTime, window);
                 return;
             }
 
             if (record.timestamp() < windows.timeDifferenceMs()) {
-                processEarly(record, closeTime);
+                processEarly(record, windowCloseTime);
                 return;
             }
 
@@ -174,14 +132,15 @@ public void process(final Record<KIn, VIn> record) {
             }
 
             if (reverseIteratorPossible) {
-                processReverse(record, closeTime);
+                processReverse(record, windowCloseTime);
             } else {
-                processInOrder(record, closeTime);
+                processInOrder(record, windowCloseTime);
             }
-        }
 
-        public void processInOrder(final Record<KIn, VIn> record, final long closeTime) {
+            maybeForwardFinalResult(record, windowCloseTime);
+        }
 
+        public void processInOrder(final Record<KIn, VIn> record, final long windowCloseTime) {
             final Set<Long> windowStartTimes = new HashSet<>();
 
             // aggregate that will go in the current record’s left/right window (if needed)
@@ -221,14 +180,14 @@ public void processInOrder(final Record<KIn, VIn> record, final long closeTime)
                             windowBeingProcessed.key.window(),
                             windowBeingProcessed.value,
                             record,
-                            closeTime);
+                            windowCloseTime);
                     } else if (endTime > record.timestamp() && startTime <= record.timestamp()) {
                         rightWinAgg = windowBeingProcessed.value;
                         updateWindowAndForward(
                             windowBeingProcessed.key.window(),
                             windowBeingProcessed.value,
                             record,
-                            closeTime);
+                            windowCloseTime);
                     } else if (startTime == record.timestamp() + 1) {
                         rightWinAlreadyCreated = true;
                     } else {
@@ -240,11 +199,10 @@ public void processInOrder(final Record<KIn, VIn> record, final long closeTime)
                     }
                 }
             }
-            createWindows(record, closeTime, windowStartTimes, rightWinAgg, leftWinAgg, leftWinAlreadyCreated, rightWinAlreadyCreated, previousRecordTimestamp);
+            createWindows(record, windowCloseTime, windowStartTimes, rightWinAgg, leftWinAgg, leftWinAlreadyCreated, rightWinAlreadyCreated, previousRecordTimestamp);
         }
 
-        public void processReverse(final Record<KIn, VIn> record, final long closeTime) {
-
+        public void processReverse(final Record<KIn, VIn> record, final long windowCloseTime) {
             final Set<Long> windowStartTimes = new HashSet<>();
 
             // aggregate that will go in the current record’s left/right window (if needed)
@@ -277,10 +235,10 @@ public void processReverse(final Record<KIn, VIn> record, final long closeTime)
                         if (rightWinAgg == null) {
                             rightWinAgg = windowBeingProcessed.value;
                         }
-                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, closeTime);
+                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, windowCloseTime);
                     } else if (endTime == record.timestamp()) {
                         leftWinAlreadyCreated = true;
-                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, closeTime);
+                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, windowCloseTime);
                         if (windowMaxRecordTimestamp < record.timestamp()) {
                             previousRecordTimestamp = windowMaxRecordTimestamp;
                         } else {
@@ -299,7 +257,7 @@ public void processReverse(final Record<KIn, VIn> record, final long closeTime)
                     }
                 }
             }
-            createWindows(record, closeTime, windowStartTimes, rightWinAgg, leftWinAgg, leftWinAlreadyCreated, rightWinAlreadyCreated, previousRecordTimestamp);
+            createWindows(record, windowCloseTime, windowStartTimes, rightWinAgg, leftWinAgg, leftWinAlreadyCreated, rightWinAlreadyCreated, previousRecordTimestamp);
         }
 
         /**
@@ -307,7 +265,7 @@ public void processReverse(final Record<KIn, VIn> record, final long closeTime)
          * windows with negative start times, which is not supported. Instead, we will put them into the [0, timeDifferenceMs]
          * window as a "workaround", and we will update or create their right windows as new records come in later
          */
-        private void processEarly(final Record<KIn, VIn> record, final long closeTime) {
+        private void processEarly(final Record<KIn, VIn> record, final long windowCloseTime) {
             if (record.timestamp() < 0 || record.timestamp() >= windows.timeDifferenceMs()) {
                 log.error(
                     "Early record for sliding windows must fall between fall between 0 <= inputRecordTimestamp. Timestamp {} does not fall between 0 <= {}",
@@ -349,7 +307,7 @@ private void processEarly(final Record<KIn, VIn> record, final long closeTime) {
 
                     } else if (startTime <= record.timestamp()) {
                         rightWinAgg = windowBeingProcessed.value;
-                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, closeTime);
+                        updateWindowAndForward(windowBeingProcessed.key.window(), windowBeingProcessed.value, record, windowCloseTime);
                     } else if (startTime == record.timestamp() + 1) {
                         rightWinAlreadyCreated = true;
                     } else {
@@ -379,17 +337,17 @@ private void processEarly(final Record<KIn, VIn> record, final long closeTime) {
 
             //create the right window for the previous record if the previous record exists and the window hasn't already been created
             if (previousRecordTimestamp != null && !windowStartTimes.contains(previousRecordTimestamp + 1)) {
-                createPreviousRecordRightWindow(previousRecordTimestamp + 1, record, closeTime);
+                createPreviousRecordRightWindow(previousRecordTimestamp + 1, record, windowCloseTime);
             }
 
             if (combinedWindow == null) {
                 final TimeWindow window = new TimeWindow(0, windows.timeDifferenceMs());
                 final ValueAndTimestamp<VAgg> valueAndTime = ValueAndTimestamp.make(initializer.apply(), record.timestamp());
-                updateWindowAndForward(window, valueAndTime, record, closeTime);
+                updateWindowAndForward(window, valueAndTime, record, windowCloseTime);
 
             } else {
                 //update the combined window with the new aggregate
-                updateWindowAndForward(combinedWindow.key.window(), combinedWindow.value, record, closeTime);
+                updateWindowAndForward(combinedWindow.key.window(), combinedWindow.value, record, windowCloseTime);
             }
 
         }
@@ -402,7 +360,7 @@ private void createWindows(final Record<KIn, VIn> record,
                                    final boolean leftWinAlreadyCreated,
                                    final boolean rightWinAlreadyCreated,
                                    final Long previousRecordTimestamp) {
-            //create right window for previous record
+            // create right window for previous record
             if (previousRecordTimestamp != null) {
                 final long previousRightWinStart = previousRecordTimestamp + 1;
                 if (previousRecordRightWindowDoesNotExistAndIsNotEmpty(windowStartTimes, previousRightWinStart, record.timestamp())) {
@@ -410,7 +368,7 @@ private void createWindows(final Record<KIn, VIn> record,
                 }
             }
 
-            //create left window for new record
+            // create left window for new record
             if (!leftWinAlreadyCreated) {
                 final ValueAndTimestamp<VAgg> valueAndTime;
                 if (leftWindowNotEmpty(previousRecordTimestamp, record.timestamp())) {
@@ -436,10 +394,7 @@ private void createCurrentRecordRightWindow(final long inputRecordTimestamp,
                 record.key(),
                 rightWinAgg,
                 window.start());
-            tupleForwarder.maybeForward(
-                record.withKey(new Windowed<>(record.key(), window))
-                    .withValue(new Change<>(rightWinAgg.value(), null))
-                    .withTimestamp(rightWinAgg.timestamp()));
+            maybeForwardUpdate(record, window, null, rightWinAgg.value(), rightWinAgg.timestamp());
         }
 
         private void createPreviousRecordRightWindow(final long windowStart,
@@ -467,14 +422,33 @@ private boolean rightWindowIsNotEmpty(final ValueAndTimestamp<VAgg> rightWinAgg,
             return rightWinAgg != null && rightWinAgg.timestamp() > inputRecordTimestamp;
         }
 
+        @Override
+        protected long emitRangeLowerBound(final long windowCloseTime) {
+            return lastEmitWindowCloseTime == ConsumerRecord.NO_TIMESTAMP ?
+                0L : Math.max(0L, lastEmitWindowCloseTime - windows.timeDifferenceMs());
+        }
+
+        @Override
+        protected long emitRangeUpperBound(final long windowCloseTime) {
+            // Sliding window's start and end timestamps are inclusive, so
+            // we should minus 1 for the inclusive closed window-end upper bound
+            return windowCloseTime - windows.timeDifferenceMs() - 1;
+        }
+
+        @Override
+        protected boolean shouldRangeFetch(final long emitRangeLowerBound, final long emitRangeUpperBound) {
+            return true;
+        }
+
         private void updateWindowAndForward(final Window window,
                                             final ValueAndTimestamp<VAgg> valueAndTime,
                                             final Record<KIn, VIn> record,
-                                            final long closeTime) {
+                                            final long windowCloseTime) {
             final long windowStart = window.start();
             final long windowEnd = window.end();
-            if (windowEnd > closeTime) {
-                //get aggregate from existing window
+
+            if (windowEnd >= windowCloseTime) {
+                // get aggregate from existing window
                 final VAgg oldAgg = getValueOrNull(valueAndTime);
                 final VAgg newAgg = aggregator.apply(record.key(), record.value(), oldAgg);
 
@@ -483,42 +457,10 @@ private void updateWindowAndForward(final Window window,
                     record.key(),
                     ValueAndTimestamp.make(newAgg, newTimestamp),
                     windowStart);
-                tupleForwarder.maybeForward(
-                    record.withKey(new Windowed<>(record.key(), window))
-                        .withValue(new Change<>(newAgg, sendOldValues ? oldAgg : null))
-                        .withTimestamp(newTimestamp));
+                maybeForwardUpdate(record, window, oldAgg, newAgg, newTimestamp);
             } else {
-                if (context().recordMetadata().isPresent()) {
-                    final RecordMetadata recordMetadata = context().recordMetadata().get();
-                    log.warn(
-                        "Skipping record for expired window. " +
-                            "topic=[{}] " +
-                            "partition=[{}] " +
-                            "offset=[{}] " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset(),
-                        record.timestamp(),
-                        windowStart, windowEnd,
-                        closeTime,
-                        observedStreamTime
-                    );
-                } else {
-                    log.warn(
-                        "Skipping record for expired window. Topic, partition, and offset not known. " +
-                            "timestamp=[{}] " +
-                            "window=[{},{}] " +
-                            "expiration=[{}] " +
-                            "streamTime=[{}]",
-                        record.timestamp(),
-                        windowStart, windowEnd,
-                        closeTime,
-                        observedStreamTime
-                    );
-                }
-                droppedRecordsSensor.record();
+                final String windowString = "[" + windowStart + "," + windowEnd + "]";
+                logSkippedRecordForExpiredWindow(log, record.timestamp(), windowCloseTime, windowString);
             }
         }
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java
index 8e88b5de7a6a8..561524f87e799 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregate.java
@@ -17,27 +17,24 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
-import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Initializer;
+import org.apache.kafka.streams.kstream.TimeWindows;
 import org.apache.kafka.streams.kstream.Window;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.Windows;
-import org.apache.kafka.streams.processor.api.ContextualProcessor;
 import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.api.RecordMetadata;
-import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
-import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.state.TimestampedWindowStore;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import java.util.Map;
 
-import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensor;
 import static org.apache.kafka.streams.state.ValueAndTimestamp.getValueOrNull;
 
 public class KStreamWindowAggregate<KIn, VIn, VAgg, W extends Window> implements KStreamAggProcessorSupplier<KIn, VIn, Windowed<KIn>, VAgg> {
@@ -48,22 +45,32 @@ public class KStreamWindowAggregate<KIn, VIn, VAgg, W extends Window> implements
     private final Windows<W> windows;
     private final Initializer<VAgg> initializer;
     private final Aggregator<? super KIn, ? super VIn, VAgg> aggregator;
+    private final EmitStrategy emitStrategy;
 
     private boolean sendOldValues = false;
 
     public KStreamWindowAggregate(final Windows<W> windows,
                                   final String storeName,
+                                  final EmitStrategy emitStrategy,
                                   final Initializer<VAgg> initializer,
                                   final Aggregator<? super KIn, ? super VIn, VAgg> aggregator) {
         this.windows = windows;
         this.storeName = storeName;
+        this.emitStrategy = emitStrategy;
         this.initializer = initializer;
         this.aggregator = aggregator;
+
+        if (emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE) {
+            if (!(windows instanceof TimeWindows)) {
+                throw new IllegalArgumentException("ON_WINDOW_CLOSE strategy is only supported for "
+                    + "TimeWindows and SlidingWindows for TimeWindowedKStream");
+            }
+        }
     }
 
     @Override
     public Processor<KIn, VIn, Windowed<KIn>, Change<VAgg>> get() {
-        return new KStreamWindowAggregateProcessor();
+        return new KStreamWindowAggregateProcessor(storeName, emitStrategy, sendOldValues);
     }
 
     public Windows<W> windows() {
@@ -75,27 +82,9 @@ public void enableSendingOldValues() {
         sendOldValues = true;
     }
 
-
-    private class KStreamWindowAggregateProcessor extends ContextualProcessor<KIn, VIn, Windowed<KIn>, Change<VAgg>> {
-        private TimestampedWindowStore<KIn, VAgg> windowStore;
-        private TimestampedTupleForwarder<Windowed<KIn>, VAgg> tupleForwarder;
-        private Sensor droppedRecordsSensor;
-        private long observedStreamTime = ConsumerRecord.NO_TIMESTAMP;
-
-        @Override
-        public void init(final ProcessorContext<Windowed<KIn>, Change<VAgg>> context) {
-            super.init(context);
-            final InternalProcessorContext<Windowed<KIn>, Change<VAgg>> internalProcessorContext =
-                (InternalProcessorContext<Windowed<KIn>, Change<VAgg>>) context;
-            final StreamsMetricsImpl metrics = internalProcessorContext.metrics();
-            final String threadId = Thread.currentThread().getName();
-            droppedRecordsSensor = droppedRecordsSensor(threadId, context.taskId().toString(), metrics);
-            windowStore = context.getStateStore(storeName);
-            tupleForwarder = new TimestampedTupleForwarder<>(
-                windowStore,
-                context,
-                new TimestampedCacheFlushListener<>(context),
-                sendOldValues);
+    private class KStreamWindowAggregateProcessor extends AbstractKStreamTimeWindowAggregateProcessor<KIn, VIn, VAgg> {
+        protected KStreamWindowAggregateProcessor(final String storeName, final EmitStrategy emitStrategy, final boolean sendOldValues) {
+            super(storeName, emitStrategy, sendOldValues);
         }
 
         @Override
@@ -119,16 +108,17 @@ public void process(final Record<KIn, VIn> record) {
 
             // first get the matching windows
             final long timestamp = record.timestamp();
-            observedStreamTime = Math.max(observedStreamTime, timestamp);
-            final long closeTime = observedStreamTime - windows.gracePeriodMs();
+            updateObservedStreamTime(timestamp);
+            final long windowCloseTime = observedStreamTime - windows.gracePeriodMs();
 
             final Map<Long, W> matchedWindows = windows.windowsFor(timestamp);
 
-            // try update the window, and create the new window for the rest of unmatched window that do not exist yet
+            // try update the window whose end time is still larger than the window close time,
+            // and create the new window for the rest of unmatched window that do not exist yet;
             for (final Map.Entry<Long, W> entry : matchedWindows.entrySet()) {
                 final Long windowStart = entry.getKey();
                 final long windowEnd = entry.getValue().end();
-                if (windowEnd > closeTime) {
+                if (windowEnd > windowCloseTime) {
                     final ValueAndTimestamp<VAgg> oldAggAndTimestamp = windowStore.fetch(record.key(), windowStart);
                     VAgg oldAgg = getValueOrNull(oldAggAndTimestamp);
 
@@ -146,44 +136,47 @@ public void process(final Record<KIn, VIn> record) {
 
                     // update the store with the new value
                     windowStore.put(record.key(), ValueAndTimestamp.make(newAgg, newTimestamp), windowStart);
-                    tupleForwarder.maybeForward(
-                        record.withKey(new Windowed<>(record.key(), entry.getValue()))
-                            .withValue(new Change<>(newAgg, sendOldValues ? oldAgg : null))
-                            .withTimestamp(newTimestamp));
+                    maybeForwardUpdate(record, entry.getValue(), oldAgg, newAgg, newTimestamp);
                 } else {
-                    if (context().recordMetadata().isPresent()) {
-                        final RecordMetadata recordMetadata = context().recordMetadata().get();
-                        log.warn(
-                            "Skipping record for expired window. " +
-                                "topic=[{}] " +
-                                "partition=[{}] " +
-                                "offset=[{}] " +
-                                "timestamp=[{}] " +
-                                "window=[{},{}) " +
-                                "expiration=[{}] " +
-                                "streamTime=[{}]",
-                            recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset(),
-                            record.timestamp(),
-                            windowStart, windowEnd,
-                            closeTime,
-                            observedStreamTime
-                        );
-                    } else {
-                        log.warn(
-                            "Skipping record for expired window. Topic, partition, and offset not known. " +
-                                "timestamp=[{}] " +
-                                "window=[{},{}] " +
-                                "expiration=[{}] " +
-                                "streamTime=[{}]",
-                            record.timestamp(),
-                            windowStart, windowEnd,
-                            closeTime,
-                            observedStreamTime
-                        );
-                    }
-                    droppedRecordsSensor.record();
+                    final String windowString = "[" + windowStart + "," + windowEnd + ")";
+                    logSkippedRecordForExpiredWindow(log, record.timestamp(), windowCloseTime, windowString);
+                }
+            }
+
+            maybeForwardFinalResult(record, windowCloseTime);
+        }
+
+        @Override
+        protected long emitRangeLowerBound(final long windowCloseTime) {
+            // Since time window end timestamp is exclusive, we set the inclusive lower bound plus 1;
+            // Set lower bound to 0L for the first time emit so that when we fetchAll, we fetch from 0L
+            return lastEmitWindowCloseTime == ConsumerRecord.NO_TIMESTAMP ?
+                0L : Math.max(0L, lastEmitWindowCloseTime - windows.size()) + 1;
+        }
+
+        @Override
+        protected long emitRangeUpperBound(final long windowCloseTime) {
+            return windowCloseTime - windows.size();
+        }
+
+        @Override
+        protected boolean shouldRangeFetch(final long emitRangeLowerBound, final long emitRangeUpperBound) {
+            // Don't fetch store if the new emit window close time doesn't
+            // progress enough to cover next window;
+            // Note since window-end time is exclusive we would not count windows whose end time == lower bound, hence
+            // would minus 1 when finding matched windows
+            if (lastEmitWindowCloseTime != ConsumerRecord.NO_TIMESTAMP) {
+                final Map<Long, W> matchedCloseWindows = windows.windowsFor(emitRangeUpperBound);
+                final Map<Long, W> matchedEmitWindows = windows.windowsFor(emitRangeLowerBound - 1);
+
+                if (matchedCloseWindows.equals(matchedEmitWindows)) {
+                    log.trace("No new windows to emit. LastEmitCloseTime={}, emitRangeLowerBound={}, emitRangeUpperBound={}",
+                            lastEmitWindowCloseTime, emitRangeLowerBound, emitRangeUpperBound);
+                    return false;
                 }
             }
+
+            return true;
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
index 3c88732063bba..2abe7f5386bb1 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableImpl.java
@@ -1197,7 +1197,7 @@ private <VR, KO, VO> KTable<K, VR> doJoinOnForeignKey(final KTable<KO, VO> forei
 
         final StreamPartitioner<K, SubscriptionResponseWrapper<VO>> foreignResponseSinkPartitioner =
             tableJoinedInternal.partitioner() == null
-                ? null
+                ? (topic, key, subscriptionResponseWrapper, numPartitions) -> subscriptionResponseWrapper.getPrimaryPartition()
                 : (topic, key, val, numPartitions) ->
                     tableJoinedInternal.partitioner().partition(topic, key, null, numPartitions);
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java
index 6236c4c5c1fc0..6de8ede316b86 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/KTableSource.java
@@ -131,7 +131,7 @@ public void process(final Record<KIn, VIn> record) {
                                     + "topic=[{}] partition=[{}] offset=[{}].",
                                 store.name(),
                                 oldValueAndTimestamp.timestamp(), record.timestamp(),
-                                recordMetadata.topic(), recordMetadata.offset(), recordMetadata.partition()
+                                recordMetadata.topic(), recordMetadata.partition(), recordMetadata.offset() 
                             );
                         } else {
                             LOG.warn(
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/MaterializedInternal.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/MaterializedInternal.java
index 4a3cbb25cd24b..e81934716b299 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/MaterializedInternal.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/MaterializedInternal.java
@@ -19,6 +19,7 @@
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.StoreSupplier;
 
 import java.time.Duration;
@@ -43,6 +44,18 @@ public MaterializedInternal(final Materialized<K, V, S> materialized,
         if (!queryable && nameProvider != null) {
             storeName = nameProvider.newStoreName(generatedStorePrefix);
         }
+
+        // if store type is not configured during creating Materialized, then try to get the topologyConfigs from nameProvider
+        // otherwise, set to default rocksDB
+        if (storeType == null) {
+            storeType = StoreType.ROCKS_DB;
+            if (nameProvider instanceof InternalStreamsBuilder) {
+                final TopologyConfig topologyConfig = ((InternalStreamsBuilder) nameProvider).internalTopologyBuilder.topologyConfigs();
+                if (topologyConfig != null) {
+                    storeType = topologyConfig.parseStoreType();
+                }
+            }
+        }
     }
 
     public String queryableStoreName() {
@@ -56,6 +69,10 @@ public String storeName() {
         return storeName;
     }
 
+    public StoreType storeType() {
+        return storeType;
+    }
+
     public StoreSupplier<S> storeSupplier() {
         return storeSupplier;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarder.java
deleted file mode 100644
index e1c302f875510..0000000000000
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarder.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.kafka.streams.kstream.internals;
-
-import org.apache.kafka.streams.kstream.Windowed;
-import org.apache.kafka.streams.processor.StateStore;
-import org.apache.kafka.streams.processor.api.ProcessorContext;
-import org.apache.kafka.streams.processor.api.Record;
-import org.apache.kafka.streams.state.internals.CacheFlushListener;
-import org.apache.kafka.streams.state.internals.WrappedStateStore;
-
-/**
- * This class is used to determine if a processor should forward values to child nodes.
- * Forwarding by this class only occurs when caching is not enabled. If caching is enabled,
- * forwarding occurs in the flush listener when the cached store flushes.
- *
- * @param <K>
- * @param <V>
- */
-class SessionTupleForwarder<K, V> {
-    private final ProcessorContext<Windowed<K>, Change<V>> context;
-    private final boolean sendOldValues;
-    private final boolean cachingEnabled;
-
-    @SuppressWarnings("unchecked")
-    SessionTupleForwarder(final StateStore store,
-                          final ProcessorContext<Windowed<K>, Change<V>> context,
-                          final CacheFlushListener<Windowed<K>, V> flushListener,
-                          final boolean sendOldValues) {
-        this.context = context;
-        this.sendOldValues = sendOldValues;
-        cachingEnabled = ((WrappedStateStore) store).setFlushListener(flushListener, sendOldValues);
-    }
-
-    public void maybeForward(final Record<Windowed<K>, Change<V>> record) {
-        if (!cachingEnabled) {
-            context.forward(
-                record.withValue(new Change<>(record.value().newValue, sendOldValues ? record.value().oldValue : null))
-                    .withTimestamp(record.key() != null ? record.key().window().end() : record.timestamp()));
-        }
-    }
-}
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedCogroupedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedCogroupedKStreamImpl.java
index a78bcd3dae44e..4279224fa8127 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedCogroupedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedCogroupedKStreamImpl.java
@@ -121,11 +121,25 @@ private  StoreBuilder<SessionStore<K, V>> materialize(final MaterializedInternal
                     + " grace=[" + sessionWindows.gracePeriodMs() + "],"
                     + " retention=[" + retentionPeriod + "]");
             }
-            supplier = Stores.persistentSessionStore(
-                materialized.storeName(),
-                Duration.ofMillis(retentionPeriod)
-            );
+
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemorySessionStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod)
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = Stores.persistentSessionStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod)
+                    );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
+
         final StoreBuilder<SessionStore<K, V>> builder = Stores.sessionStoreBuilder(
             supplier,
             materialized.keySerde(),
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImpl.java
index fe9a3a1f8643c..8c60019fccbde 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImpl.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Materialized;
@@ -35,6 +36,7 @@
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.internals.RocksDbTimeOrderedSessionBytesStoreSupplier;
 
 import java.time.Duration;
 import java.util.Objects;
@@ -48,6 +50,8 @@ public class SessionWindowedKStreamImpl<K, V> extends AbstractStream<K, V> imple
     private final GroupedStreamAggregateBuilder<K, V> aggregateBuilder;
     private final Merger<K, Long> countMerger = (aggKey, aggOne, aggTwo) -> aggOne + aggTwo;
 
+    private EmitStrategy emitStrategy = EmitStrategy.onWindowUpdate();
+
     SessionWindowedKStreamImpl(final SessionWindows windows,
                                final InternalStreamsBuilder builder,
                                final Set<String> subTopologySourceNodes,
@@ -90,6 +94,12 @@ public KTable<Windowed<K>, Long> count(final Named named, final Materialized<K,
         return doCount(named, materialized);
     }
 
+    @Override
+    public SessionWindowedKStream<K, V> emitStrategy(final EmitStrategy emitStrategy) {
+        this.emitStrategy = emitStrategy;
+        return this;
+    }
+
     private KTable<Windowed<K>, Long> doCount(final Named named,
                                               final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized) {
         final MaterializedInternal<K, Long, SessionStore<Bytes, byte[]>> materializedInternal =
@@ -109,6 +119,7 @@ private KTable<Windowed<K>, Long> doCount(final Named named,
             new KStreamSessionWindowAggregate<>(
                 windows,
                 materializedInternal.storeName(),
+                emitStrategy,
                 aggregateBuilder.countInitializer,
                 aggregateBuilder.countAggregator,
                 countMerger),
@@ -157,6 +168,7 @@ public KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
             new KStreamSessionWindowAggregate<>(
                 windows,
                 materializedInternal.storeName(),
+                emitStrategy,
                 aggregateBuilder.reduceInitializer,
                 reduceAggregator,
                 mergerForAggregator(reduceAggregator)
@@ -214,6 +226,7 @@ public <VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
             new KStreamSessionWindowAggregate<>(
                 windows,
                 materializedInternal.storeName(),
+                emitStrategy,
                 initializer,
                 aggregator,
                 sessionMerger),
@@ -237,11 +250,30 @@ private <VR> StoreBuilder<SessionStore<K, VR>> materialize(final MaterializedInt
                                                        + " grace=[" + windows.gracePeriodMs() + "],"
                                                        + " retention=[" + retentionPeriod + "]");
             }
-            supplier = Stores.persistentSessionStore(
-                materialized.storeName(),
-                Duration.ofMillis(retentionPeriod)
-            );
+
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemorySessionStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod)
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE ?
+                        new RocksDbTimeOrderedSessionBytesStoreSupplier(
+                            materialized.storeName(),
+                            retentionPeriod,
+                            true) :
+                        Stores.persistentSessionStore(
+                            materialized.storeName(),
+                            Duration.ofMillis(retentionPeriod)
+                        );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
+
         final StoreBuilder<SessionStore<K, VR>> builder = Stores.sessionStoreBuilder(
             supplier,
             materialized.keySerde(),
@@ -254,9 +286,13 @@ private <VR> StoreBuilder<SessionStore<K, VR>> materialize(final MaterializedInt
             builder.withLoggingDisabled();
         }
 
-        if (materialized.cachingEnabled()) {
+        // do not enable cache if the emit final strategy is used
+        if (materialized.cachingEnabled() && emitStrategy.type() != EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
             builder.withCachingEnabled();
+        } else {
+            builder.withCachingDisabled();
         }
+
         return builder;
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedCogroupedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedCogroupedKStreamImpl.java
index a432b1f1462bb..383fed70a690a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedCogroupedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedCogroupedKStreamImpl.java
@@ -32,6 +32,7 @@
 import org.apache.kafka.streams.state.TimestampedWindowStore;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
 import org.apache.kafka.streams.state.WindowStore;
+
 import java.time.Duration;
 import java.util.Map;
 import java.util.Objects;
@@ -113,14 +114,28 @@ private StoreBuilder<TimestampedWindowStore<K, V>> materialize(final Materialize
                     + "]");
             }
 
-            supplier = Stores.persistentTimestampedWindowStore(
-                materialized.storeName(),
-                Duration.ofMillis(retentionPeriod),
-                Duration.ofMillis(windows.timeDifferenceMs()),
-                false
-            );
-
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemoryWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.timeDifferenceMs()),
+                        false
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = Stores.persistentTimestampedWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.timeDifferenceMs()),
+                        false
+                    );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
+
         final StoreBuilder<TimestampedWindowStore<K, V>> builder = Stores
             .timestampedWindowStoreBuilder(
                 supplier,
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedKStreamImpl.java
index ddfe9abc8bc53..587d2d5a876af 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/SlidingWindowedKStreamImpl.java
@@ -20,6 +20,8 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Materialized;
@@ -34,15 +36,19 @@
 import org.apache.kafka.streams.state.TimestampedWindowStore;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
 import org.apache.kafka.streams.state.WindowStore;
+
 import java.time.Duration;
 import java.util.Objects;
 import java.util.Set;
+import org.apache.kafka.streams.state.internals.RocksDbIndexedTimeOrderedWindowBytesStoreSupplier;
+
 import static org.apache.kafka.streams.kstream.internals.KGroupedStreamImpl.AGGREGATE_NAME;
 import static org.apache.kafka.streams.kstream.internals.KGroupedStreamImpl.REDUCE_NAME;
 
 public class SlidingWindowedKStreamImpl<K, V> extends AbstractStream<K, V> implements TimeWindowedKStream<K, V> {
     private final SlidingWindows windows;
     private final GroupedStreamAggregateBuilder<K, V> aggregateBuilder;
+    private EmitStrategy emitStrategy = EmitStrategy.onWindowUpdate();
 
     SlidingWindowedKStreamImpl(final SlidingWindows windows,
                                final InternalStreamsBuilder builder,
@@ -95,7 +101,7 @@ private KTable<Windowed<K>, Long> doCount(final Named named,
         return aggregateBuilder.build(
                 new NamedInternal(aggregateName),
                 materialize(materializedInternal),
-                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), aggregateBuilder.countInitializer, aggregateBuilder.countAggregator),
+                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), emitStrategy, aggregateBuilder.countInitializer, aggregateBuilder.countAggregator),
                 materializedInternal.queryableStoreName(),
                 materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.timeDifferenceMs()) : null,
                 materializedInternal.valueSerde());
@@ -139,7 +145,7 @@ public <VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
         return aggregateBuilder.build(
                 new NamedInternal(aggregateName),
                 materialize(materializedInternal),
-                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), initializer, aggregator),
+                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), emitStrategy, initializer, aggregator),
                 materializedInternal.queryableStoreName(),
                 materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.timeDifferenceMs()) : null,
                 materializedInternal.valueSerde());
@@ -184,12 +190,18 @@ public KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
         return aggregateBuilder.build(
                 new NamedInternal(reduceName),
                 materialize(materializedInternal),
-                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), aggregateBuilder.reduceInitializer, aggregatorForReducer(reducer)),
+                new KStreamSlidingWindowAggregate<>(windows, materializedInternal.storeName(), emitStrategy, aggregateBuilder.reduceInitializer, aggregatorForReducer(reducer)),
                 materializedInternal.queryableStoreName(),
                 materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.timeDifferenceMs()) : null,
                 materializedInternal.valueSerde());
     }
 
+    @Override
+    public TimeWindowedKStream<K, V> emitStrategy(final EmitStrategy emitStrategy) {
+        this.emitStrategy = emitStrategy;
+        return this;
+    }
+
     private <VR> StoreBuilder<TimestampedWindowStore<K, VR>> materialize(final MaterializedInternal<K, VR, WindowStore<Bytes, byte[]>> materialized) {
         WindowBytesStoreSupplier supplier = (WindowBytesStoreSupplier) materialized.storeSupplier();
         if (supplier == null) {
@@ -204,13 +216,37 @@ private <VR> StoreBuilder<TimestampedWindowStore<K, VR>> materialize(final Mater
                         + " grace=[" + windows.gracePeriodMs() + "],"
                         + " retention=[" + retentionPeriod + "]");
             }
-            supplier = Stores.persistentTimestampedWindowStore(
-                    materialized.storeName(),
-                    Duration.ofMillis(retentionPeriod),
-                    Duration.ofMillis(windows.timeDifferenceMs()),
-                    false
-            );
+
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemoryWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.timeDifferenceMs()),
+                        false
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE ?
+                        RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(
+                            materialized.storeName(),
+                            Duration.ofMillis(retentionPeriod),
+                            Duration.ofMillis(windows.timeDifferenceMs()),
+                            false,
+                            true
+                        ) :
+                        Stores.persistentTimestampedWindowStore(
+                            materialized.storeName(),
+                            Duration.ofMillis(retentionPeriod),
+                            Duration.ofMillis(windows.timeDifferenceMs()),
+                            false
+                        );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
+
         final StoreBuilder<TimestampedWindowStore<K, VR>> builder = Stores.timestampedWindowStoreBuilder(
                 supplier,
                 materialized.keySerde(),
@@ -222,7 +258,9 @@ private <VR> StoreBuilder<TimestampedWindowStore<K, VR>> materialize(final Mater
         } else {
             builder.withLoggingDisabled();
         }
-        if (materialized.cachingEnabled()) {
+
+        // do not enable cache if the emit final strategy is used
+        if (materialized.cachingEnabled() && emitStrategy.type() != StrategyType.ON_WINDOW_CLOSE) {
             builder.withCachingEnabled();
         } else {
             builder.withCachingDisabled();
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedCogroupedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedCogroupedKStreamImpl.java
index 8cef89f6197e1..07b75bd1454f3 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedCogroupedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedCogroupedKStreamImpl.java
@@ -120,12 +120,26 @@ private StoreBuilder<TimestampedWindowStore<K, V>> materialize(
                         + "]");
             }
 
-            supplier = Stores.persistentTimestampedWindowStore(
-                    materialized.storeName(),
-                    Duration.ofMillis(retentionPeriod),
-                    Duration.ofMillis(windows.size()),
-                    false
-            );
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemoryWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.size()),
+                        false
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = Stores.persistentTimestampedWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.size()),
+                        false
+                    );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
 
         final StoreBuilder<TimestampedWindowStore<K, V>> builder = Stores
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImpl.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImpl.java
index 2282672a13651..c07b7839780f2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImpl.java
@@ -20,12 +20,15 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.kstream.Named;
 import org.apache.kafka.streams.kstream.Reducer;
 import org.apache.kafka.streams.kstream.TimeWindowedKStream;
+import org.apache.kafka.streams.kstream.UnlimitedWindows;
 import org.apache.kafka.streams.kstream.Window;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.Windows;
@@ -39,6 +42,7 @@
 import java.time.Duration;
 import java.util.Objects;
 import java.util.Set;
+import org.apache.kafka.streams.state.internals.RocksDbIndexedTimeOrderedWindowBytesStoreSupplier;
 
 import static org.apache.kafka.streams.kstream.internals.KGroupedStreamImpl.AGGREGATE_NAME;
 import static org.apache.kafka.streams.kstream.internals.KGroupedStreamImpl.REDUCE_NAME;
@@ -47,6 +51,7 @@ public class TimeWindowedKStreamImpl<K, V, W extends Window> extends AbstractStr
 
     private final Windows<W> windows;
     private final GroupedStreamAggregateBuilder<K, V> aggregateBuilder;
+    private EmitStrategy emitStrategy = EmitStrategy.onWindowUpdate();
 
     TimeWindowedKStreamImpl(final Windows<W> windows,
                             final InternalStreamsBuilder builder,
@@ -105,14 +110,17 @@ private KTable<Windowed<K>, Long> doCount(final Named named,
         final String aggregateName = new NamedInternal(named).orElseGenerateWithPrefix(builder, AGGREGATE_NAME);
 
         return aggregateBuilder.build(
-                new NamedInternal(aggregateName),
-                materialize(materializedInternal),
-                new KStreamWindowAggregate<>(windows, materializedInternal.storeName(), aggregateBuilder.countInitializer, aggregateBuilder.countAggregator),
-                materializedInternal.queryableStoreName(),
-                materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
-                materializedInternal.valueSerde());
-
-
+            new NamedInternal(aggregateName),
+            materialize(materializedInternal),
+            new KStreamWindowAggregate<>(
+                windows,
+                materializedInternal.storeName(),
+                emitStrategy,
+                aggregateBuilder.countInitializer,
+                aggregateBuilder.countAggregator),
+            materializedInternal.queryableStoreName(),
+            materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
+            materializedInternal.valueSerde());
     }
 
     @Override
@@ -153,14 +161,17 @@ public <VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
         final String aggregateName = new NamedInternal(named).orElseGenerateWithPrefix(builder, AGGREGATE_NAME);
 
         return aggregateBuilder.build(
-                new NamedInternal(aggregateName),
-                materialize(materializedInternal),
-                new KStreamWindowAggregate<>(windows, materializedInternal.storeName(), initializer, aggregator),
-                materializedInternal.queryableStoreName(),
-                materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
-                materializedInternal.valueSerde());
-
-
+            new NamedInternal(aggregateName),
+            materialize(materializedInternal),
+            new KStreamWindowAggregate<>(
+                windows,
+                materializedInternal.storeName(),
+                emitStrategy,
+                initializer,
+                aggregator),
+            materializedInternal.queryableStoreName(),
+            materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
+            materializedInternal.valueSerde());
     }
 
     @Override
@@ -200,14 +211,27 @@ public KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
         final String reduceName = new NamedInternal(named).orElseGenerateWithPrefix(builder, REDUCE_NAME);
 
         return aggregateBuilder.build(
-                new NamedInternal(reduceName),
-                materialize(materializedInternal),
-                new KStreamWindowAggregate<>(windows, materializedInternal.storeName(), aggregateBuilder.reduceInitializer, aggregatorForReducer(reducer)),
-                materializedInternal.queryableStoreName(),
-                materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
-                materializedInternal.valueSerde());
-
+            new NamedInternal(reduceName),
+            materialize(materializedInternal),
+            new KStreamWindowAggregate<>(
+                windows,
+                materializedInternal.storeName(),
+                emitStrategy,
+                aggregateBuilder.reduceInitializer,
+                aggregatorForReducer(reducer)),
+            materializedInternal.queryableStoreName(),
+            materializedInternal.keySerde() != null ? new FullTimeWindowedSerde<>(materializedInternal.keySerde(), windows.size()) : null,
+            materializedInternal.valueSerde());
+    }
 
+    @Override
+    public TimeWindowedKStream<K, V> emitStrategy(final EmitStrategy emitStrategy) {
+        if (this.windows instanceof UnlimitedWindows
+            && emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE) {
+            throw new IllegalArgumentException("ON_WINDOW_CLOSE emit strategy cannot be used for UnlimitedWindows");
+        }
+        this.emitStrategy = emitStrategy;
+        return this;
     }
 
     private <VR> StoreBuilder<TimestampedWindowStore<K, VR>> materialize(final MaterializedInternal<K, VR, WindowStore<Bytes, byte[]>> materialized) {
@@ -224,12 +248,34 @@ private <VR> StoreBuilder<TimestampedWindowStore<K, VR>> materialize(final Mater
                         + " retention=[" + retentionPeriod + "]");
             }
 
-            supplier = Stores.persistentTimestampedWindowStore(
-                    materialized.storeName(),
-                    Duration.ofMillis(retentionPeriod),
-                    Duration.ofMillis(windows.size()),
-                    false
-            );
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemoryWindowStore(
+                        materialized.storeName(),
+                        Duration.ofMillis(retentionPeriod),
+                        Duration.ofMillis(windows.size()),
+                        false
+                    );
+                    break;
+                case ROCKS_DB:
+                    supplier = emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE ?
+                        RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(
+                            materialized.storeName(),
+                            Duration.ofMillis(retentionPeriod),
+                            Duration.ofMillis(windows.size()),
+                            false,
+                            false
+                        ) :
+                        Stores.persistentTimestampedWindowStore(
+                            materialized.storeName(),
+                            Duration.ofMillis(retentionPeriod),
+                            Duration.ofMillis(windows.size()),
+                            false
+                    );
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
 
         final StoreBuilder<TimestampedWindowStore<K, VR>> builder = Stores.timestampedWindowStoreBuilder(
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedKeyValueStoreMaterializer.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedKeyValueStoreMaterializer.java
index fb40b464ce748..edbde7e11c8ba 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedKeyValueStoreMaterializer.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedKeyValueStoreMaterializer.java
@@ -36,9 +36,18 @@ public TimestampedKeyValueStoreMaterializer(final MaterializedInternal<K, V, Key
     public StoreBuilder<TimestampedKeyValueStore<K, V>> materialize() {
         KeyValueBytesStoreSupplier supplier = (KeyValueBytesStoreSupplier) materialized.storeSupplier();
         if (supplier == null) {
-            final String name = materialized.storeName();
-            supplier = Stores.persistentTimestampedKeyValueStore(name);
+            switch (materialized.storeType()) {
+                case IN_MEMORY:
+                    supplier = Stores.inMemoryKeyValueStore(materialized.storeName());
+                    break;
+                case ROCKS_DB:
+                    supplier = Stores.persistentTimestampedKeyValueStore(materialized.storeName());
+                    break;
+                default:
+                    throw new IllegalStateException("Unknown store type: " + materialized.storeType());
+            }
         }
+
         final StoreBuilder<TimestampedKeyValueStore<K, V>> builder = Stores.timestampedKeyValueStoreBuilder(
             supplier,
             materialized.keySerde(),
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedTupleForwarder.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedTupleForwarder.java
index 49f2ab157d9dc..bc686ada7277f 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedTupleForwarder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/TimestampedTupleForwarder.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.state.internals.CacheFlushListener;
 import org.apache.kafka.streams.state.internals.WrappedStateStore;
 
 /**
@@ -38,13 +39,20 @@ class TimestampedTupleForwarder<K, V> {
     @SuppressWarnings({"unchecked", "rawtypes"})
     TimestampedTupleForwarder(final StateStore store,
                               final ProcessorContext<K, Change<V>> context,
-                              final TimestampedCacheFlushListener<K, V> flushListener,
+                              final CacheFlushListener<K, ?> flushListener,
                               final boolean sendOldValues) {
         this.context = (InternalProcessorContext<K, Change<V>>) context;
         this.sendOldValues = sendOldValues;
         cachingEnabled = ((WrappedStateStore) store).setFlushListener(flushListener, sendOldValues);
     }
 
+    TimestampedTupleForwarder(final ProcessorContext<K, Change<V>> context,
+                              final boolean sendOldValues) {
+        this.context = (InternalProcessorContext<K, Change<V>>) context;
+        this.sendOldValues = sendOldValues;
+        cachingEnabled = false;
+    }
+
     public void maybeForward(final Record<K, Change<V>> record) {
         if (!cachingEnabled) {
             if (sendOldValues) {
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitioner.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitioner.java
index 8e1476a7ed0d4..d68a52b8d02a5 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitioner.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitioner.java
@@ -16,12 +16,10 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
-import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.clients.producer.internals.BuiltInPartitioner;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.processor.StreamPartitioner;
 
-import static org.apache.kafka.common.utils.Utils.toPositive;
-
 public class WindowedStreamPartitioner<K, V> implements StreamPartitioner<Windowed<K>, V> {
 
     private final WindowedSerializer<K> serializer;
@@ -43,9 +41,11 @@ public WindowedStreamPartitioner(final WindowedSerializer<K> serializer) {
      */
     @Override
     public Integer partition(final String topic, final Windowed<K> windowedKey, final V value, final int numPartitions) {
+        // for windowed key, the key bytes should never be null
         final byte[] keyBytes = serializer.serializeBaseKey(topic, windowedKey);
 
-        // hash the keyBytes to choose a partition
-        return toPositive(Utils.murmur2(keyBytes)) % numPartitions;
+        // stick with the same built-in partitioner util functions that producer used
+        // to make sure its behavior is consistent with the producer
+        return BuiltInPartitioner.partitionForKey(keyBytes, numPartitions);
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowCloseStrategy.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowCloseStrategy.java
new file mode 100644
index 0000000000000..ddbf1090ac000
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowCloseStrategy.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.emitstrategy;
+
+import org.apache.kafka.streams.kstream.EmitStrategy;
+
+/**
+ * An emit strategy which indicates only output when a window closes.
+ */
+public class WindowCloseStrategy implements EmitStrategy {
+
+    @Override
+    public StrategyType type() {
+        return StrategyType.ON_WINDOW_CLOSE;
+    }
+
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowUpdateStrategy.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowUpdateStrategy.java
new file mode 100644
index 0000000000000..0f87ab22f574b
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/emitstrategy/WindowUpdateStrategy.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.emitstrategy;
+
+import org.apache.kafka.streams.kstream.EmitStrategy;
+
+/**
+ * An emit strategy which indicates output everytime when a window gets an update.
+ */
+public class WindowUpdateStrategy implements EmitStrategy {
+
+    @Override
+    public StrategyType type() {
+        return StrategyType.ON_WINDOW_UPDATE;
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java
index 63afb71a3c283..55e40fce64f18 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplier.java
@@ -108,7 +108,10 @@ public void process(final Record<KO, Change<VO>> record) {
                         final CombinedKey<KO, K> combinedKey = keySchema.fromBytes(next.key);
                         context().forward(
                             record.withKey(combinedKey.getPrimaryKey())
-                                .withValue(new SubscriptionResponseWrapper<>(next.value.value().getHash(), record.value().newValue))
+                                .withValue(new SubscriptionResponseWrapper<>(
+                                    next.value.value().getHash(),
+                                    record.value().newValue,
+                                    next.value.value().getPrimaryPartition()))
                         );
                     }
                 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java
index 9394487c86c1e..0efe4da2bcbf9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionSendProcessorSupplier.java
@@ -103,6 +103,7 @@ public void process(final Record<K, Change<V>> record) {
                 null :
                 Murmur3.hash128(valueSerializer.serialize(valueSerdeTopic, record.value().newValue));
 
+            final int partition = context().recordMetadata().get().partition();
             if (record.value().oldValue != null) {
                 final KO oldForeignKey = foreignKeyExtractor.apply(record.value().oldValue);
                 if (oldForeignKey == null) {
@@ -149,19 +150,34 @@ public void process(final Record<K, Change<V>> record) {
                         //Delete it from the oldKey's state store
                         context().forward(
                             record.withKey(oldForeignKey)
-                                .withValue(new SubscriptionWrapper<>(currentHash, DELETE_KEY_NO_PROPAGATE, record.key())));
+                                .withValue(new SubscriptionWrapper<>(
+                                    currentHash,
+                                    DELETE_KEY_NO_PROPAGATE,
+                                    record.key(),
+                                    partition
+                                )));
                         //Add to the newKey's state store. Additionally, propagate null if no FK is found there,
                         //since we must "unset" any output set by the previous FK-join. This is true for both INNER
                         //and LEFT join.
                     }
                     context().forward(
                         record.withKey(newForeignKey)
-                            .withValue(new SubscriptionWrapper<>(currentHash, PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE, record.key())));
+                            .withValue(new SubscriptionWrapper<>(
+                                currentHash,
+                                PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE,
+                                record.key(),
+                                partition
+                            )));
                 } else {
                     //A simple propagatable delete. Delete from the state store and propagate the delete onwards.
                     context().forward(
                         record.withKey(oldForeignKey)
-                           .withValue(new SubscriptionWrapper<>(currentHash, DELETE_KEY_AND_PROPAGATE, record.key())));
+                           .withValue(new SubscriptionWrapper<>(
+                               currentHash,
+                               DELETE_KEY_AND_PROPAGATE,
+                               record.key(),
+                               partition
+                           )));
                 }
             } else if (record.value().newValue != null) {
                 //change.oldValue is null, which means it was deleted at least once before, or it is brand new.
@@ -193,7 +209,11 @@ public void process(final Record<K, Change<V>> record) {
                 } else {
                     context().forward(
                         record.withKey(newForeignKey)
-                            .withValue(new SubscriptionWrapper<>(currentHash, instruction, record.key())));
+                            .withValue(new SubscriptionWrapper<>(
+                                currentHash,
+                                instruction,
+                                record.key(),
+                                partition)));
                 }
             }
         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java
index 4820beafe1a94..56d6a13321ffb 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionJoinForeignProcessorSupplier.java
@@ -70,7 +70,7 @@ public void process(final Record<CombinedKey<KO, K>, Change<ValueAndTimestamp<Su
                 Objects.requireNonNull(valueAndTimestamp, "This processor should never see a null newValue.");
                 final SubscriptionWrapper<K> value = valueAndTimestamp.value();
 
-                if (value.getVersion() != SubscriptionWrapper.CURRENT_VERSION) {
+                if (value.getVersion() > SubscriptionWrapper.CURRENT_VERSION) {
                     //Guard against modifications to SubscriptionWrapper. Need to ensure that there is compatibility
                     //with previous versions to enable rolling upgrades. Must develop a strategy for upgrading
                     //from older SubscriptionWrapper versions to newer versions.
@@ -88,7 +88,11 @@ public void process(final Record<CombinedKey<KO, K>, Change<ValueAndTimestamp<Su
                     case DELETE_KEY_AND_PROPAGATE:
                         context().forward(
                             record.withKey(record.key().getPrimaryKey())
-                                .withValue(new SubscriptionResponseWrapper<VO>(value.getHash(), null))
+                                .withValue(new SubscriptionResponseWrapper<VO>(
+                                    value.getHash(),
+                                    null,
+                                    value.getPrimaryPartition()
+                                ))
                                 .withTimestamp(resultTimestamp)
                         );
                         break;
@@ -100,7 +104,7 @@ public void process(final Record<CombinedKey<KO, K>, Change<ValueAndTimestamp<Su
 
                         context().forward(
                             record.withKey(record.key().getPrimaryKey())
-                                .withValue(new SubscriptionResponseWrapper<>(value.getHash(), valueToSend))
+                                .withValue(new SubscriptionResponseWrapper<>(value.getHash(), valueToSend, value.getPrimaryPartition()))
                                 .withTimestamp(resultTimestamp)
                         );
                         break;
@@ -108,7 +112,11 @@ public void process(final Record<CombinedKey<KO, K>, Change<ValueAndTimestamp<Su
                         if (foreignValueAndTime != null) {
                             context().forward(
                                 record.withKey(record.key().getPrimaryKey())
-                                   .withValue(new SubscriptionResponseWrapper<>(value.getHash(), foreignValueAndTime.value()))
+                                   .withValue(new SubscriptionResponseWrapper<>(
+                                       value.getHash(),
+                                       foreignValueAndTime.value(),
+                                       value.getPrimaryPartition()
+                                   ))
                                    .withTimestamp(resultTimestamp)
                             );
                         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java
index 9c79e468213a0..99556f16e4621 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapper.java
@@ -16,27 +16,36 @@
  */
 package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
 
+import java.util.Objects;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 
 import java.util.Arrays;
 
 public class SubscriptionResponseWrapper<FV> {
-    final static byte CURRENT_VERSION = 0x00;
+    final static byte CURRENT_VERSION = 0;
+    // v0 fields:
     private final long[] originalValueHash;
     private final FV foreignValue;
     private final byte version;
+    // non-serializing fields
+    private final Integer primaryPartition;
 
-    public SubscriptionResponseWrapper(final long[] originalValueHash, final FV foreignValue) {
-        this(originalValueHash, foreignValue, CURRENT_VERSION);
+    public SubscriptionResponseWrapper(final long[] originalValueHash, final FV foreignValue, final Integer primaryPartition) {
+        this(originalValueHash, foreignValue, CURRENT_VERSION, primaryPartition);
     }
 
-    public SubscriptionResponseWrapper(final long[] originalValueHash, final FV foreignValue, final byte version) {
-        if (version != CURRENT_VERSION) {
+    public SubscriptionResponseWrapper(
+        final long[] originalValueHash,
+        final FV foreignValue,
+        final byte version,
+        final Integer primaryPartition) {
+        if (version < 0 || version > CURRENT_VERSION) {
             throw new UnsupportedVersionException("SubscriptionWrapper does not support version " + version);
         }
         this.originalValueHash = originalValueHash;
         this.foreignValue = foreignValue;
         this.version = version;
+        this.primaryPartition = primaryPartition;
     }
 
     public long[] getOriginalValueHash() {
@@ -51,12 +60,40 @@ public byte getVersion() {
         return version;
     }
 
+    public Integer getPrimaryPartition() {
+        return primaryPartition;
+    }
+
     @Override
     public String toString() {
         return "SubscriptionResponseWrapper{" +
             "version=" + version +
             ", foreignValue=" + foreignValue +
             ", originalValueHash=" + Arrays.toString(originalValueHash) +
+            ", primaryPartition=" + primaryPartition +
             '}';
     }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        final SubscriptionResponseWrapper<?> that = (SubscriptionResponseWrapper<?>) o;
+        return version == that.version &&
+               Arrays.equals(originalValueHash,
+               that.originalValueHash) &&
+               Objects.equals(foreignValue, that.foreignValue) &&
+               Objects.equals(primaryPartition, that.primaryPartition);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = Objects.hash(foreignValue, version, primaryPartition);
+        result = 31 * result + Arrays.hashCode(originalValueHash);
+        return result;
+    }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java
index 8910ff80c3fdd..12a14e7cc4d8d 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerde.java
@@ -91,7 +91,6 @@ public byte[] serialize(final String topic, final SubscriptionResponseWrapper<V>
                 buf.put(serializedData);
             return buf.array();
         }
-
     }
 
     private static final class SubscriptionResponseWrapperDeserializer<V>
@@ -141,9 +140,7 @@ public SubscriptionResponseWrapper<V> deserialize(final String topic, final byte
                 value = null;
             }
 
-            return new SubscriptionResponseWrapper<>(hash, value, version);
+            return new SubscriptionResponseWrapper<>(hash, value, version, null);
         }
-
     }
-
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java
index a757895aecf0d..41d5f1198e549 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapper.java
@@ -23,12 +23,18 @@
 
 
 public class SubscriptionWrapper<K> {
-    static final byte CURRENT_VERSION = 0;
+    static final byte VERSION_0 = 0;
+    static final byte VERSION_1 = 1;
 
+    static final byte CURRENT_VERSION = VERSION_1;
+
+    // v0 fields:
     private final long[] hash;
     private final Instruction instruction;
     private final byte version;
     private final K primaryKey;
+    // v1 fields:
+    private final Integer primaryPartition;
 
     public enum Instruction {
         //Send nothing. Do not propagate.
@@ -65,14 +71,14 @@ public static Instruction fromValue(final byte value) {
         }
     }
 
-    public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey) {
-        this(hash, instruction, primaryKey, CURRENT_VERSION);
+    public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey, final Integer primaryPartition) {
+        this(hash, instruction, primaryKey, CURRENT_VERSION, primaryPartition);
     }
 
-    public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey, final byte version) {
+    public SubscriptionWrapper(final long[] hash, final Instruction instruction, final K primaryKey, final byte version, final Integer primaryPartition) {
         Objects.requireNonNull(instruction, "instruction cannot be null. Required by downstream processor.");
         Objects.requireNonNull(primaryKey, "primaryKey cannot be null. Required by downstream processor.");
-        if (version != CURRENT_VERSION) {
+        if (version < 0 || version > CURRENT_VERSION) {
             throw new UnsupportedVersionException("SubscriptionWrapper does not support version " + version);
         }
 
@@ -80,6 +86,7 @@ public SubscriptionWrapper(final long[] hash, final Instruction instruction, fin
         this.hash = hash;
         this.primaryKey = primaryKey;
         this.version = version;
+        this.primaryPartition = primaryPartition;
     }
 
     public Instruction getInstruction() {
@@ -98,6 +105,10 @@ public byte getVersion() {
         return version;
     }
 
+    public Integer getPrimaryPartition() {
+        return primaryPartition;
+    }
+
     @Override
     public String toString() {
         return "SubscriptionWrapper{" +
@@ -105,6 +116,7 @@ public String toString() {
             ", primaryKey=" + primaryKey +
             ", instruction=" + instruction +
             ", hash=" + Arrays.toString(hash) +
+            ", primaryPartition=" + primaryPartition +
             '}';
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java
index e71376216b693..c04125495cda8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerde.java
@@ -16,10 +16,12 @@
  */
 package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
 
+import java.util.Map;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.Serializer;
+import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.internals.WrappingNullableDeserializer;
 import org.apache.kafka.streams.kstream.internals.WrappingNullableSerde;
 import org.apache.kafka.streams.kstream.internals.WrappingNullableSerializer;
@@ -45,6 +47,7 @@ private static class SubscriptionWrapperSerializer<K>
         private final Supplier<String> primaryKeySerializationPseudoTopicSupplier;
         private String primaryKeySerializationPseudoTopic = null;
         private Serializer<K> primaryKeySerializer;
+        private boolean upgradeFromV0 = false;
 
         SubscriptionWrapperSerializer(final Supplier<String> primaryKeySerializationPseudoTopicSupplier,
                                       final Serializer<K> primaryKeySerializer) {
@@ -60,34 +63,85 @@ public void setIfUnset(final SerdeGetter getter) {
             }
         }
 
+        @Override
+        public void configure(final Map<String, ?> configs, final boolean isKey) {
+            this.upgradeFromV0 = upgradeFromV0(configs);
+        }
+
+        private static boolean upgradeFromV0(final Map<String, ?> configs) {
+            final Object upgradeFrom = configs.get(StreamsConfig.UPGRADE_FROM_CONFIG);
+            if (upgradeFrom == null) {
+                return false;
+            }
+
+            switch ((String) upgradeFrom) {
+                case StreamsConfig.UPGRADE_FROM_0100:
+                case StreamsConfig.UPGRADE_FROM_0101:
+                case StreamsConfig.UPGRADE_FROM_0102:
+                case StreamsConfig.UPGRADE_FROM_0110:
+                case StreamsConfig.UPGRADE_FROM_10:
+                case StreamsConfig.UPGRADE_FROM_11:
+                case StreamsConfig.UPGRADE_FROM_20:
+                case StreamsConfig.UPGRADE_FROM_21:
+                case StreamsConfig.UPGRADE_FROM_22:
+                case StreamsConfig.UPGRADE_FROM_23:
+                case StreamsConfig.UPGRADE_FROM_24:
+                case StreamsConfig.UPGRADE_FROM_25:
+                case StreamsConfig.UPGRADE_FROM_26:
+                case StreamsConfig.UPGRADE_FROM_27:
+                case StreamsConfig.UPGRADE_FROM_28:
+                case StreamsConfig.UPGRADE_FROM_30:
+                case StreamsConfig.UPGRADE_FROM_31:
+                case StreamsConfig.UPGRADE_FROM_32:
+                    return true;
+                default:
+                    return false;
+            }
+        }
+
         @Override
         public byte[] serialize(final String ignored, final SubscriptionWrapper<K> data) {
-            //{1-bit-isHashNull}{7-bits-version}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized}
+            //{1-bit-isHashNull}{7-bits-version}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized}{4-bytes-primaryPartition}
 
             //7-bit (0x7F) maximum for data version.
             if (Byte.compare((byte) 0x7F, data.getVersion()) < 0) {
                 throw new UnsupportedVersionException("SubscriptionWrapper version is larger than maximum supported 0x7F");
             }
 
+            final int version = data.getVersion();
+            if (upgradeFromV0 || version == 0) {
+                return serializeV0(data);
+            } else if (version == 1) {
+                return serializeV1(data);
+            } else {
+                throw new UnsupportedVersionException("Unsupported SubscriptionWrapper version " + data.getVersion());
+            }
+        }
+
+        private byte[] serializePrimaryKey(final SubscriptionWrapper<K> data) {
             if (primaryKeySerializationPseudoTopic == null) {
                 primaryKeySerializationPseudoTopic = primaryKeySerializationPseudoTopicSupplier.get();
             }
 
-            final byte[] primaryKeySerializedData = primaryKeySerializer.serialize(
+            return  primaryKeySerializer.serialize(
                 primaryKeySerializationPseudoTopic,
                 data.getPrimaryKey()
             );
+        }
 
+        private ByteBuffer serializeCommon(final SubscriptionWrapper<K> data, final byte version, final int extraLength) {
+            final byte[] primaryKeySerializedData = serializePrimaryKey(data);
             final ByteBuffer buf;
+            int dataLength = 2 + primaryKeySerializedData.length + extraLength;
             if (data.getHash() != null) {
-                buf = ByteBuffer.allocate(2 + 2 * Long.BYTES + primaryKeySerializedData.length);
-                buf.put(data.getVersion());
+                dataLength += 2 * Long.BYTES;
+                buf = ByteBuffer.allocate(dataLength);
+                buf.put(version);
             } else {
                 //Don't store hash as it's null.
-                buf = ByteBuffer.allocate(2 + primaryKeySerializedData.length);
-                buf.put((byte) (data.getVersion() | (byte) 0x80));
+                buf = ByteBuffer.allocate(dataLength);
+                buf.put((byte) (version | (byte) 0x80));
             }
-
             buf.put(data.getInstruction().getValue());
             final long[] elem = data.getHash();
             if (data.getHash() != null) {
@@ -95,9 +149,18 @@ public byte[] serialize(final String ignored, final SubscriptionWrapper<K> data)
                 buf.putLong(elem[1]);
             }
             buf.put(primaryKeySerializedData);
-            return buf.array();
+            return buf;
+        }
+
+        private byte[] serializeV0(final SubscriptionWrapper<K> data) {
+            return serializeCommon(data, (byte) 0, 0).array();
         }
 
+        private byte[] serializeV1(final SubscriptionWrapper<K> data) {
+            final ByteBuffer buf = serializeCommon(data, data.getVersion(), Integer.BYTES);
+            buf.putInt(data.getPrimaryPartition());
+            return buf.array();
+        }
     }
 
     private static class SubscriptionWrapperDeserializer<K>
@@ -123,15 +186,15 @@ public void setIfUnset(final SerdeGetter getter) {
 
         @Override
         public SubscriptionWrapper<K> deserialize(final String ignored, final byte[] data) {
-            //{7-bits-version}{1-bit-isHashNull}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized}
+            //{7-bits-version}{1-bit-isHashNull}{1-byte-instruction}{Optional-16-byte-Hash}{PK-serialized}{4-bytes-primaryPartition}
             final ByteBuffer buf = ByteBuffer.wrap(data);
             final byte versionAndIsHashNull = buf.get();
             final byte version = (byte) (0x7F & versionAndIsHashNull);
             final boolean isHashNull = (0x80 & versionAndIsHashNull) == 0x80;
             final SubscriptionWrapper.Instruction inst = SubscriptionWrapper.Instruction.fromValue(buf.get());
 
-            final long[] hash;
             int lengthSum = 2; //The first 2 bytes
+            final long[] hash;
             if (isHashNull) {
                 hash = null;
             } else {
@@ -141,17 +204,31 @@ public SubscriptionWrapper<K> deserialize(final String ignored, final byte[] dat
                 lengthSum += 2 * Long.BYTES;
             }
 
-            final byte[] primaryKeyRaw = new byte[data.length - lengthSum]; //The remaining data is the serialized pk
-            buf.get(primaryKeyRaw, 0, primaryKeyRaw.length);
+            final int primaryKeyLength;
+            if (version > 0) {
+                primaryKeyLength = data.length - lengthSum - Integer.BYTES;
+            } else {
+                primaryKeyLength = data.length - lengthSum;
+            }
+            final byte[] primaryKeyRaw = new byte[primaryKeyLength];
+            buf.get(primaryKeyRaw, 0, primaryKeyLength);
 
             if (primaryKeySerializationPseudoTopic == null) {
                 primaryKeySerializationPseudoTopic = primaryKeySerializationPseudoTopicSupplier.get();
             }
 
-            final K primaryKey = primaryKeyDeserializer.deserialize(primaryKeySerializationPseudoTopic,
-                                                                    primaryKeyRaw);
+            final K primaryKey = primaryKeyDeserializer.deserialize(
+                primaryKeySerializationPseudoTopic,
+                primaryKeyRaw
+            );
+            final Integer primaryPartition;
+            if (version > 0) {
+                primaryPartition = buf.getInt();
+            } else {
+                primaryPartition = null;
+            }
 
-            return new SubscriptionWrapper<>(hash, inst, primaryKey, version);
+            return new SubscriptionWrapper<>(hash, inst, primaryKey, version, primaryPartition);
         }
 
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java
index a9693ec814cb8..1135d5791bbb9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/OptimizableRepartitionNode.java
@@ -67,11 +67,7 @@ public String toString() {
     public void writeToTopology(final InternalTopologyBuilder topologyBuilder) {
         topologyBuilder.addInternalTopic(repartitionTopic, internalTopicProperties);
 
-        topologyBuilder.addProcessor(
-            processorParameters.processorName(),
-            processorParameters.processorSupplier(),
-            parentNodeNames()
-        );
+        processorParameters.addProcessorTo(topologyBuilder, parentNodeNames());
 
         topologyBuilder.addSink(
             sinkName,
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java
index a38f516bc1c57..1c8e8cace2b30 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorGraphNode.java
@@ -56,7 +56,6 @@ public String toString() {
 
     @Override
     public void writeToTopology(final InternalTopologyBuilder topologyBuilder) {
-
-        topologyBuilder.addProcessor(processorParameters.processorName(), processorParameters.processorSupplier(), parentNodeNames());
+        processorParameters.addProcessorTo(topologyBuilder, parentNodeNames());
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorParameters.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorParameters.java
index 0107d675042b5..3da4197c861f2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorParameters.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/ProcessorParameters.java
@@ -20,15 +20,21 @@
 import org.apache.kafka.streams.kstream.internals.KTableKTableJoinMerger;
 import org.apache.kafka.streams.kstream.internals.KTableProcessorSupplier;
 import org.apache.kafka.streams.kstream.internals.KTableSource;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
+import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
 import org.apache.kafka.streams.processor.internals.ProcessorAdapter;
+import org.apache.kafka.streams.state.StoreBuilder;
 
 /**
- * Class used to represent a {@link ProcessorSupplier} and the name
+ * Class used to represent a {@link ProcessorSupplier} or {@link FixedKeyProcessorSupplier} and the name
  * used to register it with the {@link org.apache.kafka.streams.processor.internals.InternalTopologyBuilder}
  *
  * Used by the Join nodes as there are several parameters, this abstraction helps
  * keep the number of arguments more reasonable.
+ *
+ * @see ProcessorSupplier
+ * @see FixedKeyProcessorSupplier
  */
 public class ProcessorParameters<KIn, VIn, KOut, VOut> {
 
@@ -37,6 +43,7 @@ public class ProcessorParameters<KIn, VIn, KOut, VOut> {
     @SuppressWarnings("deprecation") // Old PAPI. Needs to be migrated.
     private final org.apache.kafka.streams.processor.ProcessorSupplier<KIn, VIn> oldProcessorSupplier;
     private final ProcessorSupplier<KIn, VIn, KOut, VOut> processorSupplier;
+    private final FixedKeyProcessorSupplier<KIn, VIn, VOut> fixedKeyProcessorSupplier;
     private final String processorName;
 
     @SuppressWarnings("deprecation") // Old PAPI compatibility.
@@ -44,6 +51,7 @@ public ProcessorParameters(final org.apache.kafka.streams.processor.ProcessorSup
                                final String processorName) {
         oldProcessorSupplier = processorSupplier;
         this.processorSupplier = () -> ProcessorAdapter.adapt(processorSupplier.get());
+        fixedKeyProcessorSupplier = null;
         this.processorName = processorName;
     }
 
@@ -51,6 +59,15 @@ public ProcessorParameters(final ProcessorSupplier<KIn, VIn, KOut, VOut> process
                                final String processorName) {
         oldProcessorSupplier = null;
         this.processorSupplier = processorSupplier;
+        fixedKeyProcessorSupplier = null;
+        this.processorName = processorName;
+    }
+
+    public ProcessorParameters(final FixedKeyProcessorSupplier<KIn, VIn, VOut> processorSupplier,
+                               final String processorName) {
+        oldProcessorSupplier = null;
+        this.processorSupplier = null;
+        fixedKeyProcessorSupplier = processorSupplier;
         this.processorName = processorName;
     }
 
@@ -58,9 +75,36 @@ public ProcessorSupplier<KIn, VIn, KOut, VOut> processorSupplier() {
         return processorSupplier;
     }
 
-    @SuppressWarnings("deprecation") // Old PAPI. Needs to be migrated.
-    public org.apache.kafka.streams.processor.ProcessorSupplier<KIn, VIn> oldProcessorSupplier() {
-        return oldProcessorSupplier;
+    public FixedKeyProcessorSupplier<KIn, VIn, VOut> fixedKeyProcessorSupplier() {
+        return fixedKeyProcessorSupplier;
+    }
+
+    public void addProcessorTo(final InternalTopologyBuilder topologyBuilder, final String[] parentNodeNames) {
+        if (processorSupplier != null) {
+            topologyBuilder.addProcessor(processorName, processorSupplier, parentNodeNames);
+            if (processorSupplier.stores() != null) {
+                for (final StoreBuilder<?> storeBuilder : processorSupplier.stores()) {
+                    topologyBuilder.addStateStore(storeBuilder, processorName);
+                }
+            }
+        }
+
+        if (fixedKeyProcessorSupplier != null) {
+            topologyBuilder.addProcessor(processorName, fixedKeyProcessorSupplier, parentNodeNames);
+            if (fixedKeyProcessorSupplier.stores() != null) {
+                for (final StoreBuilder<?> storeBuilder : fixedKeyProcessorSupplier.stores()) {
+                    topologyBuilder.addStateStore(storeBuilder, processorName);
+                }
+            }
+        }
+
+        // temporary hack until KIP-478 is fully implemented
+        // Old PAPI. Needs to be migrated.
+        if (oldProcessorSupplier != null && oldProcessorSupplier.stores() != null) {
+            for (final StoreBuilder<?> storeBuilder : oldProcessorSupplier.stores()) {
+                topologyBuilder.addStateStore(storeBuilder, processorName);
+            }
+        }
     }
 
     @SuppressWarnings("unchecked")
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java
index 381a88a10d034..f13d1e0647634 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/StatefulProcessorNode.java
@@ -17,7 +17,6 @@
 package org.apache.kafka.streams.kstream.internals.graph;
 
 import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier;
-import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
 import org.apache.kafka.streams.state.StoreBuilder;
 
@@ -79,35 +78,14 @@ public String toString() {
 
     @Override
     public void writeToTopology(final InternalTopologyBuilder topologyBuilder) {
-
-        final String processorName = processorParameters().processorName();
-        final ProcessorSupplier<K, V, ?, ?> processorSupplier = processorParameters().processorSupplier();
-
-        topologyBuilder.addProcessor(processorName, processorSupplier, parentNodeNames());
+        processorParameters().addProcessorTo(topologyBuilder, parentNodeNames());
 
         if (storeNames != null && storeNames.length > 0) {
-            topologyBuilder.connectProcessorAndStateStores(processorName, storeNames);
+            topologyBuilder.connectProcessorAndStateStores(processorParameters().processorName(), storeNames);
         }
 
         if (storeBuilder != null) {
-            topologyBuilder.addStateStore(storeBuilder, processorName);
-        }
-
-        if (processorSupplier.stores() != null) {
-            for (final StoreBuilder<?> storeBuilder : processorSupplier.stores()) {
-                topologyBuilder.addStateStore(storeBuilder, processorName);
-            }
+            topologyBuilder.addStateStore(storeBuilder, processorParameters().processorName());
         }
-
-        // temporary hack until KIP-478 is fully implemented
-        @SuppressWarnings("deprecation") // Old PAPI. Needs to be migrated.
-        final org.apache.kafka.streams.processor.ProcessorSupplier<K, V> oldProcessorSupplier =
-            processorParameters().oldProcessorSupplier();
-        if (oldProcessorSupplier != null && oldProcessorSupplier.stores() != null) {
-            for (final StoreBuilder<?> storeBuilder : oldProcessorSupplier.stores()) {
-                topologyBuilder.addStateStore(storeBuilder, processorName);
-            }
-        }
-
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/UnoptimizableRepartitionNode.java b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/UnoptimizableRepartitionNode.java
index daac9bdd532e9..afbb03d7955f6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/UnoptimizableRepartitionNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/kstream/internals/graph/UnoptimizableRepartitionNode.java
@@ -53,11 +53,7 @@ private UnoptimizableRepartitionNode(final String nodeName,
     public void writeToTopology(final InternalTopologyBuilder topologyBuilder) {
         topologyBuilder.addInternalTopic(repartitionTopic, internalTopicProperties);
 
-        topologyBuilder.addProcessor(
-            processorParameters.processorName(),
-            processorParameters.processorSupplier(),
-            parentNodeNames()
-        );
+        processorParameters.addProcessorTo(topologyBuilder, parentNodeNames());
 
         topologyBuilder.addSink(
             sinkName,
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/RecordContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/RecordContext.java
index 9b21df83dd058..66b1f8dff05ac 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/RecordContext.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/RecordContext.java
@@ -21,7 +21,7 @@
 
 /**
  * The context associated with the current record being processed by
- * an {@link org.apache.kafka.streams.processor.api.Processor}
+ * a {@link org.apache.kafka.streams.processor.Processor}
  */
 public interface RecordContext {
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java b/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java
index a435cafb89bea..90ffa3a4a8362 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/StreamPartitioner.java
@@ -16,12 +16,11 @@
  */
 package org.apache.kafka.streams.processor;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.streams.Topology;
 
 /**
  * Determine how records are distributed among the partitions in a Kafka topic. If not specified, the underlying producer's
- * {@link DefaultPartitioner} will be used to determine the partition.
+ * {@link org.apache.kafka.clients.producer.internals.DefaultPartitioner} will be used to determine the partition.
  * <p>
  * Kafka topics are divided into one or more <i>partitions</i>. Since each partition must fit on the servers that host it, so
  * using multiple partitions allows the topic to scale beyond a size that will fit on a single machine. Partitions also enable you
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/ContextualFixedKeyProcessor.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/ContextualFixedKeyProcessor.java
new file mode 100644
index 0000000000000..02f0ae2af1d73
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/ContextualFixedKeyProcessor.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+/**
+ * An abstract implementation of {@link FixedKeyProcessor} that manages the
+ * {@link FixedKeyProcessorContext} instance and provides default no-op
+ * implementation of {@link #close()}.
+ *
+ * @param <KIn> the type of input keys
+ * @param <VIn> the type of input values
+ * @param <VOut> the type of output values
+ */
+public abstract class ContextualFixedKeyProcessor<KIn, VIn, VOut> implements FixedKeyProcessor<KIn, VIn, VOut> {
+
+    private FixedKeyProcessorContext<KIn, VOut> context;
+
+    protected ContextualFixedKeyProcessor() {}
+
+    @Override
+    public void init(final FixedKeyProcessorContext<KIn, VOut> context) {
+        this.context = context;
+    }
+
+    /**
+     * Get the processor's context set during {@link #init(FixedKeyProcessorContext) initialization}.
+     *
+     * @return the processor context; null only when called prior to {@link #init(FixedKeyProcessorContext) initialization}.
+     */
+    protected final FixedKeyProcessorContext<KIn, VOut> context() {
+        return context;
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessor.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessor.java
new file mode 100644
index 0000000000000..dbdd8dd744293
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessor.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+import org.apache.kafka.streams.processor.PunctuationType;
+import org.apache.kafka.streams.processor.Punctuator;
+import org.apache.kafka.streams.processor.StateStore;
+
+import java.time.Duration;
+
+/**
+ * A processor of key-value pair records where keys are immutable.
+ *
+ * @param <KIn> the type of input keys
+ * @param <VIn> the type of input values
+ * @param <VOut> the type of output values
+ */
+public interface FixedKeyProcessor<KIn, VIn, VOut> {
+
+    /**
+     * Initialize this processor with the given context. The framework ensures this is called once per processor when the topology
+     * that contains it is initialized. When the framework is done with the processor, {@link #close()} will be called on it; the
+     * framework may later re-use the processor by calling {@code #init()} again.
+     * <p>
+     * The provided {@link FixedKeyProcessorContext context} can be used to access topology and record metadata, to
+     * {@link FixedKeyProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
+     * {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
+     *
+     * @param context the context; may not be null
+     */
+    default void init(final FixedKeyProcessorContext<KIn, VOut> context) {}
+
+    /**
+     * Process the record. Note that record metadata is undefined in cases such as a forward call from a punctuator.
+     *
+     * @param record the record to process
+     */
+    void process(FixedKeyRecord<KIn, VIn> record);
+
+    /**
+     * Close this processor and clean up any resources. Be aware that {@code #close()} is called after an internal cleanup.
+     * Thus, it is not possible to write anything to Kafka as underlying clients are already closed. The framework may
+     * later re-use this processor by calling {@code #init()} on it again.
+     * <p>
+     * Note: Do not close any streams managed resources, like {@link StateStore}s here, as they are managed by the library.
+     */
+    default void close() {}
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorContext.java
new file mode 100644
index 0000000000000..1b940c2d17a0b
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorContext.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+/**
+ * Processor context interface for {@link FixedKeyRecord}.
+ *
+ * @param <KForward> a bound on the types of keys that may be forwarded
+ * @param <VForward> a bound on the types of values that may be forwarded
+ */
+public interface FixedKeyProcessorContext<KForward, VForward> extends ProcessingContext {
+
+    /**
+     * Forward a record to all child processors.
+     * <p>
+     * Note that the forwarded {@link FixedKeyRecord} is shared between the parent and child
+     * processors. And of course, the parent may forward the same object to multiple children,
+     * and the child may forward it to grandchildren, etc. Therefore, you should be mindful
+     * of mutability.
+     * <p>
+     * The {@link FixedKeyRecord} class itself is immutable (all the setter-style methods return an
+     * independent copy of the instance). However, the value and headers referenced by
+     * the Record may themselves be mutable.
+     * <p>
+     * Some programs may opt to make use of this mutability for high performance, in which case
+     * the input record may be mutated and then forwarded by each {@link FixedKeyProcessor}. However,
+     * most applications should instead favor safety.
+     * <p>
+     * Forwarding records safely simply means to make a copy of the record before you mutate it.
+     * This is trivial when using the {@link FixedKeyRecord#withValue(Object)},
+     * and {@link FixedKeyRecord#withTimestamp(long)} methods, as each of these methods make a copy of the
+     * record as a matter of course. But a little extra care must be taken with headers, since
+     * the {@link org.apache.kafka.common.header.Header} class is mutable. The easiest way to
+     * safely handle headers is to use the {@link FixedKeyRecord} constructors to make a copy before
+     * modifying headers.
+     * <p>
+     * In other words, this would be considered unsafe:
+     * <code>
+     *     process(FixedKeyRecord inputRecord) {
+     *         inputRecord.headers().add(...);
+     *         context.forward(inputRecord);
+     *     }
+     * </code>
+     * This is unsafe because the parent, and potentially siblings, grandparents, etc.,
+     * all will see this modification to their shared Headers reference. This is a violation
+     * of causality and could lead to undefined behavior.
+     * <p>
+     * A safe usage would look like this:
+     * <code>
+     *     process(FixedKeyRecord inputRecord) {
+     *         // makes a copy of the headers
+     *         FixedKeyRecord toForward = inputRecord.withHeaders(inputRecord.headers());
+     *         // Other options to create a safe copy are:
+     *         // * use any copy-on-write method, which makes a copy of all fields:
+     *         //   toForward = inputRecord.withValue();
+     *         // * explicitly copy all fields:
+     *         //   toForward = new FixedKeyRecord(inputRecord.key(), inputRecord.value(), inputRecord.timestamp(), inputRecord.headers());
+     *         // * create a fresh, empty Headers:
+     *         //   toForward = new FixedKeyRecord(inputRecord.key(), inputRecord.value(), inputRecord.timestamp());
+     *         // * etc.
+     *
+     *         // now, we are modifying our own independent copy of the headers.
+     *         toForward.headers().add(...);
+     *         context.forward(toForward);
+     *     }
+     * </code>
+     * @param record The record to forward to all children
+     */
+    <K extends KForward, V extends VForward> void forward(FixedKeyRecord<K, V> record);
+
+    /**
+     * Forward a record to the specified child processor.
+     * See {@link FixedKeyProcessorContext#forward(FixedKeyRecord)} for considerations.
+     *
+     * @param record The record to forward
+     * @param childName The name of the child processor to receive the record
+     * @see FixedKeyProcessorContext#forward(FixedKeyRecord)
+     */
+    <K extends KForward, V extends VForward> void forward(FixedKeyRecord<K, V> record, final String childName);
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorSupplier.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorSupplier.java
new file mode 100644
index 0000000000000..06c5699b9d5b4
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyProcessorSupplier.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+import org.apache.kafka.streams.processor.ConnectedStoreProvider;
+
+import java.util.function.Supplier;
+
+/**
+ * A processor supplier that can create one or more {@link FixedKeyProcessor} instances.
+ * <p>
+ * The supplier should always generate a new instance each time {@link FixedKeyProcessorSupplier#get()} gets called. Creating
+ * a single {@link FixedKeyProcessor} object and returning the same object reference in {@link FixedKeyProcessorSupplier#get()} would be
+ * a violation of the supplier pattern and leads to runtime exceptions.
+ *
+ * @param <KIn> the type of input keys
+ * @param <VIn> the type of input values
+ * @param <VOut> the type of output values
+ */
+@FunctionalInterface
+public interface FixedKeyProcessorSupplier<KIn, VIn, VOut>
+    extends ConnectedStoreProvider, Supplier<FixedKeyProcessor<KIn, VIn, VOut>> {
+
+    /**
+     * Return a newly constructed {@link FixedKeyProcessor} instance.
+     * The supplier should always generate a new instance each time {@code FixedKeyProcessorSupplier#get()} gets called.
+     * <p>
+     * Creating a single {@link FixedKeyProcessor} object and returning the same object reference in {@code FixedKeyProcessorSupplier#get()}
+     * is a violation of the supplier pattern and leads to runtime exceptions.
+     *
+     * @return a new {@link FixedKeyProcessor} instance
+     */
+    FixedKeyProcessor<KIn, VIn, VOut> get();
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyRecord.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyRecord.java
new file mode 100644
index 0000000000000..1d9490d7750e7
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/FixedKeyRecord.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.streams.errors.StreamsException;
+
+import java.util.Objects;
+
+/**
+ * A data class representing an incoming record with fixed key for processing in a {@link FixedKeyProcessor}
+ * or a record to forward to downstream processors via {@link FixedKeyProcessorContext}.
+ *
+ * This class encapsulates all the data attributes of a record: the key and value, but
+ * also the timestamp of the record and any record headers.
+ * Though key is not allowed to be changes.
+ *
+ * This class is immutable, though the objects referenced in the attributes of this class
+ * may themselves be mutable.
+ *
+ * @param <K> The type of the fixed key
+ * @param <V> The type of the value
+ */
+public final class FixedKeyRecord<K, V> {
+
+    private final K key;
+    private final V value;
+    private final long timestamp;
+    private final Headers headers;
+
+    /**
+     * Package-private constructor. Users must not construct this class directly, but only
+     * modify records they were handed by the framework.
+     */
+    FixedKeyRecord(final K key, final V value, final long timestamp, final Headers headers) {
+        this.key = key;
+        this.value = value;
+        if (timestamp < 0) {
+            throw new StreamsException(
+                "Malformed Record",
+                new IllegalArgumentException("Timestamp may not be negative. Got: " + timestamp)
+            );
+        }
+        this.timestamp = timestamp;
+        this.headers = new RecordHeaders(headers);
+    }
+
+    /**
+     * The key of the record. May be null.
+     */
+    public K key() {
+        return key;
+    }
+
+    /**
+     * The value of the record. May be null.
+     */
+    public V value() {
+        return value;
+    }
+
+    /**
+     * The timestamp of the record. Will never be negative.
+     */
+    public long timestamp() {
+        return timestamp;
+    }
+
+    /**
+     * The headers of the record. Never null.
+     */
+    public Headers headers() {
+        return headers;
+    }
+
+    /**
+     * A convenient way to produce a new record if you only need to change the value.
+     *
+     * Copies the attributes of this record with the value replaced.
+     *
+     * @param value The value of the result record.
+     * @param <NewV> The type of the new record's value.
+     * @return A new Record instance with all the same attributes (except that the value is replaced).
+     */
+    public <NewV> FixedKeyRecord<K, NewV> withValue(final NewV value) {
+        return new FixedKeyRecord<>(key, value, timestamp, headers);
+    }
+
+    /**
+     * A convenient way to produce a new record if you only need to change the timestamp.
+     *
+     * Copies the attributes of this record with the timestamp replaced.
+     *
+     * @param timestamp The timestamp of the result record.
+     * @return A new Record instance with all the same attributes (except that the timestamp is replaced).
+     */
+    public FixedKeyRecord<K, V> withTimestamp(final long timestamp) {
+        return new FixedKeyRecord<>(key, value, timestamp, headers);
+    }
+
+    /**
+     * A convenient way to produce a new record if you only need to change the headers.
+     *
+     * Copies the attributes of this record with the headers replaced.
+     * Also makes a copy of the provided headers.
+     *
+     * See {@link FixedKeyProcessorContext#forward(FixedKeyRecord)} for
+     * considerations around mutability of keys, values, and headers.
+     *
+     * @param headers The headers of the result record.
+     * @return A new Record instance with all the same attributes (except that the headers are replaced).
+     */
+    public FixedKeyRecord<K, V> withHeaders(final Headers headers) {
+        return new FixedKeyRecord<>(key, value, timestamp, headers);
+    }
+
+    @Override
+    public String toString() {
+        return "FixedKeyRecord{" +
+            "key=" + key +
+            ", value=" + value +
+            ", timestamp=" + timestamp +
+            ", headers=" + headers +
+            '}';
+    }
+
+    @Override
+    public boolean equals(final Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        final FixedKeyRecord<?, ?> record = (FixedKeyRecord<?, ?>) o;
+        return timestamp == record.timestamp &&
+            Objects.equals(key, record.key) &&
+            Objects.equals(value, record.value) &&
+            Objects.equals(headers, record.headers);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(key, value, timestamp, headers);
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/InternalFixedKeyRecordFactory.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/InternalFixedKeyRecordFactory.java
new file mode 100644
index 0000000000000..2d215a821e581
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/InternalFixedKeyRecordFactory.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+public final class InternalFixedKeyRecordFactory {
+
+    private InternalFixedKeyRecordFactory() {
+    }
+
+    /**
+     * Only allowed way to create {@link FixedKeyRecord}s.
+     * <p/>
+     * DO NOT USE THIS FACTORY OUTSIDE THE FRAMEWORK.
+     * This could produce undesired results by not partitioning record properly.
+     *
+     * @see FixedKeyProcessor
+     */
+    public static <KIn, VIn> FixedKeyRecord<KIn, VIn> create(final Record<KIn, VIn> record) {
+        return new FixedKeyRecord<>(
+            record.key(),
+            record.value(),
+            record.timestamp(),
+            record.headers()
+        );
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessingContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessingContext.java
new file mode 100644
index 0000000000000..30be03753caf6
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessingContext.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.api;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsMetrics;
+import org.apache.kafka.streams.Topology;
+import org.apache.kafka.streams.processor.Cancellable;
+import org.apache.kafka.streams.processor.PunctuationType;
+import org.apache.kafka.streams.processor.Punctuator;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.TimestampExtractor;
+
+import java.io.File;
+import java.time.Duration;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Processor context interface.
+ */
+public interface ProcessingContext {
+
+    /**
+     * Return the application id.
+     *
+     * @return the application id
+     */
+    String applicationId();
+
+    /**
+     * Return the task id.
+     *
+     * @return the task id
+     */
+    TaskId taskId();
+
+    /**
+     * Return the metadata of the current record if available. Processors may be invoked to
+     * process a source record from an input topic, to run a scheduled punctuation
+     * (see {@link ProcessingContext#schedule(Duration, PunctuationType, Punctuator)}),
+     * or because a parent processor called {@code forward(Record)}.
+     * <p>
+     * In the case of a punctuation, there is no source record, so this metadata would be
+     * undefined. Note that when a punctuator invokes {@code forward(Record)},
+     * downstream processors will receive the forwarded record as a regular
+     * {@link Processor#process(Record)} or {@link FixedKeyProcessor#process(FixedKeyRecord)} invocation.
+     * In other words, it wouldn't be apparent to
+     * downstream processors whether the record being processed came from an input topic
+     * or punctuation and therefore whether this metadata is defined. This is why
+     * the return type of this method is {@link Optional}.
+     * <p>
+     * If there is any possibility of punctuators upstream, any access
+     * to this field should consider the case of
+     * "<code>recordMetadata().isPresent() == false</code>".
+     * Of course, it would be safest to always guard this condition.
+     */
+    Optional<RecordMetadata> recordMetadata();
+
+    /**
+     * Return the default key serde.
+     *
+     * @return the key serializer
+     */
+    Serde<?> keySerde();
+
+    /**
+     * Return the default value serde.
+     *
+     * @return the value serializer
+     */
+    Serde<?> valueSerde();
+
+    /**
+     * Return the state directory for the partition.
+     *
+     * @return the state directory
+     */
+    File stateDir();
+
+    /**
+     * Return Metrics instance.
+     *
+     * @return StreamsMetrics
+     */
+    StreamsMetrics metrics();
+
+    /**
+     * Get the state store given the store name.
+     *
+     * @param name The store name
+     * @param <S> The type or interface of the store to return
+     * @return The state store instance
+     *
+     * @throws ClassCastException if the return type isn't a type or interface of the actual returned store.
+     */
+    <S extends StateStore> S getStateStore(final String name);
+
+    /**
+     * Schedule a periodic operation for processors. A processor may call this method during
+     * {@link Processor#init(ProcessorContext) initialization},
+     * {@link Processor#process(Record) processing},
+     * {@link FixedKeyProcessor#init(FixedKeyProcessorContext) initialization}, or
+     * {@link FixedKeyProcessor#process(FixedKeyRecord) processing} to
+     * schedule a periodic callback &mdash; called a punctuation &mdash; to {@link Punctuator#punctuate(long)}.
+     * The type parameter controls what notion of time is used for punctuation:
+     * <ul>
+     *   <li>{@link PunctuationType#STREAM_TIME} &mdash; uses "stream time", which is advanced by the processing of messages
+     *   in accordance with the timestamp as extracted by the {@link TimestampExtractor} in use.
+     *   The first punctuation will be triggered by the first record that is processed.
+     *   <b>NOTE:</b> Only advanced if messages arrive</li>
+     *   <li>{@link PunctuationType#WALL_CLOCK_TIME} &mdash; uses system time (the wall-clock time),
+     *   which is advanced independent of whether new messages arrive.
+     *   The first punctuation will be triggered after interval has elapsed.
+     *   <b>NOTE:</b> This is best effort only as its granularity is limited by how long an iteration of the
+     *   processing loop takes to complete</li>
+     * </ul>
+     *
+     * <b>Skipping punctuations:</b> Punctuations will not be triggered more than once at any given timestamp.
+     * This means that "missed" punctuation will be skipped.
+     * It's possible to "miss" a punctuation if:
+     * <ul>
+     *   <li>with {@link PunctuationType#STREAM_TIME}, when stream time advances more than interval</li>
+     *   <li>with {@link PunctuationType#WALL_CLOCK_TIME}, on GC pause, too short interval, ...</li>
+     * </ul>
+     *
+     * @param interval the time interval between punctuations (supported minimum is 1 millisecond)
+     * @param type one of: {@link PunctuationType#STREAM_TIME}, {@link PunctuationType#WALL_CLOCK_TIME}
+     * @param callback a function consuming timestamps representing the current stream or system time
+     * @return a handle allowing cancellation of the punctuation schedule established by this method
+     * @throws IllegalArgumentException if the interval is not representable in milliseconds
+     */
+    Cancellable schedule(final Duration interval,
+                         final PunctuationType type,
+                         final Punctuator callback);
+
+    /**
+     * Request a commit.
+     */
+    void commit();
+
+    /**
+     * Returns all the application config properties as key/value pairs.
+     *
+     * <p> The config properties are defined in the {@link org.apache.kafka.streams.StreamsConfig}
+     * object and associated to the ProcessorContext.
+     *
+     * <p> The type of the values is dependent on the {@link org.apache.kafka.common.config.ConfigDef.Type type} of the property
+     * (e.g. the value of {@link org.apache.kafka.streams.StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG DEFAULT_KEY_SERDE_CLASS_CONFIG}
+     * will be of type {@link Class}, even if it was specified as a String to
+     * {@link org.apache.kafka.streams.StreamsConfig#StreamsConfig(Map) StreamsConfig(Map)}).
+     *
+     * @return all the key/values from the StreamsConfig properties
+     */
+    Map<String, Object> appConfigs();
+
+    /**
+     * Return all the application config properties with the given key prefix, as key/value pairs
+     * stripping the prefix.
+     *
+     * <p> The config properties are defined in the {@link org.apache.kafka.streams.StreamsConfig}
+     * object and associated to the ProcessorContext.
+     *
+     * @param prefix the properties prefix
+     * @return the key/values matching the given prefix from the StreamsConfig properties.
+     */
+    Map<String, Object> appConfigsWithPrefix(final String prefix);
+
+    /**
+     * Return the current system timestamp (also called wall-clock time) in milliseconds.
+     *
+     * <p> Note: this method returns the internally cached system timestamp from the Kafka Stream runtime.
+     * Thus, it may return a different value compared to {@code System.currentTimeMillis()}.
+     *
+     * @return the current system timestamp in milliseconds
+     */
+    long currentSystemTimeMs();
+
+    /**
+     * Return the current stream-time in milliseconds.
+     *
+     * <p> Stream-time is the maximum observed {@link TimestampExtractor record timestamp} so far
+     * (including the currently processed record), i.e., it can be considered a high-watermark.
+     * Stream-time is tracked on a per-task basis and is preserved across restarts and during task migration.
+     *
+     * <p> Note: this method is not supported for global processors (cf. {@link Topology#addGlobalStore} (...)
+     * and {@link StreamsBuilder#addGlobalStore} (...),
+     * because there is no concept of stream-time for this case.
+     * Calling this method in a global processor will result in an {@link UnsupportedOperationException}.
+     *
+     * @return the current stream-time in milliseconds
+     */
+    long currentStreamTimeMs();
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessorContext.java
index d110a76ef7bbb..4ce993c8887ee 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessorContext.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/api/ProcessorContext.java
@@ -16,138 +16,13 @@
  */
 package org.apache.kafka.streams.processor.api;
 
-import org.apache.kafka.common.serialization.Serde;
-import org.apache.kafka.streams.StreamsMetrics;
-import org.apache.kafka.streams.processor.Cancellable;
-import org.apache.kafka.streams.processor.PunctuationType;
-import org.apache.kafka.streams.processor.Punctuator;
-import org.apache.kafka.streams.processor.StateStore;
-import org.apache.kafka.streams.processor.TaskId;
-import org.apache.kafka.streams.processor.TimestampExtractor;
-
-import java.io.File;
-import java.time.Duration;
-import java.util.Map;
-import java.util.Optional;
-
 /**
- * Processor context interface.
+ * Processor context interface for {@link Record}.
  *
  * @param <KForward> a bound on the types of keys that may be forwarded
  * @param <VForward> a bound on the types of values that may be forwarded
  */
-public interface ProcessorContext<KForward, VForward> {
-
-    /**
-     * Return the application id.
-     *
-     * @return the application id
-     */
-    String applicationId();
-
-    /**
-     * Return the task id.
-     *
-     * @return the task id
-     */
-    TaskId taskId();
-
-    /**
-     * Return the metadata of the current record if available. Processors may be invoked to
-     * process a source record from an input topic, to run a scheduled punctuation
-     * (see {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator)}),
-     * or because a parent processor called {@link ProcessorContext#forward(Record)}.
-     * <p>
-     * In the case of a punctuation, there is no source record, so this metadata would be
-     * undefined. Note that when a punctuator invokes {@link ProcessorContext#forward(Record)},
-     * downstream processors will receive the forwarded record as a regular
-     * {@link Processor#process(Record)} invocation. In other words, it wouldn't be apparent to
-     * downstream processors whether or not the record being processed came from an input topic
-     * or punctuation and therefore whether or not this metadata is defined. This is why
-     * the return type of this method is {@link Optional}.
-     * <p>
-     * If there is any possibility of punctuators upstream, any access
-     * to this field should consider the case of
-     * "<code>recordMetadata().isPresent() == false</code>".
-     * Of course, it would be safest to always guard this condition.
-     */
-    Optional<RecordMetadata> recordMetadata();
-
-    /**
-     * Return the default key serde.
-     *
-     * @return the key serializer
-     */
-    Serde<?> keySerde();
-
-    /**
-     * Return the default value serde.
-     *
-     * @return the value serializer
-     */
-    Serde<?> valueSerde();
-
-    /**
-     * Return the state directory for the partition.
-     *
-     * @return the state directory
-     */
-    File stateDir();
-
-    /**
-     * Return Metrics instance.
-     *
-     * @return StreamsMetrics
-     */
-    StreamsMetrics metrics();
-
-    /**
-     * Get the state store given the store name.
-     *
-     * @param name The store name
-     * @param <S> The type or interface of the store to return
-     * @return The state store instance
-     *
-     * @throws ClassCastException if the return type isn't a type or interface of the actual returned store.
-     */
-    <S extends StateStore> S getStateStore(final String name);
-
-    /**
-     * Schedule a periodic operation for processors. A processor may call this method during
-     * {@link Processor#init(ProcessorContext) initialization} or
-     * {@link Processor#process(Record)}  processing} to
-     * schedule a periodic callback &mdash; called a punctuation &mdash; to {@link Punctuator#punctuate(long)}.
-     * The type parameter controls what notion of time is used for punctuation:
-     * <ul>
-     *   <li>{@link PunctuationType#STREAM_TIME} &mdash; uses "stream time", which is advanced by the processing of messages
-     *   in accordance with the timestamp as extracted by the {@link TimestampExtractor} in use.
-     *   The first punctuation will be triggered by the first record that is processed.
-     *   <b>NOTE:</b> Only advanced if messages arrive</li>
-     *   <li>{@link PunctuationType#WALL_CLOCK_TIME} &mdash; uses system time (the wall-clock time),
-     *   which is advanced independent of whether new messages arrive.
-     *   The first punctuation will be triggered after interval has elapsed.
-     *   <b>NOTE:</b> This is best effort only as its granularity is limited by how long an iteration of the
-     *   processing loop takes to complete</li>
-     * </ul>
-     *
-     * <b>Skipping punctuations:</b> Punctuations will not be triggered more than once at any given timestamp.
-     * This means that "missed" punctuation will be skipped.
-     * It's possible to "miss" a punctuation if:
-     * <ul>
-     *   <li>with {@link PunctuationType#STREAM_TIME}, when stream time advances more than interval</li>
-     *   <li>with {@link PunctuationType#WALL_CLOCK_TIME}, on GC pause, too short interval, ...</li>
-     * </ul>
-     *
-     * @param interval the time interval between punctuations (supported minimum is 1 millisecond)
-     * @param type one of: {@link PunctuationType#STREAM_TIME}, {@link PunctuationType#WALL_CLOCK_TIME}
-     * @param callback a function consuming timestamps representing the current stream or system time
-     * @return a handle allowing cancellation of the punctuation schedule established by this method
-     * @throws IllegalArgumentException if the interval is not representable in milliseconds
-     */
-    Cancellable schedule(final Duration interval,
-                         final PunctuationType type,
-                         final Punctuator callback);
-
+public interface ProcessorContext<KForward, VForward> extends ProcessingContext {
     /**
      * Forward a record to all child processors.
      * <p>
@@ -215,36 +90,4 @@ Cancellable schedule(final Duration interval,
      * @see ProcessorContext#forward(Record)
      */
     <K extends KForward, V extends VForward> void forward(Record<K, V> record, final String childName);
-
-    /**
-     * Request a commit.
-     */
-    void commit();
-
-    /**
-     * Returns all the application config properties as key/value pairs.
-     *
-     * <p> The config properties are defined in the {@link org.apache.kafka.streams.StreamsConfig}
-     * object and associated to the ProcessorContext.
-     *
-     * <p> The type of the values is dependent on the {@link org.apache.kafka.common.config.ConfigDef.Type type} of the property
-     * (e.g. the value of {@link org.apache.kafka.streams.StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG DEFAULT_KEY_SERDE_CLASS_CONFIG}
-     * will be of type {@link Class}, even if it was specified as a String to
-     * {@link org.apache.kafka.streams.StreamsConfig#StreamsConfig(Map) StreamsConfig(Map)}).
-     *
-     * @return all the key/values from the StreamsConfig properties
-     */
-    Map<String, Object> appConfigs();
-
-    /**
-     * Return all the application config properties with the given key prefix, as key/value pairs
-     * stripping the prefix.
-     *
-     * <p> The config properties are defined in the {@link org.apache.kafka.streams.StreamsConfig}
-     * object and associated to the ProcessorContext.
-     *
-     * @param prefix the properties prefix
-     * @return the key/values matching the given prefix from the StreamsConfig properties.
-     */
-    Map<String, Object> appConfigsWithPrefix(final String prefix);
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContext.java
index 4140f129be33b..af6190e9645d9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContext.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContext.java
@@ -48,6 +48,7 @@ public abstract class AbstractProcessorContext<KOut, VOut> implements InternalPr
     protected ProcessorNode<?, ?, ?, ?> currentNode;
     private long cachedSystemTimeMs;
     protected ThreadCache cache;
+    private ProcessorMetadata processorMetadata;
 
     public AbstractProcessorContext(final TaskId taskId,
                                     final StreamsConfig config,
@@ -60,6 +61,7 @@ public AbstractProcessorContext(final TaskId taskId,
         valueSerde = null;
         keySerde = null;
         this.cache = cache;
+        processorMetadata = new ProcessorMetadata();
     }
 
     protected abstract StateManager stateManager();
@@ -254,4 +256,25 @@ public TaskType taskType() {
     public String changelogFor(final String storeName) {
         return stateManager().changelogFor(storeName);
     }
+
+    @Override
+    public void addProcessorMetadataKeyValue(final String key, final long value) {
+        processorMetadata.put(key, value);
+    }
+
+    @Override
+    public Long processorMetadataForKey(final String key) {
+        return processorMetadata.get(key);
+    }
+
+    @Override
+    public void setProcessorMetadata(final ProcessorMetadata metadata) {
+        Objects.requireNonNull(metadata);
+        processorMetadata = metadata;
+    }
+
+    @Override
+    public ProcessorMetadata getProcessorMetadata() {
+        return processorMetadata;
+    }
 }
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractReadWriteDecorator.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractReadWriteDecorator.java
index aff099af5fec6..3c7f70ea0706e 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractReadWriteDecorator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractReadWriteDecorator.java
@@ -259,6 +259,12 @@ public KeyValueIterator<Windowed<K>, AGG> findSessions(final K keyFrom,
             return wrapped().findSessions(keyFrom, keyTo, earliestSessionEndTime, latestSessionStartTime);
         }
 
+        @Override
+        public KeyValueIterator<Windowed<K>, AGG> findSessions(final long earliestSessionEndTime,
+                                                               final long latestSessionEndTime) {
+            return wrapped().findSessions(earliestSessionEndTime, latestSessionEndTime);
+        }
+
         @Override
         public void remove(final Windowed<K> sessionKey) {
             wrapped().remove(sessionKey);
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
index 4e652a6dfc47d..a88d89fc333f5 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/AbstractTask.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.TopologyConfig.TaskConfig;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.errors.TaskMigratedException;
 import org.apache.kafka.streams.processor.StateStore;
@@ -27,6 +28,7 @@
 import org.slf4j.Logger;
 
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -41,10 +43,11 @@ public abstract class AbstractTask implements Task {
     private Task.State state = CREATED;
     private long deadlineMs = NO_DEADLINE;
 
-    protected Set<TopicPartition> inputPartitions;
     protected final Logger log;
-    protected final LogContext logContext;
     protected final String logPrefix;
+    protected final LogContext logContext;
+
+    protected Set<TopicPartition> inputPartitions;
 
     /**
      * If the checkpoint has not been loaded from the file yet (null), then we should not overwrite the checkpoint;
@@ -55,25 +58,25 @@ public abstract class AbstractTask implements Task {
     protected Map<TopicPartition, Long> offsetSnapshotSinceLastFlush = null;
 
     protected final TaskId id;
+    protected final TaskConfig config;
     protected final ProcessorTopology topology;
     protected final StateDirectory stateDirectory;
     protected final ProcessorStateManager stateMgr;
-    private final long taskTimeoutMs;
 
     AbstractTask(final TaskId id,
                  final ProcessorTopology topology,
                  final StateDirectory stateDirectory,
                  final ProcessorStateManager stateMgr,
                  final Set<TopicPartition> inputPartitions,
-                 final long taskTimeoutMs,
+                 final TaskConfig config,
                  final String taskType,
                  final Class<? extends AbstractTask> clazz) {
         this.id = id;
         this.stateMgr = stateMgr;
         this.topology = topology;
+        this.config = config;
         this.inputPartitions = inputPartitions;
         this.stateDirectory = stateDirectory;
-        this.taskTimeoutMs = taskTimeoutMs;
 
         final String threadIdPrefix = String.format("stream-thread [%s] ", Thread.currentThread().getName());
         logPrefix = threadIdPrefix + String.format("%s [%s] ", taskType, id);
@@ -88,7 +91,8 @@ public abstract class AbstractTask implements Task {
      * @throws StreamsException fatal error when flushing the state store, for example sending changelog records failed
      *                          or flushing state store get IO errors; such error should cause the thread to die
      */
-    protected void maybeWriteCheckpoint(final boolean enforceCheckpoint) {
+    @Override
+    public void maybeCheckpoint(final boolean enforceCheckpoint) {
         final Map<TopicPartition, Long> offsetSnapshot = stateMgr.changelogOffsets();
         if (StateManagerUtil.checkpointNeeded(enforceCheckpoint, offsetSnapshotSinceLastFlush, offsetSnapshot)) {
             // the state's current offset would be used to checkpoint
@@ -98,7 +102,6 @@ protected void maybeWriteCheckpoint(final boolean enforceCheckpoint) {
         }
     }
 
-
     @Override
     public TaskId id() {
         return id;
@@ -106,11 +109,11 @@ public TaskId id() {
 
     @Override
     public Set<TopicPartition> inputPartitions() {
-        return inputPartitions;
+        return Collections.unmodifiableSet(inputPartitions);
     }
 
     @Override
-    public Collection<TopicPartition> changelogPartitions() {
+    public Set<TopicPartition> changelogPartitions() {
         return stateMgr.changelogPartitions();
     }
 
@@ -151,7 +154,8 @@ final void transitionTo(final Task.State newState) {
 
     @Override
     public void updateInputPartitions(final Set<TopicPartition> topicPartitions, final Map<String, List<String>> allTopologyNodesToSourceTopics) {
-        this.inputPartitions = topicPartitions;
+        this.inputPartitions.clear();
+        this.inputPartitions.addAll(topicPartitions);
         topology.updateSourceTopics(allTopologyNodesToSourceTopics);
     }
 
@@ -159,12 +163,12 @@ public void updateInputPartitions(final Set<TopicPartition> topicPartitions, fin
     public void maybeInitTaskTimeoutOrThrow(final long currentWallClockMs,
                                             final Exception cause) {
         if (deadlineMs == NO_DEADLINE) {
-            deadlineMs = currentWallClockMs + taskTimeoutMs;
+            deadlineMs = currentWallClockMs + config.taskTimeoutMs;
         } else if (currentWallClockMs > deadlineMs) {
             final String errorMessage = String.format(
                 "Task %s did not make progress within %d ms. Adjust `%s` if needed.",
                 id,
-                currentWallClockMs - deadlineMs + taskTimeoutMs,
+                currentWallClockMs - deadlineMs + config.taskTimeoutMs,
                 StreamsConfig.TASK_TIMEOUT_MS_CONFIG
             );
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreator.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreator.java
index d5545eafece7e..46455111dbd98 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreator.java
@@ -28,7 +28,6 @@
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode;
 import org.apache.kafka.streams.processor.TaskId;
-import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics;
 import org.apache.kafka.streams.state.internals.ThreadCache;
@@ -44,7 +43,6 @@
 import java.util.UUID;
 import java.util.stream.Collectors;
 
-import static org.apache.kafka.common.utils.Utils.filterMap;
 import static org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode.EXACTLY_ONCE_ALPHA;
 import static org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode.EXACTLY_ONCE_V2;
 import static org.apache.kafka.streams.internals.StreamsConfigUtils.eosEnabled;
@@ -68,11 +66,6 @@ class ActiveTaskCreator {
     private final Map<TaskId, StreamsProducer> taskProducers;
     private final ProcessingMode processingMode;
 
-    // Tasks may have been assigned for a NamedTopology that is not yet known by this host. When that occurs we stash
-    // these unknown tasks until either the corresponding NamedTopology is added and we can create them at last, or
-    // we receive a new assignment and they are revoked from the thread.
-    private final Map<TaskId, Set<TopicPartition>> unknownTasksToBeCreated = new HashMap<>();
-
     ActiveTaskCreator(final TopologyMetadata topologyMetadata,
                       final StreamsConfig applicationConfig,
                       final StreamsMetricsImpl streamsMetrics,
@@ -142,33 +135,14 @@ StreamsProducer threadProducer() {
         return threadProducer;
     }
 
-    void removeRevokedUnknownTasks(final Set<TaskId> assignedTasks) {
-        unknownTasksToBeCreated.keySet().retainAll(assignedTasks);
-    }
-
-    Map<TaskId, Set<TopicPartition>> uncreatedTasksForTopologies(final Set<String> currentTopologies) {
-        return filterMap(unknownTasksToBeCreated, t -> currentTopologies.contains(t.getKey().topologyName()));
-    }
-
-    // TODO: change return type to `StreamTask`
-    Collection<Task> createTasks(final Consumer<byte[], byte[]> consumer,
-                                 final Map<TaskId, Set<TopicPartition>> tasksToBeCreated) {
-        // TODO: change type to `StreamTask`
+    public Collection<Task> createTasks(final Consumer<byte[], byte[]> consumer,
+                                        final Map<TaskId, Set<TopicPartition>> tasksToBeCreated) {
         final List<Task> createdTasks = new ArrayList<>();
-        final Map<TaskId, Set<TopicPartition>> newUnknownTasks = new HashMap<>();
 
         for (final Map.Entry<TaskId, Set<TopicPartition>> newTaskAndPartitions : tasksToBeCreated.entrySet()) {
             final TaskId taskId = newTaskAndPartitions.getKey();
-            final Set<TopicPartition> partitions = newTaskAndPartitions.getValue();
-
             final LogContext logContext = getLogContext(taskId);
-
-            // task belongs to a named topology that hasn't been added yet, wait until it has to create this
-            if (taskId.topologyName() != null && !topologyMetadata.namedTopologiesView().contains(taskId.topologyName())) {
-                newUnknownTasks.put(taskId, partitions);
-                continue;
-            }
-
+            final Set<TopicPartition> partitions = newTaskAndPartitions.getValue();
             final ProcessorTopology topology = topologyMetadata.buildSubtopology(taskId);
 
             final ProcessorStateManager stateManager = new ProcessorStateManager(
@@ -182,7 +156,7 @@ Collection<Task> createTasks(final Consumer<byte[], byte[]> consumer,
                 partitions
             );
 
-            final InternalProcessorContext context = new ProcessorContextImpl(
+            final InternalProcessorContext<Object, Object> context = new ProcessorContextImpl(
                 taskId,
                 applicationConfig,
                 stateManager,
@@ -201,44 +175,13 @@ Collection<Task> createTasks(final Consumer<byte[], byte[]> consumer,
                     context
                 )
             );
-            unknownTasksToBeCreated.remove(taskId);
-        }
-        if (!newUnknownTasks.isEmpty()) {
-            log.info("Delaying creation of tasks not yet known by this instance: {}", newUnknownTasks.keySet());
-            unknownTasksToBeCreated.putAll(newUnknownTasks);
         }
         return createdTasks;
     }
 
-
-    StreamTask createActiveTaskFromStandby(final StandbyTask standbyTask,
-                                           final Set<TopicPartition> inputPartitions,
-                                           final Consumer<byte[], byte[]> consumer) {
-        final InternalProcessorContext context = standbyTask.processorContext();
-        final ProcessorStateManager stateManager = standbyTask.stateMgr;
-        final LogContext logContext = getLogContext(standbyTask.id);
-
-        standbyTask.closeCleanAndRecycleState();
-        stateManager.transitionTaskType(TaskType.ACTIVE, logContext);
-
-        return createActiveTask(
-            standbyTask.id,
-            inputPartitions,
-            consumer,
-            logContext,
-            topologyMetadata.buildSubtopology(standbyTask.id),
-            stateManager,
-            context
-        );
-    }
-
-    private StreamTask createActiveTask(final TaskId taskId,
-                                        final Set<TopicPartition> inputPartitions,
-                                        final Consumer<byte[], byte[]> consumer,
-                                        final LogContext logContext,
-                                        final ProcessorTopology topology,
-                                        final ProcessorStateManager stateManager,
-                                        final InternalProcessorContext context) {
+    private RecordCollector createRecordCollector(final TaskId taskId,
+                                                  final LogContext logContext,
+                                                  final ProcessorTopology topology) {
         final StreamsProducer streamsProducer;
         if (processingMode == ProcessingMode.EXACTLY_ONCE_ALPHA) {
             log.info("Creating producer client for task {}", taskId);
@@ -249,20 +192,69 @@ private StreamTask createActiveTask(final TaskId taskId,
                 taskId,
                 null,
                 logContext,
-                time);
+                time
+            );
             taskProducers.put(taskId, streamsProducer);
         } else {
             streamsProducer = threadProducer;
         }
 
-        final RecordCollector recordCollector = new RecordCollectorImpl(
+        return new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             applicationConfig.defaultProductionExceptionHandler(),
-            streamsMetrics
+            streamsMetrics,
+            topology
+        );
+    }
+
+    /*
+     * TODO: we pass in the new input partitions to validate if they still match,
+     *       in the future we when we have fixed partitions -> tasks mapping,
+     *       we should always reuse the input partition and hence no need validations
+     */
+    StreamTask createActiveTaskFromStandby(final StandbyTask standbyTask,
+                                           final Set<TopicPartition> inputPartitions,
+                                           final Consumer<byte[], byte[]> consumer) {
+        if (!inputPartitions.equals(standbyTask.inputPartitions)) {
+            log.warn("Detected unmatched input partitions for task {} when recycling it from standby to active", standbyTask.id);
+        }
+
+        standbyTask.prepareRecycle();
+        standbyTask.stateMgr.transitionTaskType(Task.TaskType.ACTIVE);
+
+        final RecordCollector recordCollector = createRecordCollector(standbyTask.id, getLogContext(standbyTask.id), standbyTask.topology);
+        final StreamTask task = new StreamTask(
+            standbyTask.id,
+            inputPartitions,
+            standbyTask.topology,
+            consumer,
+            standbyTask.config,
+            streamsMetrics,
+            stateDirectory,
+            cache,
+            time,
+            standbyTask.stateMgr,
+            recordCollector,
+            standbyTask.processorContext,
+            standbyTask.logContext
         );
 
+        log.trace("Created active task {} from recycled standby task with assigned partitions {}", task.id, inputPartitions);
+        createTaskSensor.record();
+        return task;
+    }
+
+    private StreamTask createActiveTask(final TaskId taskId,
+                                        final Set<TopicPartition> inputPartitions,
+                                        final Consumer<byte[], byte[]> consumer,
+                                        final LogContext logContext,
+                                        final ProcessorTopology topology,
+                                        final ProcessorStateManager stateManager,
+                                        final InternalProcessorContext<Object, Object> context) {
+        final RecordCollector recordCollector = createRecordCollector(taskId, logContext, topology);
+
         final StreamTask task = new StreamTask(
             taskId,
             inputPartitions,
@@ -279,7 +271,7 @@ private StreamTask createActiveTask(final TaskId taskId,
             logContext
         );
 
-        log.trace("Created task {} with assigned partitions {}", taskId, inputPartitions);
+        log.trace("Created active task {} with assigned partitions {}", taskId, inputPartitions);
         createTaskSensor.record();
         return task;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java
index 9c62dd182e8bd..03199d294caae 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ChangelogReader.java
@@ -46,6 +46,17 @@ public interface ChangelogReader extends ChangelogRegister {
      */
     Set<TopicPartition> completedChangelogs();
 
+    /**
+     * Returns whether all changelog partitions were completely read.
+     *
+     * Since changelog partitions for standby tasks are never completely read, this method will always return
+     * {@code false} if the changelog reader registered changelog partitions for standby tasks.
+     *
+     * @return {@code true} if all changelog partitions were completely read and no standby changelog partitions are read,
+     *         {@code false} otherwise
+     */
+    boolean allChangelogsCompleted();
+
     /**
      * Clear all partitions
      */
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ClientUtils.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ClientUtils.java
index b47b68f8c4aa7..1177e29d8259c 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ClientUtils.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ClientUtils.java
@@ -21,12 +21,17 @@
 import org.apache.kafka.clients.admin.OffsetSpec;
 import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.TimeoutException;
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.processor.TaskId;
@@ -166,4 +171,46 @@ public static String extractThreadId(final String fullThreadName) {
         final int index = fullThreadName.indexOf("StreamThread-");
         return fullThreadName.substring(index);
     }
+
+    public static long producerRecordSizeInBytes(final ProducerRecord<byte[], byte[]> record) {
+        return recordSizeInBytes(
+            record.key() == null ? 0 : record.key().length,
+            record.value() == null ? 0 : record.value().length,
+            record.topic(),
+            record.headers()
+        );
+    }
+
+    public static long consumerRecordSizeInBytes(final ConsumerRecord<byte[], byte[]> record) {
+        return recordSizeInBytes(
+            record.serializedKeySize(),
+            record.serializedValueSize(),
+            record.topic(),
+            record.headers()
+        );
+    }
+
+    private static long recordSizeInBytes(final long keyBytes,
+                                          final long valueBytes,
+                                          final String topic,
+                                          final Headers headers) {
+        long headerSizeInBytes = 0L;
+
+        if (headers != null) {
+            for (final Header header : headers.toArray()) {
+                headerSizeInBytes += Utils.utf8(header.key()).length;
+                if (header.value() != null) {
+                    headerSizeInBytes += header.value().length;
+                }
+            }
+        }
+
+        return keyBytes +
+            valueBytes +
+            8L + // timestamp
+            8L + // offset
+            Utils.utf8(topic).length +
+            4L + // partition
+            headerSizeInBytes;
+    }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdater.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdater.java
new file mode 100644
index 0000000000000..803b96bf2ce1c
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdater.java
@@ -0,0 +1,609 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.errors.StreamsException;
+import org.apache.kafka.streams.errors.TaskCorruptedException;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.internals.Task.State;
+import org.apache.kafka.streams.processor.internals.TaskAndAction.Action;
+import org.slf4j.Logger;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Supplier;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public class DefaultStateUpdater implements StateUpdater {
+
+    private final static String BUG_ERROR_MESSAGE = "This indicates a bug. " +
+        "Please report at https://issues.apache.org/jira/projects/KAFKA/issues or to the dev-mailing list (https://kafka.apache.org/contact).";
+
+    private class StateUpdaterThread extends Thread {
+
+        private final ChangelogReader changelogReader;
+        private final AtomicBoolean isRunning = new AtomicBoolean(true);
+        private final Map<TaskId, Task> updatingTasks = new ConcurrentHashMap<>();
+        private final Logger log;
+
+        public StateUpdaterThread(final String name, final ChangelogReader changelogReader) {
+            super(name);
+            this.changelogReader = changelogReader;
+
+            final String logPrefix = String.format("%s ", name);
+            final LogContext logContext = new LogContext(logPrefix);
+            log = logContext.logger(DefaultStateUpdater.class);
+        }
+
+        public Collection<Task> getUpdatingTasks() {
+            return updatingTasks.values();
+        }
+
+        public Collection<StandbyTask> getUpdatingStandbyTasks() {
+            return updatingTasks.values().stream()
+                .filter(t -> !t.isActive())
+                .map(t -> (StandbyTask) t)
+                .collect(Collectors.toList());
+        }
+
+        public boolean onlyStandbyTasksLeft() {
+            return !updatingTasks.isEmpty() && updatingTasks.values().stream().noneMatch(Task::isActive);
+        }
+
+        @Override
+        public void run() {
+            log.info("State updater thread started");
+            try {
+                while (isRunning.get()) {
+                    try {
+                        runOnce();
+                    } catch (final InterruptedException interruptedException) {
+                        return;
+                    }
+                }
+            } catch (final RuntimeException anyOtherException) {
+                handleRuntimeException(anyOtherException);
+            } finally {
+                clear();
+                shutdownGate.countDown();
+                log.info("State updater thread shutdown");
+            }
+        }
+
+        private void runOnce() throws InterruptedException {
+            performActionsOnTasks();
+            restoreTasks();
+            maybeCheckpointUpdatingTasks(time.milliseconds());
+            waitIfAllChangelogsCompletelyRead();
+        }
+
+        private void performActionsOnTasks() {
+            tasksAndActionsLock.lock();
+            try {
+                for (final TaskAndAction taskAndAction : getTasksAndActions()) {
+                    final Action action = taskAndAction.getAction();
+                    switch (action) {
+                        case ADD:
+                            addTask(taskAndAction.getTask());
+                            break;
+                        case REMOVE:
+                            removeTask(taskAndAction.getTaskId());
+                            break;
+                        case PAUSE:
+                            pauseTask(taskAndAction.getTaskId());
+                            break;
+                        case RESUME:
+                            resumeTask(taskAndAction.getTaskId());
+                            break;
+                    }
+                }
+            } finally {
+                tasksAndActionsLock.unlock();
+            }
+        }
+
+        private void restoreTasks() {
+            try {
+                changelogReader.restore(updatingTasks);
+            } catch (final TaskCorruptedException taskCorruptedException) {
+                handleTaskCorruptedException(taskCorruptedException);
+            } catch (final StreamsException streamsException) {
+                handleStreamsException(streamsException);
+            }
+            final Set<TopicPartition> completedChangelogs = changelogReader.completedChangelogs();
+            final List<Task> activeTasks = updatingTasks.values().stream().filter(Task::isActive).collect(Collectors.toList());
+            for (final Task task : activeTasks) {
+                maybeCompleteRestoration((StreamTask) task, completedChangelogs);
+            }
+        }
+
+        private void handleRuntimeException(final RuntimeException runtimeException) {
+            log.error("An unexpected error occurred within the state updater thread: " + runtimeException);
+            addToExceptionsAndFailedTasksThenClearUpdatingTasks(new ExceptionAndTasks(new HashSet<>(updatingTasks.values()), runtimeException));
+            isRunning.set(false);
+        }
+
+        private void handleTaskCorruptedException(final TaskCorruptedException taskCorruptedException) {
+            log.info("Encountered task corrupted exception: ", taskCorruptedException);
+            final Set<TaskId> corruptedTaskIds = taskCorruptedException.corruptedTasks();
+            final Set<Task> corruptedTasks = new HashSet<>();
+            for (final TaskId taskId : corruptedTaskIds) {
+                final Task corruptedTask = updatingTasks.get(taskId);
+                if (corruptedTask == null) {
+                    throw new IllegalStateException("Task " + taskId + " is corrupted but is not updating. " + BUG_ERROR_MESSAGE);
+                }
+                corruptedTasks.add(corruptedTask);
+            }
+            addToExceptionsAndFailedTasksThenRemoveFromUpdatingTasks(new ExceptionAndTasks(corruptedTasks, taskCorruptedException));
+        }
+
+        private void handleStreamsException(final StreamsException streamsException) {
+            log.info("Encountered streams exception: ", streamsException);
+            if (streamsException.taskId().isPresent()) {
+                handleStreamsExceptionWithTask(streamsException);
+            } else {
+                handleStreamsExceptionWithoutTask(streamsException);
+            }
+        }
+
+        private void handleStreamsExceptionWithTask(final StreamsException streamsException) {
+            final TaskId failedTaskId = streamsException.taskId().get();
+            if (!updatingTasks.containsKey(failedTaskId)) {
+                throw new IllegalStateException("Task " + failedTaskId + " failed but is not updating. " + BUG_ERROR_MESSAGE);
+            }
+            final Set<Task> failedTask = new HashSet<>();
+            failedTask.add(updatingTasks.get(failedTaskId));
+            addToExceptionsAndFailedTasksThenRemoveFromUpdatingTasks(new ExceptionAndTasks(failedTask, streamsException));
+        }
+
+        private void handleStreamsExceptionWithoutTask(final StreamsException streamsException) {
+            addToExceptionsAndFailedTasksThenClearUpdatingTasks(
+                new ExceptionAndTasks(new HashSet<>(updatingTasks.values()), streamsException));
+        }
+
+        // It is important to remove the corrupted tasks from the updating tasks after they were added to the
+        // failed tasks.
+        // This ensures that all tasks are found in DefaultStateUpdater#getTasks().
+        private void addToExceptionsAndFailedTasksThenRemoveFromUpdatingTasks(final ExceptionAndTasks exceptionAndTasks) {
+            exceptionsAndFailedTasks.add(exceptionAndTasks);
+            exceptionAndTasks.getTasks().stream().map(Task::id).forEach(updatingTasks::remove);
+            transitToUpdateStandbysIfOnlyStandbysLeft();
+        }
+
+        private void addToExceptionsAndFailedTasksThenClearUpdatingTasks(final ExceptionAndTasks exceptionAndTasks) {
+            exceptionsAndFailedTasks.add(exceptionAndTasks);
+            updatingTasks.clear();
+        }
+
+        private void waitIfAllChangelogsCompletelyRead() throws InterruptedException {
+            if (isRunning.get() && changelogReader.allChangelogsCompleted()) {
+                tasksAndActionsLock.lock();
+                try {
+                    while (tasksAndActions.isEmpty()) {
+                        tasksAndActionsCondition.await();
+                    }
+                } finally {
+                    tasksAndActionsLock.unlock();
+                }
+            }
+        }
+
+        private void clear() {
+            tasksAndActionsLock.lock();
+            restoredActiveTasksLock.lock();
+            try {
+                tasksAndActions.clear();
+                restoredActiveTasks.clear();
+            } finally {
+                restoredActiveTasksLock.unlock();
+                tasksAndActionsLock.unlock();
+            }
+            changelogReader.clear();
+            updatingTasks.clear();
+        }
+
+        private List<TaskAndAction> getTasksAndActions() {
+            final List<TaskAndAction> tasksAndActionsToProcess = new ArrayList<>(tasksAndActions);
+            tasksAndActions.clear();
+            return tasksAndActionsToProcess;
+        }
+
+        private void addTask(final Task task) {
+            if (isStateless(task)) {
+                addToRestoredTasks((StreamTask) task);
+                log.debug("Stateless active task " + task.id() + " was added to the restored tasks of the state updater");
+            } else {
+                final Task existingTask = updatingTasks.putIfAbsent(task.id(), task);
+                if (existingTask != null) {
+                    throw new IllegalStateException((existingTask.isActive() ? "Active" : "Standby") + " task " + task.id() + " already exist, " +
+                        "should not try to add another " + (task.isActive() ? "active" : "standby") + " task with the same id. " + BUG_ERROR_MESSAGE);
+                }
+
+                if (task.isActive()) {
+                    log.debug("Stateful active task " + task.id() + " was added to the updating tasks of the state updater");
+                    changelogReader.enforceRestoreActive();
+                } else {
+                    log.debug("Standby task " + task.id() + " was added to the updating tasks of the state updater");
+                    if (updatingTasks.size() == 1) {
+                        changelogReader.transitToUpdateStandby();
+                    }
+                }
+            }
+        }
+
+        private void removeTask(final TaskId taskId) {
+            final Task task;
+            if (updatingTasks.containsKey(taskId)) {
+                task = updatingTasks.get(taskId);
+                task.maybeCheckpoint(true);
+                final Collection<TopicPartition> changelogPartitions = task.changelogPartitions();
+                changelogReader.unregister(changelogPartitions);
+                removedTasks.add(task);
+                updatingTasks.remove(taskId);
+                transitToUpdateStandbysIfOnlyStandbysLeft();
+                log.debug((task.isActive() ? "Active" : "Standby")
+                    + " task " + task.id() + " was removed from the updating tasks and added to the removed tasks.");
+            } else if (pausedTasks.containsKey(taskId)) {
+                task = pausedTasks.get(taskId);
+                final Collection<TopicPartition> changelogPartitions = task.changelogPartitions();
+                changelogReader.unregister(changelogPartitions);
+                removedTasks.add(task);
+                pausedTasks.remove(taskId);
+                log.debug((task.isActive() ? "Active" : "Standby")
+                    + " task " + task.id() + " was removed from the paused tasks and added to the removed tasks.");
+            } else {
+                log.debug("Task " + taskId + " was not removed since it is not updating or paused.");
+            }
+        }
+
+        private void pauseTask(final TaskId taskId) {
+            final Task task = updatingTasks.get(taskId);
+            if (task != null) {
+                // do not need to unregister changelog partitions for paused tasks
+                task.maybeCheckpoint(true);
+                pausedTasks.put(taskId, task);
+                updatingTasks.remove(taskId);
+                transitToUpdateStandbysIfOnlyStandbysLeft();
+                log.debug((task.isActive() ? "Active" : "Standby")
+                    + " task " + task.id() + " was paused from the updating tasks and added to the paused tasks.");
+            } else {
+                log.debug("Task " + taskId + " was not paused since it is not updating.");
+            }
+        }
+
+        private void resumeTask(final TaskId taskId) {
+            final Task task = pausedTasks.get(taskId);
+            if (task != null) {
+                updatingTasks.put(taskId, task);
+                pausedTasks.remove(taskId);
+
+                if (task.isActive()) {
+                    log.debug("Stateful active task " + task.id() + " was resumed to the updating tasks of the state updater");
+                    changelogReader.enforceRestoreActive();
+                } else {
+                    log.debug("Standby task " + task.id() + " was resumed to the updating tasks of the state updater");
+                    if (updatingTasks.size() == 1) {
+                        changelogReader.transitToUpdateStandby();
+                    }
+                }
+            } else {
+                log.debug("Task " + taskId + " was not resumed since it is not paused.");
+            }
+        }
+
+        private boolean isStateless(final Task task) {
+            return task.changelogPartitions().isEmpty() && task.isActive();
+        }
+
+        private void maybeCompleteRestoration(final StreamTask task,
+                                              final Set<TopicPartition> restoredChangelogs) {
+            final Collection<TopicPartition> taskChangelogPartitions = task.changelogPartitions();
+            if (restoredChangelogs.containsAll(taskChangelogPartitions)) {
+                task.maybeCheckpoint(true);
+                addToRestoredTasks(task);
+                updatingTasks.remove(task.id());
+                log.debug("Stateful active task " + task.id() + " completed restoration");
+                transitToUpdateStandbysIfOnlyStandbysLeft();
+            }
+        }
+
+        private void transitToUpdateStandbysIfOnlyStandbysLeft() {
+            if (onlyStandbyTasksLeft()) {
+                changelogReader.transitToUpdateStandby();
+            }
+        }
+
+        private void addToRestoredTasks(final StreamTask task) {
+            restoredActiveTasksLock.lock();
+            try {
+                restoredActiveTasks.add(task);
+                log.debug("Active task " + task.id() + " was added to the restored tasks");
+                restoredActiveTasksCondition.signalAll();
+            } finally {
+                restoredActiveTasksLock.unlock();
+            }
+        }
+
+        private void maybeCheckpointUpdatingTasks(final long now) {
+            final long elapsedMsSinceLastCommit = now - lastCommitMs;
+            if (elapsedMsSinceLastCommit > commitIntervalMs) {
+                if (log.isDebugEnabled()) {
+                    log.debug("Checkpointing all restoring tasks since {}ms has elapsed (commit interval is {}ms)",
+                        elapsedMsSinceLastCommit, commitIntervalMs);
+                }
+
+                for (final Task task : updatingTasks.values()) {
+                    // do not enforce checkpointing during restoration if its position has not advanced much
+                    task.maybeCheckpoint(false);
+                }
+
+                lastCommitMs = now;
+            }
+        }
+    }
+
+    private final Time time;
+    private final ChangelogReader changelogReader;
+    private final Queue<TaskAndAction> tasksAndActions = new LinkedList<>();
+    private final Lock tasksAndActionsLock = new ReentrantLock();
+    private final Condition tasksAndActionsCondition = tasksAndActionsLock.newCondition();
+    private final Queue<StreamTask> restoredActiveTasks = new LinkedList<>();
+    private final Lock restoredActiveTasksLock = new ReentrantLock();
+    private final Condition restoredActiveTasksCondition = restoredActiveTasksLock.newCondition();
+    private final BlockingQueue<ExceptionAndTasks> exceptionsAndFailedTasks = new LinkedBlockingQueue<>();
+    private final BlockingQueue<Task> removedTasks = new LinkedBlockingQueue<>();
+    private final Map<TaskId, Task> pausedTasks = new ConcurrentHashMap<>();
+
+    private final long commitIntervalMs;
+    private long lastCommitMs;
+
+    private StateUpdaterThread stateUpdaterThread = null;
+    private CountDownLatch shutdownGate;
+
+    public DefaultStateUpdater(final StreamsConfig config,
+                               final ChangelogReader changelogReader,
+                               final Time time) {
+        this.changelogReader = changelogReader;
+        this.time = time;
+        this.commitIntervalMs = config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG);
+    }
+
+    public void start() {
+        if (stateUpdaterThread == null) {
+            stateUpdaterThread = new StateUpdaterThread("state-updater", changelogReader);
+            stateUpdaterThread.start();
+            shutdownGate = new CountDownLatch(1);
+
+            // initialize the last commit as of now to prevent first commit happens immediately
+            this.lastCommitMs = time.milliseconds();
+        }
+    }
+
+    @Override
+    public void shutdown(final Duration timeout) {
+        if (stateUpdaterThread != null) {
+            stateUpdaterThread.isRunning.set(false);
+            stateUpdaterThread.interrupt();
+            try {
+                if (!shutdownGate.await(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
+                    throw new StreamsException("State updater thread did not shutdown within the timeout");
+                }
+                stateUpdaterThread = null;
+            } catch (final InterruptedException ignored) {
+            }
+        }
+    }
+
+    @Override
+    public void add(final Task task) {
+        verifyStateFor(task);
+
+        tasksAndActionsLock.lock();
+        try {
+            tasksAndActions.add(TaskAndAction.createAddTask(task));
+            tasksAndActionsCondition.signalAll();
+        } finally {
+            tasksAndActionsLock.unlock();
+        }
+    }
+
+    private void verifyStateFor(final Task task) {
+        if (task.isActive() && task.state() != State.RESTORING) {
+            throw new IllegalStateException("Active task " + task.id() + " is not in state RESTORING. " + BUG_ERROR_MESSAGE);
+        }
+        if (!task.isActive() && task.state() != State.RUNNING) {
+            throw new IllegalStateException("Standby task " + task.id() + " is not in state RUNNING. " + BUG_ERROR_MESSAGE);
+        }
+    }
+
+    @Override
+    public void remove(final TaskId taskId) {
+        tasksAndActionsLock.lock();
+        try {
+            tasksAndActions.add(TaskAndAction.createRemoveTask(taskId));
+            tasksAndActionsCondition.signalAll();
+        } finally {
+            tasksAndActionsLock.unlock();
+        }
+    }
+
+    @Override
+    public void pause(final TaskId taskId) {
+        tasksAndActionsLock.lock();
+        try {
+            tasksAndActions.add(TaskAndAction.createPauseTask(taskId));
+            tasksAndActionsCondition.signalAll();
+        } finally {
+            tasksAndActionsLock.unlock();
+        }
+    }
+
+    @Override
+    public void resume(final TaskId taskId) {
+        tasksAndActionsLock.lock();
+        try {
+            tasksAndActions.add(TaskAndAction.createResumeTask(taskId));
+            tasksAndActionsCondition.signalAll();
+        } finally {
+            tasksAndActionsLock.unlock();
+        }
+    }
+
+    @Override
+    public Set<StreamTask> drainRestoredActiveTasks(final Duration timeout) {
+        final long timeoutMs = timeout.toMillis();
+        final long startTime = time.milliseconds();
+        final long deadline = startTime + timeoutMs;
+        long now = startTime;
+        final Set<StreamTask> result = new HashSet<>();
+        try {
+            while (now <= deadline && result.isEmpty()) {
+                restoredActiveTasksLock.lock();
+                try {
+                    while (restoredActiveTasks.isEmpty() && now <= deadline) {
+                        final boolean elapsed = restoredActiveTasksCondition.await(deadline - now, TimeUnit.MILLISECONDS);
+                        now = time.milliseconds();
+                    }
+                    result.addAll(restoredActiveTasks);
+                    restoredActiveTasks.clear();
+                } finally {
+                    restoredActiveTasksLock.unlock();
+                }
+                now = time.milliseconds();
+            }
+            return result;
+        } catch (final InterruptedException ignored) {
+        }
+        return result;
+    }
+
+    @Override
+    public Set<Task> drainRemovedTasks() {
+        final List<Task> result = new ArrayList<>();
+        removedTasks.drainTo(result);
+        return new HashSet<>(result);
+    }
+
+    @Override
+    public List<ExceptionAndTasks> drainExceptionsAndFailedTasks() {
+        final List<ExceptionAndTasks> result = new ArrayList<>();
+        exceptionsAndFailedTasks.drainTo(result);
+        return result;
+    }
+
+    public Set<StandbyTask> getUpdatingStandbyTasks() {
+        return stateUpdaterThread != null
+            ? Collections.unmodifiableSet(new HashSet<>(stateUpdaterThread.getUpdatingStandbyTasks()))
+            : Collections.emptySet();
+    }
+
+    public Set<Task> getUpdatingTasks() {
+        return stateUpdaterThread != null
+            ? Collections.unmodifiableSet(new HashSet<>(stateUpdaterThread.getUpdatingTasks()))
+            : Collections.emptySet();
+    }
+
+    public Set<StreamTask> getRestoredActiveTasks() {
+        restoredActiveTasksLock.lock();
+        try {
+            return Collections.unmodifiableSet(new HashSet<>(restoredActiveTasks));
+        } finally {
+            restoredActiveTasksLock.unlock();
+        }
+    }
+
+    public List<ExceptionAndTasks> getExceptionsAndFailedTasks() {
+        return Collections.unmodifiableList(new ArrayList<>(exceptionsAndFailedTasks));
+    }
+
+    public Set<Task> getRemovedTasks() {
+        return Collections.unmodifiableSet(new HashSet<>(removedTasks));
+    }
+
+    public Set<Task> getPausedTasks() {
+        return Collections.unmodifiableSet(new HashSet<>(pausedTasks.values()));
+    }
+
+    @Override
+    public Set<Task> getTasks() {
+        return executeWithQueuesLocked(() -> getStreamOfTasks().map(ReadOnlyTask::new).collect(Collectors.toSet()));
+    }
+
+    @Override
+    public Set<StreamTask> getActiveTasks() {
+        return executeWithQueuesLocked(
+            () -> getStreamOfTasks().filter(Task::isActive).map(t -> (StreamTask) t).collect(Collectors.toSet())
+        );
+    }
+
+    @Override
+    public Set<StandbyTask> getStandbyTasks() {
+        return executeWithQueuesLocked(
+            () -> getStreamOfTasks().filter(t -> !t.isActive()).map(t -> (StandbyTask) t).collect(Collectors.toSet())
+        );
+    }
+
+    private <T> Set<T> executeWithQueuesLocked(final Supplier<Set<T>> action) {
+        tasksAndActionsLock.lock();
+        restoredActiveTasksLock.lock();
+        try {
+            return action.get();
+        } finally {
+            restoredActiveTasksLock.unlock();
+            tasksAndActionsLock.unlock();
+        }
+    }
+
+    private Stream<Task> getStreamOfTasks() {
+        return
+            Stream.concat(
+                tasksAndActions.stream()
+                    .filter(taskAndAction -> taskAndAction.getAction() == Action.ADD)
+                    .map(TaskAndAction::getTask),
+                Stream.concat(
+                    getUpdatingTasks().stream(),
+                    Stream.concat(
+                        restoredActiveTasks.stream(),
+                        Stream.concat(
+                            exceptionsAndFailedTasks.stream().flatMap(exceptionAndTasks -> exceptionAndTasks.getTasks().stream()),
+                            Stream.concat(
+                                getPausedTasks().stream(),
+                                removedTasks.stream())))));
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStreamPartitioner.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStreamPartitioner.java
index a90a028d729b9..c7d909c65a392 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStreamPartitioner.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/DefaultStreamPartitioner.java
@@ -16,26 +16,29 @@
  */
 package org.apache.kafka.streams.processor.internals;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
-import org.apache.kafka.common.Cluster;
+import org.apache.kafka.clients.producer.internals.BuiltInPartitioner;
 import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.streams.processor.StreamPartitioner;
 
 public class DefaultStreamPartitioner<K, V> implements StreamPartitioner<K, V> {
 
-    private final Cluster cluster;
     private final Serializer<K> keySerializer;
-    private final DefaultPartitioner defaultPartitioner;
 
-    public DefaultStreamPartitioner(final Serializer<K> keySerializer, final Cluster cluster) {
-        this.cluster = cluster;
+    public DefaultStreamPartitioner(final Serializer<K> keySerializer) {
         this.keySerializer = keySerializer;
-        this.defaultPartitioner = new DefaultPartitioner();
     }
 
     @Override
     public Integer partition(final String topic, final K key, final V value, final int numPartitions) {
         final byte[] keyBytes = keySerializer.serialize(topic, key);
-        return defaultPartitioner.partition(topic, key, keyBytes, value, null, cluster, numPartitions);
+
+        // if the key bytes are not available, we just return null to let the producer to decide
+        // which partition to send internally; otherwise stick with the same built-in partitioner
+        // util functions that producer used to make sure its behavior is consistent with the producer
+        if (keyBytes == null) {
+            return null;
+        } else {
+            return BuiltInPartitioner.partitionForKey(keyBytes, numPartitions);
+        }
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalProcessorContextImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalProcessorContextImpl.java
index 00df384658cec..4a7363cd53e75 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalProcessorContextImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/GlobalProcessorContextImpl.java
@@ -25,6 +25,7 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.query.Position;
@@ -96,6 +97,19 @@ public <K, V> void forward(final K key, final V value, final To to) {
         }
     }
 
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record, final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
+
     @Override
     public void commit() {
         //no-op
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java
index 6c12f2a60c861..db82be1afb437 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalProcessorContext.java
@@ -31,12 +31,15 @@
 import org.apache.kafka.streams.state.internals.ThreadCache.DirtyEntryFlushListener;
 
 /**
- * For internal use so we can update the {@link RecordContext} and current
+ * For internal use, so we can update the {@link RecordContext} and current
  * {@link ProcessorNode} when we are forwarding items that have been evicted or flushed from
  * {@link ThreadCache}
  */
 public interface InternalProcessorContext<KOut, VOut>
-    extends ProcessorContext, org.apache.kafka.streams.processor.api.ProcessorContext<KOut, VOut>, StateStoreContext {
+    extends ProcessorContext,
+    org.apache.kafka.streams.processor.api.ProcessorContext<KOut, VOut>,
+    org.apache.kafka.streams.processor.api.FixedKeyProcessorContext<KOut, VOut>,
+    StateStoreContext {
 
     BytesSerializer BYTES_KEY_SERIALIZER = new BytesSerializer();
     ByteArraySerializer BYTEARRAY_VALUE_SERIALIZER = new ByteArraySerializer();
@@ -120,4 +123,13 @@ void logChange(final String storeName,
                    final Position position);
 
     String changelogFor(final String storeName);
+
+    void addProcessorMetadataKeyValue(final String key, final long value);
+
+    Long processorMetadataForKey(final String key);
+
+    void setProcessorMetadata(final ProcessorMetadata metadata);
+
+    ProcessorMetadata getProcessorMetadata();
+
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java
index 8fa9b15a312ef..b4db744389c22 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilder.java
@@ -28,10 +28,11 @@
 import org.apache.kafka.streams.processor.StreamPartitioner;
 import org.apache.kafka.streams.processor.TimestampExtractor;
 import org.apache.kafka.streams.processor.TopicNameExtractor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology;
 import org.apache.kafka.streams.processor.internals.namedtopology.NamedTopology;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.internals.SessionStoreBuilder;
 import org.apache.kafka.streams.state.internals.TimestampedWindowStoreBuilder;
@@ -247,6 +248,32 @@ Processor describe() {
         }
     }
 
+    private static class FixedKeyProcessorNodeFactory<KIn, VIn, VOut> extends ProcessorNodeFactory<KIn, VIn, KIn, VOut> {
+        private final FixedKeyProcessorSupplier<KIn, VIn, VOut> supplier;
+        private final Set<String> stateStoreNames = new HashSet<>();
+
+        FixedKeyProcessorNodeFactory(final String name,
+                             final String[] predecessors,
+                             final FixedKeyProcessorSupplier<KIn, VIn, VOut> supplier) {
+            super(name, predecessors.clone(), null);
+            this.supplier = supplier;
+        }
+
+        public void addStateStore(final String stateStoreName) {
+            stateStoreNames.add(stateStoreName);
+        }
+
+        @Override
+        public ProcessorNode<KIn, VIn, KIn, VOut> build() {
+            return new ProcessorNode<>(name, supplier.get(), stateStoreNames);
+        }
+
+        @Override
+        Processor describe() {
+            return new Processor(name, new HashSet<>(stateStoreNames));
+        }
+    }
+
     // Map from topics to their matched regex patterns, this is to ensure one topic is passed through on source node
     // even if it can be matched by multiple regex patterns. Only used by SourceNodeFactory
     private final Map<String, Pattern> topicToPatterns = new HashMap<>();
@@ -544,6 +571,36 @@ public final <KIn, VIn, KOut, VOut> void addProcessor(final String name,
         nodeGroups = null;
     }
 
+    public final <KIn, VIn, VOut> void addProcessor(final String name,
+                                                    final FixedKeyProcessorSupplier<KIn, VIn, VOut> supplier,
+                                                    final String... predecessorNames) {
+        Objects.requireNonNull(name, "name must not be null");
+        Objects.requireNonNull(supplier, "supplier must not be null");
+        Objects.requireNonNull(predecessorNames, "predecessor names must not be null");
+        ApiUtils.checkSupplier(supplier);
+        if (nodeFactories.containsKey(name)) {
+            throw new TopologyException("Processor " + name + " is already added.");
+        }
+        if (predecessorNames.length == 0) {
+            throw new TopologyException("Processor " + name + " must have at least one parent");
+        }
+
+        for (final String predecessor : predecessorNames) {
+            Objects.requireNonNull(predecessor, "predecessor name must not be null");
+            if (predecessor.equals(name)) {
+                throw new TopologyException("Processor " + name + " cannot be a predecessor of itself.");
+            }
+            if (!nodeFactories.containsKey(predecessor)) {
+                throw new TopologyException("Predecessor processor " + predecessor + " is not added yet for " + name);
+            }
+        }
+
+        nodeFactories.put(name, new FixedKeyProcessorNodeFactory<>(name, predecessorNames, supplier));
+        nodeGrouper.add(name);
+        nodeGrouper.unite(name, predecessorNames);
+        nodeGroups = null;
+    }
+
     public final void addStateStore(final StoreBuilder<?> storeBuilder,
                                     final String... processorNames) {
         addStateStore(storeBuilder, false, processorNames);
@@ -1058,8 +1115,11 @@ private void buildProcessorNode(final Map<String, ProcessorNode<?, ?, ?, ?>> pro
 
                     // remember the changelog topic if this state store is change-logging enabled
                     if (stateStoreFactory.loggingEnabled() && !storeToChangelogTopic.containsKey(stateStoreName)) {
+                        final String prefix = topologyConfigs == null ?
+                                applicationId :
+                                ProcessorContextUtils.getPrefix(topologyConfigs.applicationConfigs.originals(), applicationId);
                         final String changelogTopic =
-                            ProcessorStateManager.storeChangelogTopic(getPrefix(), stateStoreName, topologyName);
+                            ProcessorStateManager.storeChangelogTopic(prefix, stateStoreName, topologyName);
                         storeToChangelogTopic.put(stateStoreName, changelogTopic);
                         changelogTopicToStore.put(changelogTopic, stateStoreName);
                     }
@@ -1351,22 +1411,15 @@ private String decorateTopic(final String topic) {
                                             + "applicationId hasn't been set. Call "
                                             + "setApplicationId first");
         }
+        final String prefix = topologyConfigs == null ?
+                                applicationId :
+                                ProcessorContextUtils.getPrefix(topologyConfigs.applicationConfigs.originals(), applicationId);
+
         if (hasNamedTopology()) {
-            return getPrefix() + "-" + topologyName + "-" + topic;
+            return prefix + "-" + topologyName + "-" + topic;
         } else {
-            return getPrefix() + "-" + topic;
-        }
-    }
-
-    String getPrefix() {
-        if (topologyConfigs == null) {
-            return applicationId;
+            return prefix + "-" + topic;
         }
-        return StreamsConfig.InternalConfig.getString(
-            topologyConfigs.applicationConfigs.originals(),
-            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-            applicationId
-        );
     }
 
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java
index 199bc0e6456cc..7ce538e66badf 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/PartitionGroup.java
@@ -58,7 +58,7 @@
  */
 public class PartitionGroup {
 
-    private  final Logger logger;
+    private final Logger logger;
     private final Map<TopicPartition, RecordQueue> partitionQueues;
     private final Function<TopicPartition, OptionalLong> lagProvider;
     private final Sensor enforcedProcessingSensor;
@@ -118,11 +118,11 @@ public boolean readyToProcess(final long wallClockTime) {
                     }
                 }
                 logger.trace("Ready for processing because max.task.idle.ms is disabled." +
-                              "\n\tThere may be out-of-order processing for this task as a result." +
-                              "\n\tBuffered partitions: {}" +
-                              "\n\tNon-buffered partitions: {}",
-                          bufferedPartitions,
-                          emptyPartitions);
+                                "\n\tThere may be out-of-order processing for this task as a result." +
+                                "\n\tBuffered partitions: {}" +
+                                "\n\tNon-buffered partitions: {}",
+                        bufferedPartitions,
+                        emptyPartitions);
             }
             return true;
         }
@@ -151,9 +151,9 @@ public boolean readyToProcess(final long wallClockTime) {
                     // must wait to poll the data we know to be on the broker
                     idlePartitionDeadlines.remove(partition);
                     logger.trace(
-                        "Lag for {} is currently {}, but no data is buffered locally. Waiting to buffer some records.",
-                        partition,
-                        fetchedLag.getAsLong()
+                            "Lag for {} is currently {}, but no data is buffered locally. Waiting to buffer some records.",
+                            partition,
+                            fetchedLag.getAsLong()
                     );
                     return false;
                 } else {
@@ -167,11 +167,11 @@ public boolean readyToProcess(final long wallClockTime) {
                     final long deadline = idlePartitionDeadlines.get(partition);
                     if (wallClockTime < deadline) {
                         logger.trace(
-                            "Lag for {} is currently 0 and current time is {}. Waiting for new data to be produced for configured idle time {} (deadline is {}).",
-                            partition,
-                            wallClockTime,
-                            maxTaskIdleMs,
-                            deadline
+                                "Lag for {} is currently 0 and current time is {}. Waiting for new data to be produced for configured idle time {} (deadline is {}).",
+                                partition,
+                                wallClockTime,
+                                maxTaskIdleMs,
+                                deadline
                         );
                         return false;
                     } else {
@@ -193,15 +193,15 @@ public boolean readyToProcess(final long wallClockTime) {
         } else {
             enforcedProcessingSensor.record(1.0d, wallClockTime);
             logger.trace("Continuing to process although some partitions are empty on the broker." +
-                         "\n\tThere may be out-of-order processing for this task as a result." +
-                         "\n\tPartitions with local data: {}." +
-                         "\n\tPartitions we gave up waiting for, with their corresponding deadlines: {}." +
-                         "\n\tConfigured max.task.idle.ms: {}." +
-                         "\n\tCurrent wall-clock time: {}.",
-                     queued,
-                     enforced,
-                     maxTaskIdleMs,
-                     wallClockTime);
+                            "\n\tThere may be out-of-order processing for this task as a result." +
+                            "\n\tPartitions with local data: {}." +
+                            "\n\tPartitions we gave up waiting for, with their corresponding deadlines: {}." +
+                            "\n\tConfigured max.task.idle.ms: {}." +
+                            "\n\tCurrent wall-clock time: {}.",
+                    queued,
+                    enforced,
+                    maxTaskIdleMs,
+                    wallClockTime);
             return true;
         }
     }
@@ -216,8 +216,9 @@ long partitionTimestamp(final TopicPartition partition) {
     }
 
     // creates queues for new partitions, removes old queues, saves cached records for previously assigned partitions
-    void updatePartitions(final Set<TopicPartition> newInputPartitions, final Function<TopicPartition, RecordQueue> recordQueueCreator) {
+    void updatePartitions(final Set<TopicPartition> inputPartitions, final Function<TopicPartition, RecordQueue> recordQueueCreator) {
         final Set<TopicPartition> removedPartitions = new HashSet<>();
+        final Set<TopicPartition> newInputPartitions = new HashSet<>(inputPartitions);
         final Iterator<Map.Entry<TopicPartition, RecordQueue>> queuesIterator = partitionQueues.entrySet().iterator();
         while (queuesIterator.hasNext()) {
             final Map.Entry<TopicPartition, RecordQueue> queueEntry = queuesIterator.next();
@@ -261,7 +262,7 @@ StampedRecord nextRecord(final RecordInfo info, final long wallClockTime) {
 
         if (queue != null) {
             // get the first record from this queue.
-            record = queue.poll();
+            record = queue.poll(wallClockTime);
 
             if (record != null) {
                 --totalBuffered;
@@ -289,8 +290,8 @@ record = queue.poll();
     /**
      * Adds raw records to this partition group
      *
-     * @param partition the partition
-     * @param rawRecords  the raw records
+     * @param partition  the partition
+     * @param rawRecords the raw records
      * @return the queue size for the partition
      */
     int addRawRecords(final TopicPartition partition, final Iterable<ConsumerRecord<byte[], byte[]>> rawRecords) {
@@ -370,4 +371,10 @@ void clear() {
         totalBuffered = 0;
         streamTime = RecordQueue.UNKNOWN;
     }
-}
+
+    void close() {
+        for (final RecordQueue queue : partitionQueues.values()) {
+            queue.close();
+        }
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextImpl.java
index 7519c62d40fa9..ffa5dcaf73bc7 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextImpl.java
@@ -29,6 +29,7 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
@@ -146,8 +147,9 @@ public void logChange(final String storeName,
             changelogPartition.partition(),
             timestamp,
             BYTES_KEY_SERIALIZER,
-            BYTEARRAY_VALUE_SERIALIZER
-        );
+            BYTEARRAY_VALUE_SERIALIZER,
+            null,
+            null);
     }
 
     /**
@@ -208,6 +210,19 @@ public <K, V> void forward(final K key,
         forward(toForward, toInternal.child());
     }
 
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record, final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
+
     @Override
     public <K, V> void forward(final Record<K, V> record) {
         forward(record, null);
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextUtils.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextUtils.java
index 39e92a9232ae0..c23c0fc9e4913 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextUtils.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorContextUtils.java
@@ -56,21 +56,29 @@ public static StreamsMetricsImpl getMetricsImpl(final StateStoreContext context)
         return (StreamsMetricsImpl) context.metrics();
     }
 
-    public static String changelogFor(final ProcessorContext context, final String storeName) {
+    public static String changelogFor(final ProcessorContext context, final String storeName, final Boolean newChangelogTopic) {
         final String prefix = getPrefix(context.appConfigs(), context.applicationId());
-        return context instanceof InternalProcessorContext
-            ? ((InternalProcessorContext) context).changelogFor(storeName)
-            : ProcessorStateManager.storeChangelogTopic(prefix, storeName, context.taskId().topologyName());
+        if (context instanceof InternalProcessorContext && !newChangelogTopic) {
+            final String changelogTopic = ((InternalProcessorContext) context).changelogFor(storeName);
+            if (changelogTopic != null)
+                return changelogTopic;
+
+        }
+        return ProcessorStateManager.storeChangelogTopic(prefix, storeName, context.taskId().topologyName());
     }
 
-    public static String changelogFor(final StateStoreContext context, final String storeName) {
+    public static String changelogFor(final StateStoreContext context, final String storeName, final Boolean newChangelogTopic) {
         final String prefix = getPrefix(context.appConfigs(), context.applicationId());
-        return context instanceof InternalProcessorContext
-            ? ((InternalProcessorContext) context).changelogFor(storeName)
-            : ProcessorStateManager.storeChangelogTopic(prefix, storeName, context.taskId().topologyName());
+        if (context instanceof InternalProcessorContext && !newChangelogTopic) {
+            final String changelogTopic = ((InternalProcessorContext) context).changelogFor(storeName);
+            if (changelogTopic != null)
+                return changelogTopic;
+
+        }
+        return ProcessorStateManager.storeChangelogTopic(prefix, storeName, context.taskId().topologyName());
     }
 
-    private static String getPrefix(final Map<String, Object> configs, final String applicationId) {
+    public static String getPrefix(final Map<String, Object> configs, final String applicationId) {
         if (configs == null) {
             return applicationId;
         } else {
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorMetadata.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorMetadata.java
new file mode 100644
index 0000000000000..943beb64e017f
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorMetadata.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * ProcessorMetadata to be access and populated by processor node. This will be committed along with
+ * offset. This metadata is mainly for windowed aggregation processor to store last emitted timestamp
+ * for now. Therefore, the supported metadata value type is only Long which is timestamp type.
+ */
+public class ProcessorMetadata {
+
+    private final Map<String, Long> metadata;
+
+    // Whether metadata should be committed. We only need to commit if metadata is updated via
+    // put() or set explicitly
+    private boolean needsCommit;
+
+    public ProcessorMetadata() {
+        this(new HashMap<>());
+    }
+
+    public ProcessorMetadata(final Map<String, Long> metadata) {
+        this.metadata = metadata;
+        needsCommit = false;
+    }
+
+    public static ProcessorMetadata deserialize(final byte[] metaDataBytes) {
+        if (metaDataBytes == null || metaDataBytes.length == 0) {
+            return new ProcessorMetadata();
+        }
+
+        final ByteBuffer buffer = ByteBuffer.wrap(metaDataBytes);
+        final int entrySize = buffer.getInt();
+        final Map<String, Long> metadata = new HashMap<>(entrySize);
+        for (int i = 0; i < entrySize; i++) {
+            final int keySize = buffer.getInt();
+            final byte[] keyBytes = new byte[keySize];
+            buffer.get(keyBytes);
+            final Long value = buffer.getLong();
+            metadata.put(new String(keyBytes, StandardCharsets.UTF_8), value);
+        }
+        return new ProcessorMetadata(metadata);
+    }
+
+    public byte[] serialize() {
+        if (metadata.isEmpty()) {
+            return new byte[0];
+        }
+
+        final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        final byte[] mapSizeBytes = ByteBuffer.allocate(Integer.BYTES).putInt(metadata.size()).array();
+        outputStream.write(mapSizeBytes, 0, mapSizeBytes.length);
+
+        for (final Map.Entry<String, Long> entry : metadata.entrySet()) {
+            final byte[] keyBytes = entry.getKey().getBytes(StandardCharsets.UTF_8);
+            final int keyLen = keyBytes.length;
+            final byte[] buffer = ByteBuffer.allocate(Integer.BYTES + keyBytes.length + Long.BYTES)
+                .putInt(keyLen)
+                .put(keyBytes)
+                .putLong(entry.getValue())
+                .array();
+            outputStream.write(buffer, 0, buffer.length);
+        }
+        return outputStream.toByteArray();
+    }
+
+    public void put(final String key, final long value) {
+        metadata.put(key, value);
+        needsCommit = true;
+    }
+
+    public Long get(final String key) {
+        return metadata.get(key);
+    }
+
+    /**
+     * Merge with other metadata. Missing keys will be added. Existing key's value will be updated to
+     * max
+     * @param other Other metadata to be merged
+     */
+    public void update(final ProcessorMetadata other) {
+        if (other == null) {
+            return;
+        }
+        for (final Map.Entry<String, Long> kv : other.metadata.entrySet()) {
+            final Long value = metadata.get(kv.getKey());
+            if (value == null || value < kv.getValue()) {
+                metadata.put(kv.getKey(), kv.getValue());
+            }
+        }
+    }
+
+    public void setNeedsCommit(final boolean needsCommit) {
+        this.needsCommit = needsCommit;
+    }
+
+    /**
+     * Whether metadata needs to be committed. It should be committed only if put is or
+     * {@link #setNeedsCommit} is called explicitly
+     *
+     * @return If metadata needs to be committed.
+     */
+    public boolean needsCommit() {
+        return needsCommit;
+    }
+
+    @Override
+    public int hashCode() {
+        // needsCommit is not considered in hashCode or equals
+        return Objects.hashCode(metadata);
+    }
+
+    @Override
+    public boolean equals(final Object obj) {
+        if (obj == null || obj.getClass() != getClass()) {
+            return false;
+        }
+        if (this == obj) {
+            return true;
+        }
+
+        return metadata.equals(((ProcessorMetadata) obj).metadata);
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java
index 48c95f115de89..d5ecfa7ffe504 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorNode.java
@@ -20,6 +20,9 @@
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.processor.Punctuator;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorContext;
+import org.apache.kafka.streams.processor.api.InternalFixedKeyRecordFactory;
 import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.Record;
 
@@ -35,12 +38,13 @@ public class ProcessorNode<KIn, VIn, KOut, VOut> {
     private final Map<String, ProcessorNode<KOut, VOut, ?, ?>> childByName;
 
     private final Processor<KIn, VIn, KOut, VOut> processor;
+    private final FixedKeyProcessor<KIn, VIn, VOut> fixedKeyProcessor;
     private final String name;
     private final Time time;
 
     public final Set<String> stateStores;
 
-    private InternalProcessorContext internalProcessorContext;
+    private InternalProcessorContext<KOut, VOut> internalProcessorContext;
     private String threadId;
 
     private boolean closed = true;
@@ -55,6 +59,7 @@ public ProcessorNode(final String name,
 
         this.name = name;
         this.processor = processor;
+        this.fixedKeyProcessor = null;
         this.children = new ArrayList<>();
         this.childByName = new HashMap<>();
         this.stateStores = stateStores;
@@ -62,11 +67,12 @@ public ProcessorNode(final String name,
     }
 
     public ProcessorNode(final String name,
-                         @SuppressWarnings("deprecation") final org.apache.kafka.streams.processor.Processor<KIn, VIn> processor,
+                         final FixedKeyProcessor<KIn, VIn, VOut> processor,
                          final Set<String> stateStores) {
 
         this.name = name;
-        this.processor = ProcessorAdapter.adapt(processor);
+        this.processor = null;
+        this.fixedKeyProcessor = processor;
         this.children = new ArrayList<>();
         this.childByName = new HashMap<>();
         this.stateStores = stateStores;
@@ -77,10 +83,6 @@ public final String name() {
         return name;
     }
 
-    public final Processor<KIn, VIn, KOut, VOut> processor() {
-        return processor;
-    }
-
     public List<ProcessorNode<KOut, VOut, ?, ?>> children() {
         return children;
     }
@@ -104,6 +106,11 @@ public void init(final InternalProcessorContext<KOut, VOut> context) {
             if (processor != null) {
                 processor.init(context);
             }
+            if (fixedKeyProcessor != null) {
+                @SuppressWarnings("unchecked") final FixedKeyProcessorContext<KIn, VOut> fixedKeyProcessorContext =
+                    (FixedKeyProcessorContext<KIn, VOut>) context;
+                fixedKeyProcessor.init(fixedKeyProcessorContext);
+            }
         } catch (final Exception e) {
             throw new StreamsException(String.format("failed to initialize processor %s", name), e);
         }
@@ -120,6 +127,9 @@ public void close() {
             if (processor != null) {
                 processor.close();
             }
+            if (fixedKeyProcessor != null) {
+                fixedKeyProcessor.close();
+            }
             internalProcessorContext.metrics().removeAllNodeLevelSensors(
                 threadId,
                 internalProcessorContext.taskId().toString(),
@@ -143,7 +153,17 @@ public void process(final Record<KIn, VIn> record) {
         throwIfClosed();
 
         try {
-            processor.process(record);
+            if (processor != null) {
+                processor.process(record);
+            } else if (fixedKeyProcessor != null) {
+                fixedKeyProcessor.process(
+                    InternalFixedKeyRecordFactory.create(record)
+                );
+            } else {
+                throw new IllegalStateException(
+                    "neither the processor nor the fixed key processor were set."
+                );
+            }
         } catch (final ClassCastException e) {
             final String keyClass = record.key() == null ? "unknown because key is null" : record.key().getClass().getName();
             final String valueClass = record.value() == null ? "unknown because value is null" : record.value().getClass().getName();
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java
index 3c8c40fc324de..d80a1edd4008a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ProcessorStateManager.java
@@ -43,8 +43,10 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 
 import static java.lang.String.format;
@@ -364,21 +366,22 @@ public StateStore getStore(final String name) {
         }
     }
 
-    Collection<TopicPartition> changelogPartitions() {
-        return changelogOffsets().keySet();
+    Set<TopicPartition> changelogPartitions() {
+        return Collections.unmodifiableSet(changelogOffsets().keySet());
     }
 
     void markChangelogAsCorrupted(final Collection<TopicPartition> partitions) {
+        final Collection<TopicPartition> partitionsToMarkAsCorrupted = new LinkedList<>(partitions);
         for (final StateStoreMetadata storeMetadata : stores.values()) {
-            if (partitions.contains(storeMetadata.changelogPartition)) {
+            if (partitionsToMarkAsCorrupted.contains(storeMetadata.changelogPartition)) {
                 storeMetadata.corrupted = true;
-                partitions.remove(storeMetadata.changelogPartition);
+                partitionsToMarkAsCorrupted.remove(storeMetadata.changelogPartition);
             }
         }
 
-        if (!partitions.isEmpty()) {
-            throw new IllegalStateException("Some partitions " + partitions + " are not contained in the store list of task " +
-                taskId + " marking as corrupted, this is not expected");
+        if (!partitionsToMarkAsCorrupted.isEmpty()) {
+            throw new IllegalStateException("Some partitions " + partitionsToMarkAsCorrupted + " are not contained in " +
+                "the store list of task " + taskId + " marking as corrupted, this is not expected");
         }
     }
 
@@ -576,17 +579,12 @@ void recycle() {
         changelogReader.unregister(allChangelogs);
     }
 
-    void transitionTaskType(final TaskType newType, final LogContext logContext) {
+    void transitionTaskType(final TaskType newType) {
         if (taskType.equals(newType)) {
             throw new IllegalStateException("Tried to recycle state for task type conversion but new type was the same.");
         }
 
-        final TaskType oldType = taskType;
         taskType = newType;
-        log = logContext.logger(ProcessorStateManager.class);
-        logPrefix = logContext.logPrefix();
-
-        log.debug("Transitioning state manager for {} task {} to {}", oldType, taskId, newType);
     }
 
     @Override
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/ReadOnlyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ReadOnlyTask.java
new file mode 100644
index 0000000000000..00c1006b85e22
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/ReadOnlyTask.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.TaskId;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Consumer;
+
+public class ReadOnlyTask implements Task {
+
+    private final Task task;
+
+    public ReadOnlyTask(final Task task) {
+        this.task = task;
+    }
+
+    @Override
+    public TaskId id() {
+        return task.id();
+    }
+
+    @Override
+    public boolean isActive() {
+        return task.isActive();
+    }
+
+    @Override
+    public Set<TopicPartition> inputPartitions() {
+        return task.inputPartitions();
+    }
+
+    @Override
+    public Set<TopicPartition> changelogPartitions() {
+        return task.changelogPartitions();
+    }
+
+    @Override
+    public State state() {
+        return task.state();
+    }
+
+    @Override
+    public boolean commitRequested() {
+        return task.commitRequested();
+    }
+
+    @Override
+    public boolean needsInitializationOrRestoration() {
+        return task.needsInitializationOrRestoration();
+    }
+
+    @Override
+    public void initializeIfNeeded() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void addPartitionsForOffsetReset(final Set<TopicPartition> partitionsForOffsetReset) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void completeRestoration(final Consumer<Set<TopicPartition>> offsetResetter) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void suspend() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void resume() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void closeDirty() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void closeClean() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void updateInputPartitions(final Set<TopicPartition> topicPartitions,
+                                      final Map<String, List<String>> allTopologyNodesToSourceTopics) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void maybeCheckpoint(final boolean enforceCheckpoint) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void revive() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void prepareRecycle() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void addRecords(final TopicPartition partition, final Iterable<ConsumerRecord<byte[], byte[]>> records) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public boolean process(final long wallClockTime) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void recordProcessBatchTime(final long processBatchTime) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void recordProcessTimeRatioAndBufferSize(final long allTaskProcessMs, final long now) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public boolean maybePunctuateStreamTime() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public boolean maybePunctuateSystemTime() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Map<TopicPartition, OffsetAndMetadata> prepareCommit() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void postCommit(final boolean enforceCheckpoint) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Map<TopicPartition, Long> purgeableOffsets() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void maybeInitTaskTimeoutOrThrow(final long currentWallClockMs, final Exception cause) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public void clearTaskTimeout() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public boolean commitNeeded() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public StateStore getStore(final String name) {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Map<TopicPartition, Long> changelogOffsets() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Map<TopicPartition, Long> committedOffsets() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Map<TopicPartition, Long> highWaterMark() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+
+    @Override
+    public Optional<Long> timeCurrentIdlingStarted() {
+        throw new UnsupportedOperationException("This task is read-only");
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java
index 8b22f22f8274e..a48a671d46091 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollector.java
@@ -33,7 +33,9 @@ <K, V> void send(final String topic,
                      final Integer partition,
                      final Long timestamp,
                      final Serializer<K> keySerializer,
-                     final Serializer<V> valueSerializer);
+                     final Serializer<V> valueSerializer,
+                     final String processorNodeId,
+                     final InternalProcessorContext<Void, Void> context);
 
     <K, V> void send(final String topic,
                      final K key,
@@ -42,6 +44,8 @@ <K, V> void send(final String topic,
                      final Long timestamp,
                      final Serializer<K> keySerializer,
                      final Serializer<V> valueSerializer,
+                     final String processorNodeId,
+                     final InternalProcessorContext<Void, Void> context,
                      final StreamPartitioner<? super K, ? super V> partitioner);
 
     /**
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java
index f8c9cf9d7d870..38445fde9bf0e 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordCollectorImpl.java
@@ -46,6 +46,8 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.TaskMetrics;
+import org.apache.kafka.streams.processor.internals.metrics.TopicMetrics;
+
 import org.slf4j.Logger;
 
 import java.util.Collections;
@@ -54,6 +56,8 @@
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
 
+import static org.apache.kafka.streams.processor.internals.ClientUtils.producerRecordSizeInBytes;
+
 public class RecordCollectorImpl implements RecordCollector {
     private final static String SEND_EXCEPTION_MESSAGE = "Error encountered sending record to topic %s for task %s due to:%n%s";
 
@@ -61,10 +65,13 @@ public class RecordCollectorImpl implements RecordCollector {
     private final TaskId taskId;
     private final StreamsProducer streamsProducer;
     private final ProductionExceptionHandler productionExceptionHandler;
-    private final Sensor droppedRecordsSensor;
     private final boolean eosEnabled;
     private final Map<TopicPartition, Long> offsets;
 
+    private final StreamsMetricsImpl streamsMetrics;
+    private final Sensor droppedRecordsSensor;
+    private final Map<String, Map<String, Sensor>> sinkNodeToProducedSensorByTopic = new HashMap<>();
+
     private final AtomicReference<KafkaException> sendException = new AtomicReference<>(null);
 
     /**
@@ -74,15 +81,29 @@ public RecordCollectorImpl(final LogContext logContext,
                                final TaskId taskId,
                                final StreamsProducer streamsProducer,
                                final ProductionExceptionHandler productionExceptionHandler,
-                               final StreamsMetricsImpl streamsMetrics) {
+                               final StreamsMetricsImpl streamsMetrics,
+                               final ProcessorTopology topology) {
         this.log = logContext.logger(getClass());
         this.taskId = taskId;
         this.streamsProducer = streamsProducer;
         this.productionExceptionHandler = productionExceptionHandler;
         this.eosEnabled = streamsProducer.eosEnabled();
+        this.streamsMetrics = streamsMetrics;
 
         final String threadId = Thread.currentThread().getName();
         this.droppedRecordsSensor = TaskMetrics.droppedRecordsSensor(threadId, taskId.toString(), streamsMetrics);
+        for (final String topic : topology.sinkTopics()) {
+            final String processorNodeId = topology.sink(topic).name();
+            sinkNodeToProducedSensorByTopic.computeIfAbsent(processorNodeId, t -> new HashMap<>()).put(
+                topic,
+                TopicMetrics.producedSensor(
+                    threadId,
+                    taskId.toString(),
+                    processorNodeId,
+                    topic,
+                    streamsMetrics
+                ));
+        }
 
         this.offsets = new HashMap<>();
     }
@@ -106,6 +127,8 @@ public <K, V> void send(final String topic,
                             final Long timestamp,
                             final Serializer<K> keySerializer,
                             final Serializer<V> valueSerializer,
+                            final String processorNodeId,
+                            final InternalProcessorContext<Void, Void> context,
                             final StreamPartitioner<? super K, ? super V> partitioner) {
         final Integer partition;
 
@@ -122,7 +145,7 @@ public <K, V> void send(final String topic,
                 // here we cannot drop the message on the floor even if it is a transient timeout exception,
                 // so we treat everything the same as a fatal exception
                 throw new StreamsException("Could not determine the number of partitions for topic '" + topic +
-                    "' for task " + taskId + " due to " + fatal.toString(),
+                    "' for task " + taskId + " due to " + fatal,
                     fatal
                 );
             }
@@ -136,7 +159,7 @@ public <K, V> void send(final String topic,
             partition = null;
         }
 
-        send(topic, key, value, headers, partition, timestamp, keySerializer, valueSerializer);
+        send(topic, key, value, headers, partition, timestamp, keySerializer, valueSerializer, processorNodeId, context);
     }
 
     @Override
@@ -147,7 +170,9 @@ public <K, V> void send(final String topic,
                             final Integer partition,
                             final Long timestamp,
                             final Serializer<K> keySerializer,
-                            final Serializer<V> valueSerializer) {
+                            final Serializer<V> valueSerializer,
+                            final String processorNodeId,
+                            final InternalProcessorContext<Void, Void> context) {
         checkForException();
 
         final byte[] keyBytes;
@@ -173,7 +198,7 @@ public <K, V> void send(final String topic,
                     valueClass),
                 exception);
         } catch (final RuntimeException exception) {
-            final String errorMessage = String.format(SEND_EXCEPTION_MESSAGE, topic, taskId, exception.toString());
+            final String errorMessage = String.format(SEND_EXCEPTION_MESSAGE, topic, taskId, exception);
             throw new StreamsException(errorMessage, exception);
         }
 
@@ -192,6 +217,29 @@ public <K, V> void send(final String topic,
                 } else {
                     log.warn("Received offset={} in produce response for {}", metadata.offset(), tp);
                 }
+
+                if (!topic.endsWith("-changelog")) {
+                    final Map<String, Sensor> producedSensorByTopic = sinkNodeToProducedSensorByTopic.get(processorNodeId);
+                    if (producedSensorByTopic == null) {
+                        log.error("Unable to records bytes produced to topic {} by sink node {} as the node is not recognized.\n"
+                                      + "Known sink nodes are {}.", topic, processorNodeId, sinkNodeToProducedSensorByTopic.keySet());
+                    } else {
+                        // we may not have created a sensor during initialization if the node uses dynamic topic routing,
+                        // as all topics are not known up front, so create the sensor for that topic if absent
+                        final Sensor topicProducedSensor = producedSensorByTopic.computeIfAbsent(
+                            topic,
+                            t -> TopicMetrics.producedSensor(
+                                Thread.currentThread().getName(),
+                                taskId.toString(),
+                                processorNodeId,
+                                topic,
+                                context.metrics()
+                            )
+                        );
+                        final long bytesProduced = producerRecordSizeInBytes(serializedRecord);
+                        topicProducedSensor.record(bytesProduced, context.currentSystemTimeMs());
+                    }
+                }
             } else {
                 recordSendError(topic, exception, serializedRecord);
 
@@ -267,6 +315,8 @@ public void flush() {
     public void closeClean() {
         log.info("Closing record collector clean");
 
+        removeAllProducedSensors();
+
         // No need to abort transaction during a clean close: either we have successfully committed the ongoing
         // transaction during handleRevocation and thus there is no transaction in flight, or else none of the revoked
         // tasks had any data in the current transaction and therefore there is no need to commit or abort it.
@@ -290,6 +340,14 @@ public void closeDirty() {
         checkForException();
     }
 
+    private void removeAllProducedSensors() {
+        for (final Map<String, Sensor> nodeMap : sinkNodeToProducedSensorByTopic.values()) {
+            for (final Sensor sensor : nodeMap.values()) {
+                streamsMetrics.removeSensor(sensor);
+            }
+        }
+    }
+
     @Override
     public Map<TopicPartition, Long> offsets() {
         return Collections.unmodifiableMap(new HashMap<>(offsets));
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java
index 1c0196635cb2d..297c287673892 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RecordQueue.java
@@ -25,10 +25,14 @@
 import org.apache.kafka.streams.processor.internals.metrics.TaskMetrics;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.TimestampExtractor;
+import org.apache.kafka.streams.processor.internals.metrics.TopicMetrics;
+
 import org.slf4j.Logger;
 
 import java.util.ArrayDeque;
 
+import static org.apache.kafka.streams.processor.internals.ClientUtils.consumerRecordSizeInBytes;
+
 /**
  * RecordQueue is a FIFO queue of {@link StampedRecord} (ConsumerRecord + timestamp). It also keeps track of the
  * partition timestamp defined as the largest timestamp seen on the partition so far; this is passed to the
@@ -50,6 +54,8 @@ public class RecordQueue {
     private long partitionTime = UNKNOWN;
 
     private final Sensor droppedRecordsSensor;
+    private final Sensor consumedSensor;
+    private long headRecordSizeInBytes;
 
     RecordQueue(final TopicPartition partition,
                 final SourceNode<?, ?> source,
@@ -62,9 +68,18 @@ public class RecordQueue {
         this.fifoQueue = new ArrayDeque<>();
         this.timestampExtractor = timestampExtractor;
         this.processorContext = processorContext;
+
+        final String threadName = Thread.currentThread().getName();
         droppedRecordsSensor = TaskMetrics.droppedRecordsSensor(
-            Thread.currentThread().getName(),
+            threadName,
+            processorContext.taskId().toString(),
+            processorContext.metrics()
+        );
+        consumedSensor = TopicMetrics.consumedSensor(
+            threadName,
             processorContext.taskId().toString(),
+            source.name(),
+            partition.topic(),
             processorContext.metrics()
         );
         recordDeserializer = new RecordDeserializer(
@@ -74,6 +89,7 @@ public class RecordQueue {
             droppedRecordsSensor
         );
         this.log = logContext.logger(RecordQueue.class);
+        this.headRecordSizeInBytes = 0L;
     }
 
     void setPartitionTime(final long partitionTime) {
@@ -119,9 +135,13 @@ int addRawRecords(final Iterable<ConsumerRecord<byte[], byte[]>> rawRecords) {
      *
      * @return StampedRecord
      */
-    public StampedRecord poll() {
+    public StampedRecord poll(final long wallClockTime) {
         final StampedRecord recordToReturn = headRecord;
+
+        consumedSensor.record(headRecordSizeInBytes, wallClockTime);
+
         headRecord = null;
+        headRecordSizeInBytes = 0L;
         partitionTime = Math.max(partitionTime, recordToReturn.timestamp);
 
         updateHead();
@@ -167,9 +187,14 @@ public Long headRecordOffset() {
     public void clear() {
         fifoQueue.clear();
         headRecord = null;
+        headRecordSizeInBytes = 0L;
         partitionTime = UNKNOWN;
     }
 
+    public void close() {
+        processorContext.metrics().removeSensor(consumedSensor);
+    }
+
     private void updateHead() {
         ConsumerRecord<byte[], byte[]> lastCorruptedRecord = null;
 
@@ -206,6 +231,7 @@ private void updateHead() {
                 continue;
             }
             headRecord = new StampedRecord(deserialized, timestamp);
+            headRecordSizeInBytes = consumerRecordSizeInBytes(raw);
         }
 
         // if all records in the FIFO queue are corrupted, make the last one the headRecord
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RepartitionTopics.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RepartitionTopics.java
index 44be19ef8548c..b8698bc6e5672 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/RepartitionTopics.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/RepartitionTopics.java
@@ -118,7 +118,7 @@ public Queue<StreamsException> missingSourceTopicExceptions() {
 
             return new StreamsException(
                 new MissingSourceTopicException(String.format(
-                    "Missing source topics %s for subtopology %s of topology %s",
+                    "Missing source topics %s for subtopology %d of topology %s",
                     missingSourceTopics, subtopologyId, topologyName)),
                 new TaskId(subtopologyId, 0, topologyName));
         }).collect(Collectors.toCollection(LinkedList::new));
@@ -154,15 +154,17 @@ private Map<String, InternalTopicConfig> computeRepartitionTopicConfig(final Clu
                 final Set<String> missingSourceTopicsForSubtopology = computeMissingExternalSourceTopics(topicsInfo, clusterMetadata);
                 missingSourceTopicsForTopology.addAll(missingSourceTopicsForSubtopology);
                 if (!missingSourceTopicsForSubtopology.isEmpty()) {
-                    missingInputTopicsBySubtopology.put(subtopologyEntry.getKey(), missingSourceTopicsForSubtopology);
+                    final Subtopology subtopology = subtopologyEntry.getKey();
+                    missingInputTopicsBySubtopology.put(subtopology, missingSourceTopicsForSubtopology);
                     log.error("Subtopology {} was missing source topics {} and will be excluded from the current assignment, "
                         + "this can be due to the consumer client's metadata being stale or because they have "
                         + "not been created yet. Please verify that you have created all input topics; if they "
                         + "do exist, you just need to wait for the metadata to be updated, at which time a new "
-                        + "rebalance will be kicked off automatically and the topology will be retried at that time."
-                        + topologyName, missingSourceTopicsForTopology);
+                        + "rebalance will be kicked off automatically and the topology will be retried at that time.",
+                        subtopology.nodeGroupId, missingSourceTopicsForSubtopology);
                 }
             }
+
             if (missingSourceTopicsForTopology.isEmpty()) {
                 allRepartitionTopicConfigs.putAll(repartitionTopicConfigsForTopology);
                 allTopicsInfo.addAll(topicsInfoForTopology);
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java
index f30e2d2847b70..6f508eff2792a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/SinkNode.java
@@ -82,7 +82,17 @@ public void process(final Record<KIn, VIn> record) {
 
         final String topic = topicExtractor.extract(key, value, contextForExtraction);
 
-        collector.send(topic, key, value, record.headers(), timestamp, keySerializer, valSerializer, partitioner);
+        collector.send(
+            topic,
+            key,
+            value,
+            record.headers(),
+            timestamp,
+            keySerializer,
+            valSerializer,
+            name(),
+            context,
+            partitioner);
     }
 
     /**
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java
index 6b3d6794e18d1..bb7aef1dcdcd6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTask.java
@@ -27,7 +27,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig.TaskConfig;
+import org.apache.kafka.streams.TopologyConfig.TaskConfig;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 
 import java.util.Collections;
@@ -39,11 +39,13 @@
  * A StandbyTask
  */
 public class StandbyTask extends AbstractTask implements Task {
-    private final Sensor closeTaskSensor;
     private final boolean eosEnabled;
-    private final InternalProcessorContext processorContext;
+    private final Sensor closeTaskSensor;
     private final StreamsMetricsImpl streamsMetrics;
 
+    @SuppressWarnings("rawtypes")
+    protected final InternalProcessorContext processorContext;
+
     /**
      * @param id              the ID of this task
      * @param inputPartitions input topic partitions, used for thread metadata only
@@ -53,6 +55,7 @@ public class StandbyTask extends AbstractTask implements Task {
      * @param stateMgr        the {@link ProcessorStateManager} for this task
      * @param stateDirectory  the {@link StateDirectory} created by the thread
      */
+    @SuppressWarnings("rawtypes")
     StandbyTask(final TaskId id,
                 final Set<TopicPartition> inputPartitions,
                 final ProcessorTopology topology,
@@ -68,7 +71,7 @@ public class StandbyTask extends AbstractTask implements Task {
             stateDirectory,
             stateMgr,
             inputPartitions,
-            config.taskTimeoutMs,
+            config,
             "standby-task",
             StandbyTask.class
         );
@@ -194,7 +197,7 @@ public void postCommit(final boolean enforceCheckpoint) {
 
             case RUNNING:
             case SUSPENDED:
-                maybeWriteCheckpoint(enforceCheckpoint);
+                maybeCheckpoint(enforceCheckpoint);
 
                 log.debug("Finalized commit for {} task", state());
 
@@ -220,7 +223,7 @@ public void closeDirty() {
     }
 
     @Override
-    public void closeCleanAndRecycleState() {
+    public void prepareRecycle() {
         streamsMetrics.removeAllTaskLevelSensors(Thread.currentThread().getName(), id.toString());
         if (state() == State.SUSPENDED) {
             stateMgr.recycle();
@@ -231,7 +234,7 @@ public void closeCleanAndRecycleState() {
         closeTaskSensor.record();
         transitionTo(State.CLOSED);
 
-        log.info("Closed clean and recycled state");
+        log.info("Closed and recycled state, and converted type to active");
     }
 
     private void close(final boolean clean) {
@@ -303,6 +306,7 @@ public void addRecords(final TopicPartition partition, final Iterable<ConsumerRe
         throw new IllegalStateException("Attempted to add records to task " + id() + " for invalid input partition " + partition);
     }
 
+    @SuppressWarnings("rawtypes")
     InternalProcessorContext processorContext() {
         return processorContext;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTaskCreator.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTaskCreator.java
index 43ebd40a35696..2f48cdb67f2ca 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTaskCreator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StandbyTaskCreator.java
@@ -21,7 +21,6 @@
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.processor.TaskId;
-import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics;
 import org.apache.kafka.streams.state.internals.ThreadCache;
@@ -29,12 +28,10 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import static org.apache.kafka.common.utils.Utils.filterMap;
 import static org.apache.kafka.streams.internals.StreamsConfigUtils.eosEnabled;
 
 class StandbyTaskCreator {
@@ -47,9 +44,6 @@ class StandbyTaskCreator {
     private final Logger log;
     private final Sensor createTaskSensor;
 
-    // tasks may be assigned for a NamedTopology that is not yet known by this host, and saved for later creation
-    private final Map<TaskId, Set<TopicPartition>> unknownTasksToBeCreated = new HashMap<>();
-
     StandbyTaskCreator(final TopologyMetadata topologyMetadata,
                        final StreamsConfig applicationConfig,
                        final StreamsMetricsImpl streamsMetrics,
@@ -73,30 +67,12 @@ class StandbyTaskCreator {
         );
     }
 
-    void removeRevokedUnknownTasks(final Set<TaskId> assignedTasks) {
-        unknownTasksToBeCreated.keySet().retainAll(assignedTasks);
-    }
-
-    Map<TaskId, Set<TopicPartition>> uncreatedTasksForTopologies(final Set<String> currentTopologies) {
-        return filterMap(unknownTasksToBeCreated, t -> currentTopologies.contains(t.getKey().topologyName()));
-    }
-
-    // TODO: change return type to `StandbyTask`
     Collection<Task> createTasks(final Map<TaskId, Set<TopicPartition>> tasksToBeCreated) {
-        // TODO: change type to `StandbyTask`
         final List<Task> createdTasks = new ArrayList<>();
-        final Map<TaskId, Set<TopicPartition>>  newUnknownTasks = new HashMap<>();
 
         for (final Map.Entry<TaskId, Set<TopicPartition>> newTaskAndPartitions : tasksToBeCreated.entrySet()) {
             final TaskId taskId = newTaskAndPartitions.getKey();
             final Set<TopicPartition> partitions = newTaskAndPartitions.getValue();
-
-            // task belongs to a named topology that hasn't been added yet, wait until it has to create this
-            if (taskId.topologyName() != null && !topologyMetadata.namedTopologiesView().contains(taskId.topologyName())) {
-                newUnknownTasks.put(taskId, partitions);
-                continue;
-            }
-
             final ProcessorTopology topology = topologyMetadata.buildSubtopology(taskId);
 
             if (topology.hasStateWithChangelogs()) {
@@ -111,7 +87,7 @@ Collection<Task> createTasks(final Map<TaskId, Set<TopicPartition>> tasksToBeCre
                     partitions
                 );
 
-                final InternalProcessorContext context = new ProcessorContextImpl(
+                final InternalProcessorContext<Object, Object> context = new ProcessorContextImpl(
                     taskId,
                     applicationConfig,
                     stateManager,
@@ -127,37 +103,46 @@ Collection<Task> createTasks(final Map<TaskId, Set<TopicPartition>> tasksToBeCre
                     taskId, partitions
                 );
             }
-            unknownTasksToBeCreated.remove(taskId);
-        }
-        if (!newUnknownTasks.isEmpty()) {
-            log.info("Delaying creation of tasks not yet known by this instance: {}", newUnknownTasks.keySet());
-            unknownTasksToBeCreated.putAll(newUnknownTasks);
         }
         return createdTasks;
     }
 
+    /*
+     * TODO: we pass in the new input partitions to validate if they still match,
+     *       in the future we when we have fixed partitions -> tasks mapping,
+     *       we should always reuse the input partition and hence no need validations
+     */
     StandbyTask createStandbyTaskFromActive(final StreamTask streamTask,
                                             final Set<TopicPartition> inputPartitions) {
-        final InternalProcessorContext context = streamTask.processorContext();
-        final ProcessorStateManager stateManager = streamTask.stateMgr;
+        if (!inputPartitions.equals(streamTask.inputPartitions)) {
+            log.warn("Detected unmatched input partitions for task {} when recycling it from active to standby", streamTask.id);
+        }
 
-        streamTask.closeCleanAndRecycleState();
-        stateManager.transitionTaskType(TaskType.STANDBY, getLogContext(streamTask.id()));
+        streamTask.prepareRecycle();
+        streamTask.stateMgr.transitionTaskType(Task.TaskType.STANDBY);
 
-        return createStandbyTask(
-            streamTask.id(),
+        final StandbyTask task = new StandbyTask(
+            streamTask.id,
             inputPartitions,
-            topologyMetadata.buildSubtopology(streamTask.id),
-            stateManager,
-            context
+            streamTask.topology,
+            streamTask.config,
+            streamsMetrics,
+            streamTask.stateMgr,
+            stateDirectory,
+            dummyCache,
+            streamTask.processorContext
         );
+
+        log.trace("Created standby task {} from recycled active task with assigned partitions {}", task.id, inputPartitions);
+        createTaskSensor.record();
+        return task;
     }
 
     StandbyTask createStandbyTask(final TaskId taskId,
                                   final Set<TopicPartition> inputPartitions,
                                   final ProcessorTopology topology,
                                   final ProcessorStateManager stateManager,
-                                  final InternalProcessorContext context) {
+                                  final InternalProcessorContext<Object, Object> context) {
         final StandbyTask task = new StandbyTask(
             taskId,
             inputPartitions,
@@ -170,7 +155,7 @@ StandbyTask createStandbyTask(final TaskId taskId,
             context
         );
 
-        log.trace("Created task {} with assigned partitions {}", taskId, inputPartitions);
+        log.trace("Created standby task {} with assigned partitions {}", taskId, inputPartitions);
         createTaskSensor.record();
         return task;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateUpdater.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateUpdater.java
index 8965abfbe9a41..69d521b6002fa 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateUpdater.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StateUpdater.java
@@ -16,54 +16,184 @@
  */
 package org.apache.kafka.streams.processor.internals;
 
+import org.apache.kafka.streams.processor.TaskId;
+
 import java.time.Duration;
+import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 
 public interface StateUpdater {
 
+    class ExceptionAndTasks {
+        private final Set<Task> tasks;
+        private final RuntimeException exception;
+
+        public ExceptionAndTasks(final Set<Task> tasks, final RuntimeException exception) {
+            this.tasks = Objects.requireNonNull(tasks);
+            this.exception = Objects.requireNonNull(exception);
+        }
+
+        public Set<Task> getTasks() {
+            return Collections.unmodifiableSet(tasks);
+        }
+
+        public RuntimeException exception() {
+            return exception;
+        }
+
+        @Override
+        public boolean equals(final Object o) {
+            if (this == o) return true;
+            if (!(o instanceof ExceptionAndTasks)) return false;
+            final ExceptionAndTasks that = (ExceptionAndTasks) o;
+            return tasks.equals(that.tasks) && exception.equals(that.exception);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(tasks, exception);
+        }
+    }
+
+    /**
+     * Starts the state updater.
+     */
+    void start();
+
+    /**
+     * Shuts down the state updater.
+     *
+     * @param timeout duration how long to wait until the state updater is shut down
+     *
+     * @throws
+     *     org.apache.kafka.streams.errors.StreamsException if the state updater thread cannot shutdown within the timeout
+     */
+    void shutdown(final Duration timeout);
+
     /**
      * Adds a task (active or standby) to the state updater.
      *
+     * This method does not block until the task is added to the state updater.
+     *
      * @param task task to add
      */
     void add(final Task task);
 
     /**
-     * Removes a task (active or standby) from the state updater.
+     * Removes a task (active or standby) from the state updater and adds the removed task to the removed tasks.
+     *
+     * This method does not block until the removed task is removed from the state updater.
      *
-     * @param task task ro remove
+     * The task to be removed is not removed from the restored active tasks and the failed tasks.
+     * Stateless tasks will never be added to the removed tasks since they are immediately added to the
+     * restored active tasks.
+     *
+     * @param taskId ID of the task to remove
      */
-    void remove(final Task task);
+    void remove(final TaskId taskId);
 
     /**
-     * Gets restored active tasks from state restoration/update
+     * Pause a task (active or standby) from restoring in the state updater.
+     *
+     * This method does not block until the task is paused.
+     *
+     * Restored tasks, removed tasks and failed tasks are not paused so this action would be an no-op for them.
+     * Stateless tasks will never be paused since they are immediately added to the
+     * restored active tasks.
+     *
+     * @param taskId ID of the task to remove
+     */
+    void pause(final TaskId taskId);
+
+    /**
+     * Resume restoring a task (active or standby) in the state updater.
+     *
+     * This method does not block until the task is paused.
+     *
+     * Restored tasks, removed tasks and failed tasks are not resumed so this action would be an no-op for them.
+     * Stateless tasks will never be resumed since they are immediately added to the
+     * restored active tasks.
+     *
+     * @param taskId ID of the task to remove
+     */
+    void resume(final TaskId taskId);
+
+    /**
+     * Drains the restored active tasks from the state updater.
+     *
+     * The returned active tasks are removed from the state updater.
      *
      * @param timeout duration how long the calling thread should wait for restored active tasks
      *
-     * @return list of active tasks with up-to-date states
+     * @return set of active tasks with up-to-date states
      */
-    Set<StreamTask> getRestoredActiveTasks(final Duration timeout);
+    Set<StreamTask> drainRestoredActiveTasks(final Duration timeout);
+
 
     /**
-     * Gets a list of exceptions thrown during restoration.
+     * Drains the removed tasks (active and standbys) from the state updater.
+     *
+     * Removed tasks returned by this method are tasks extraordinarily removed from the state updater. These do not
+     * include restored or failed tasks.
      *
-     * @return exceptions
+     * The returned removed tasks are removed from the state updater
+     *
+     * @return set of tasks removed from the state updater
      */
-    List<RuntimeException> getExceptions();
+    Set<Task> drainRemovedTasks();
 
+    /**
+     * Drains the failed tasks and the corresponding exceptions.
+     *
+     * The returned failed tasks are removed from the state updater
+     *
+     * @return list of failed tasks and the corresponding exceptions
+     */
+    List<ExceptionAndTasks> drainExceptionsAndFailedTasks();
 
     /**
-     * Get all tasks (active and standby) that are managed by the state updater.
+     * Gets all tasks that are managed by the state updater.
+     *
+     * The state updater manages all tasks that were added with the {@link StateUpdater#add(Task)} and that have
+     * not been removed from the state updater with one of the following methods:
+     * <ul>
+     *   <li>{@link StateUpdater#drainRestoredActiveTasks(Duration)}</li>
+     *   <li>{@link StateUpdater#drainRemovedTasks()}</li>
+     *   <li>{@link StateUpdater#drainExceptionsAndFailedTasks()}</li>
+     * </ul>
      *
-     * @return list of tasks managed by the state updater
+     * @return set of all tasks managed by the state updater
      */
-    Set<Task> getAllTasks();
+    Set<Task> getTasks();
 
     /**
-     * Shuts down the state updater.
+     * Gets active tasks that are managed by the state updater.
      *
-     * @param timeout duration how long to wait until the state updater is shut down
+     * The state updater manages all active tasks that were added with the {@link StateUpdater#add(Task)} and that have
+     * not been removed from the state updater with one of the following methods:
+     * <ul>
+     *   <li>{@link StateUpdater#drainRestoredActiveTasks(Duration)}</li>
+     *   <li>{@link StateUpdater#drainRemovedTasks()}</li>
+     *   <li>{@link StateUpdater#drainExceptionsAndFailedTasks()}</li>
+     * </ul>
+     *
+     * @return set of all tasks managed by the state updater
      */
-    void shutdown(final Duration timeout);
+    Set<StreamTask> getActiveTasks();
+
+    /**
+     * Gets standby tasks that are managed by the state updater.
+     *
+     * The state updater manages all standby tasks that were added with the {@link StateUpdater#add(Task)} and that have
+     * not been removed from the state updater with one of the following methods:
+     * <ul>
+     *   <li>{@link StateUpdater#drainRemovedTasks()}</li>
+     *   <li>{@link StateUpdater#drainExceptionsAndFailedTasks()}</li>
+     * </ul>
+     *
+     * @return set of all tasks managed by the state updater
+     */
+    Set<StandbyTask> getStandbyTasks();
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java
index fdf027f2be776..f8926e70bbee6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StoreChangelogReader.java
@@ -17,8 +17,9 @@
 package org.apache.kafka.streams.processor.internals;
 
 import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsOptions;
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsSpec;
 import org.apache.kafka.clients.admin.ListOffsetsOptions;
-import org.apache.kafka.clients.admin.ListOffsetsResult;
 import org.apache.kafka.clients.admin.OffsetSpec;
 import org.apache.kafka.clients.consumer.Consumer;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
@@ -36,6 +37,7 @@
 import org.apache.kafka.streams.processor.StateRestoreListener;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.ProcessorStateManager.StateStoreMetadata;
+import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.slf4j.Logger;
 
 import java.time.Duration;
@@ -52,8 +54,6 @@
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchCommittedOffsets;
-
 /**
  * ChangelogReader is created and maintained by the stream thread and used for both updating standby tasks and
  * restoring active tasks. It manages the restore consumer, including its assigned partitions, when to pause / resume
@@ -204,19 +204,14 @@ int bufferedLimitIndex() {
     // is being removed from the thread; otherwise it would stay in this map even after completed
     private final Map<TopicPartition, ChangelogMetadata> changelogs;
 
-    // the changelog reader only need the main consumer to get committed offsets for source changelog partitions
-    // to update offset limit for standby tasks;
-    private Consumer<byte[], byte[]> mainConsumer;
+    // groupId is needed for the admin client to retrieve committed offsets
+    private final String groupId;
 
-    // the changelog reader needs the admin client to list end offsets
+    // the changelog reader needs the admin client to list end offsets and committed offsets
     private final Admin adminClient;
 
     private long lastUpdateOffsetTime;
 
-    void setMainConsumer(final Consumer<byte[], byte[]> consumer) {
-        this.mainConsumer = consumer;
-    }
-
     public StoreChangelogReader(final Time time,
                                 final StreamsConfig config,
                                 final LogContext logContext,
@@ -230,6 +225,7 @@ public StoreChangelogReader(final Time time,
         this.restoreConsumer = restoreConsumer;
         this.stateRestoreListener = stateRestoreListener;
 
+        this.groupId = config.getString(StreamsConfig.APPLICATION_ID_CONFIG);
         this.pollTime = Duration.ofMillis(config.getLong(StreamsConfig.POLL_MS_CONFIG));
         this.updateOffsetIntervalMs = config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG) == Long.MAX_VALUE ?
             DEFAULT_OFFSET_UPDATE_MS : config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG);
@@ -394,7 +390,8 @@ private Set<TopicPartition> standbyRestoringChangelogs() {
             .collect(Collectors.toSet());
     }
 
-    private boolean allChangelogsCompleted() {
+    @Override
+    public boolean allChangelogsCompleted() {
         return changelogs.values().stream()
             .allMatch(metadata -> metadata.changelogState == ChangelogState.COMPLETED);
     }
@@ -428,6 +425,8 @@ public void restore(final Map<TaskId, Task> tasks) {
             final ConsumerRecords<byte[], byte[]> polledRecords;
 
             try {
+                pauseResumePartitions(tasks, restoringChangelogs);
+
                 // for restoring active and updating standby we may prefer different poll time
                 // in order to make sure we call the main consumer#poll in time.
                 // TODO: once we move ChangelogReader to a separate thread this may no longer be a concern
@@ -462,7 +461,10 @@ public void restore(final Map<TaskId, Task> tasks) {
                 final TaskId taskId = changelogs.get(partition).stateManager.taskId();
                 try {
                     if (restoreChangelog(changelogs.get(partition))) {
-                        tasks.get(taskId).clearTaskTimeout();
+                        final Task task = tasks.get(taskId);
+                        if (task != null) {
+                            task.clearTaskTimeout();
+                        }
                     }
                 } catch (final TimeoutException timeoutException) {
                     tasks.get(taskId).maybeInitTaskTimeoutOrThrow(
@@ -478,6 +480,47 @@ public void restore(final Map<TaskId, Task> tasks) {
         }
     }
 
+    private void pauseResumePartitions(final Map<TaskId, Task> tasks,
+                                       final Set<TopicPartition> restoringChangelogs) {
+        if (state == ChangelogReaderState.ACTIVE_RESTORING) {
+            updatePartitionsByType(tasks, restoringChangelogs, TaskType.ACTIVE);
+        }
+        if (state == ChangelogReaderState.STANDBY_UPDATING) {
+            updatePartitionsByType(tasks, restoringChangelogs, TaskType.STANDBY);
+        }
+    }
+
+    private void updatePartitionsByType(final Map<TaskId, Task> tasks,
+                                        final Set<TopicPartition> restoringChangelogs,
+                                        final TaskType taskType) {
+        final Collection<TopicPartition> toResume =
+            restoringChangelogs.stream().filter(t -> shouldResume(tasks, t, taskType)).collect(Collectors.toList());
+        final Collection<TopicPartition> toPause =
+            restoringChangelogs.stream().filter(t -> shouldPause(tasks, t, taskType)).collect(Collectors.toList());
+        restoreConsumer.resume(toResume);
+        restoreConsumer.pause(toPause);
+    }
+
+    private boolean shouldResume(final Map<TaskId, Task> tasks, final TopicPartition partition, final TaskType taskType) {
+        final ProcessorStateManager manager = changelogs.get(partition).stateManager;
+        final TaskId taskId = manager.taskId();
+        final Task task = tasks.get(taskId);
+        if (manager.taskType() == taskType) {
+            return task != null;
+        }
+        return false;
+    }
+
+    private boolean shouldPause(final Map<TaskId, Task> tasks, final TopicPartition partition, final TaskType taskType) {
+        final ProcessorStateManager manager = changelogs.get(partition).stateManager;
+        final TaskId taskId = manager.taskId();
+        final Task task = tasks.get(taskId);
+        if (manager.taskType() == taskType) {
+            return task == null;
+        }
+        return false;
+    }
+
     private void maybeLogRestorationProgress() {
         if (state == ChangelogReaderState.ACTIVE_RESTORING) {
             if (time.milliseconds() - lastRestoreLogTime > RESTORE_LOG_INTERVAL_MS) {
@@ -534,6 +577,7 @@ private void maybeUpdateLimitOffsetsForStandbyChangelogs(final Map<TaskId, Task>
             for (final TopicPartition partition : changelogsWithLimitOffsets) {
                 if (!changelogs.get(partition).bufferedRecords().isEmpty()) {
                     updateLimitOffsetsForStandbyChangelogs(committedOffsetForChangelogs(tasks, changelogsWithLimitOffsets));
+                    lastUpdateOffsetTime = time.milliseconds();
                     break;
                 }
             }
@@ -632,7 +676,11 @@ private Set<Task> getTasksFromPartitions(final Map<TaskId, Task> tasks,
     }
 
     private void clearTaskTimeout(final Set<Task> tasks) {
-        tasks.forEach(Task::clearTaskTimeout);
+        tasks.forEach(t -> {
+            if (t != null) {
+                t.clearTaskTimeout();
+            }
+        });
     }
 
     private void maybeInitTaskTimeoutOrThrow(final Set<Task> tasks,
@@ -641,41 +689,54 @@ private void maybeInitTaskTimeoutOrThrow(final Set<Task> tasks,
         tasks.forEach(t -> t.maybeInitTaskTimeoutOrThrow(now, cause));
     }
 
-    private Map<TopicPartition, Long> committedOffsetForChangelogs(final Map<TaskId, Task> tasks,
-                                                                   final Set<TopicPartition> partitions) {
-        final Map<TopicPartition, Long> committedOffsets;
+    private Map<TopicPartition, Long> committedOffsetForChangelogs(final Map<TaskId, Task> tasks, final Set<TopicPartition> partitions) {
+        if (partitions.isEmpty()) {
+            return Collections.emptyMap();
+        }
+
         try {
-            committedOffsets = fetchCommittedOffsets(partitions, mainConsumer);
+            // those which do not have a committed offset would default to 0
+            final ListConsumerGroupOffsetsOptions options = new ListConsumerGroupOffsetsOptions()
+                    .requireStable(true);
+            final ListConsumerGroupOffsetsSpec spec = new ListConsumerGroupOffsetsSpec()
+                    .topicPartitions(new ArrayList<>(partitions));
+            final Map<TopicPartition, Long> committedOffsets = adminClient.listConsumerGroupOffsets(Collections.singletonMap(groupId, spec))
+                    .partitionsToOffsetAndMetadata(groupId).get().entrySet()
+                    .stream()
+                    .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue() == null ? 0L : e.getValue().offset()));
+
             clearTaskTimeout(getTasksFromPartitions(tasks, partitions));
-        } catch (final TimeoutException timeoutException) {
-            log.debug("Could not fetch all committed offsets for {}, will retry in the next run loop", partitions);
-            maybeInitTaskTimeoutOrThrow(getTasksFromPartitions(tasks, partitions), timeoutException);
+            return committedOffsets;
+        } catch (final TimeoutException | InterruptedException | ExecutionException retriableException) {
+            log.debug("Could not retrieve the committed offsets for partitions {} due to {}, will retry in the next run loop",
+                partitions, retriableException.toString());
+            maybeInitTaskTimeoutOrThrow(getTasksFromPartitions(tasks, partitions), retriableException);
             return Collections.emptyMap();
+        } catch (final KafkaException e) {
+            throw new StreamsException(String.format("Failed to retrieve committed offsets for %s", partitions), e);
         }
-        lastUpdateOffsetTime = time.milliseconds();
-        return committedOffsets;
     }
 
-    private Map<TopicPartition, Long> endOffsetForChangelogs(final Map<TaskId, Task> tasks,
-                                                             final Set<TopicPartition> partitions) {
+    private Map<TopicPartition, Long> endOffsetForChangelogs(final Map<TaskId, Task> tasks, final Set<TopicPartition> partitions) {
         if (partitions.isEmpty()) {
             return Collections.emptyMap();
         }
 
         try {
-            final ListOffsetsResult result = adminClient.listOffsets(
-                    partitions.stream().collect(Collectors.toMap(Function.identity(), tp -> OffsetSpec.latest())),
-                    new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED)
-            );
+            // we always use read_uncommitted to get log end offset since the last committed txn may have not advanced the LSO for EOS,
+            // see KAFKA-10167 for more details
+            final ListOffsetsOptions options = new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED);
+            final Map<TopicPartition, OffsetSpec> offsetSpecs =
+                partitions.stream().collect(Collectors.toMap(Function.identity(), tp -> OffsetSpec.latest()));
+            final Map<TopicPartition, Long> logEndOffsets = adminClient.listOffsets(offsetSpecs, options)
+                .all().get().entrySet()
+                .stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().offset()));
 
-            final Map<TopicPartition,  ListOffsetsResult.ListOffsetsResultInfo> resultPerPartition = result.all().get();
             clearTaskTimeout(getTasksFromPartitions(tasks, partitions));
-
-            return resultPerPartition.entrySet().stream().collect(
-                    Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().offset())
-            );
+            return logEndOffsets;
         } catch (final TimeoutException | InterruptedException | ExecutionException retriableException) {
-            log.debug("Could not fetch all end offsets for {}, will retry in the next run loop", partitions);
+            log.debug("Could not fetch all end offsets for {} due to {}, will retry in the next run loop",
+                partitions, retriableException.toString());
             maybeInitTaskTimeoutOrThrow(getTasksFromPartitions(tasks, partitions), retriableException);
             return Collections.emptyMap();
         } catch (final KafkaException e) {
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java
index f86e89f73ff63..f7bf8a5e74859 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamTask.java
@@ -43,14 +43,12 @@
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.TaskMetrics;
 import org.apache.kafka.streams.processor.internals.metrics.ThreadMetrics;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig.TaskConfig;
+import org.apache.kafka.streams.TopologyConfig.TaskConfig;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
-import java.nio.ByteBuffer;
-import java.util.Base64;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -69,9 +67,6 @@
  */
 public class StreamTask extends AbstractTask implements ProcessorNodePunctuator, Task {
 
-    // visible for testing
-    static final byte LATEST_MAGIC_BYTE = 1;
-
     private final Time time;
     private final Consumer<byte[], byte[]> mainConsumer;
 
@@ -88,7 +83,6 @@ public class StreamTask extends AbstractTask implements ProcessorNodePunctuator,
     private final Map<TopicPartition, Long> committedOffsets;
     private final Map<TopicPartition, Long> highWatermark;
     private final Set<TopicPartition> resetOffsetsForPartitions;
-    private Optional<Long> timeCurrentIdlingStarted;
     private final PunctuationQueue streamTimePunctuationQueue;
     private final PunctuationQueue systemTimePunctuationQueue;
     private final StreamsMetricsImpl streamsMetrics;
@@ -102,15 +96,16 @@ public class StreamTask extends AbstractTask implements ProcessorNodePunctuator,
     private final Sensor bufferedRecordsSensor;
     private final Map<String, Sensor> e2eLatencySensors = new HashMap<>();
 
-    @SuppressWarnings("rawtypes")
-    private final InternalProcessorContext processorContext;
-
     private final RecordQueueCreator recordQueueCreator;
 
+    @SuppressWarnings("rawtypes")
+    protected final InternalProcessorContext processorContext;
+
     private StampedRecord record;
     private boolean commitNeeded = false;
     private boolean commitRequested = false;
     private boolean hasPendingTxCommit = false;
+    private Optional<Long> timeCurrentIdlingStarted;
 
     @SuppressWarnings("rawtypes")
     public StreamTask(final TaskId id,
@@ -132,7 +127,7 @@ public StreamTask(final TaskId id,
             stateDirectory,
             stateMgr,
             inputPartitions,
-            config.taskTimeoutMs,
+            config,
             "task",
             StreamTask.class
         );
@@ -297,7 +292,6 @@ public void suspend() {
                     partitionGroup.clear();
                 } finally {
                     transitToSuspend();
-                    log.info("Suspended running");
                 }
 
                 break;
@@ -418,6 +412,25 @@ public Map<TopicPartition, OffsetAndMetadata> prepareCommit() {
         }
     }
 
+    private Long findOffset(final TopicPartition partition) {
+        Long offset = partitionGroup.headRecordOffset(partition);
+        if (offset == null) {
+            try {
+                offset = mainConsumer.position(partition);
+            } catch (final TimeoutException error) {
+                // the `consumer.position()` call should never block, because we know that we did process data
+                // for the requested partition and thus the consumer should have a valid local position
+                // that it can return immediately
+
+                // hence, a `TimeoutException` indicates a bug and thus we rethrow it as fatal `IllegalStateException`
+                throw new IllegalStateException(error);
+            } catch (final KafkaException fatal) {
+                throw new StreamsException(fatal);
+            }
+        }
+        return offset;
+    }
+
     private Map<TopicPartition, OffsetAndMetadata> committableOffsetsAndMetadata() {
         final Map<TopicPartition, OffsetAndMetadata> committableOffsets;
 
@@ -432,28 +445,18 @@ private Map<TopicPartition, OffsetAndMetadata> committableOffsetsAndMetadata() {
             case SUSPENDED:
                 final Map<TopicPartition, Long> partitionTimes = extractPartitionTimes();
 
-                committableOffsets = new HashMap<>(consumedOffsets.size());
-                for (final Map.Entry<TopicPartition, Long> entry : consumedOffsets.entrySet()) {
-                    final TopicPartition partition = entry.getKey();
-                    Long offset = partitionGroup.headRecordOffset(partition);
-                    if (offset == null) {
-                        try {
-                            offset = mainConsumer.position(partition);
-                        } catch (final TimeoutException error) {
-                            // the `consumer.position()` call should never block, because we know that we did process data
-                            // for the requested partition and thus the consumer should have a valid local position
-                            // that it can return immediately
-
-                            // hence, a `TimeoutException` indicates a bug and thus we rethrow it as fatal `IllegalStateException`
-                            throw new IllegalStateException(error);
-                        } catch (final KafkaException fatal) {
-                            throw new StreamsException(fatal);
-                        }
-                    }
+                // If there's processor metadata to be committed. We need to commit them to all
+                // input partitions
+                final Set<TopicPartition> partitionsNeedCommit = processorContext.getProcessorMetadata().needsCommit() ?
+                    inputPartitions() : consumedOffsets.keySet();
+                committableOffsets = new HashMap<>(partitionsNeedCommit.size());
+
+                for (final TopicPartition partition : partitionsNeedCommit) {
+                    final Long offset = findOffset(partition);
                     final long partitionTime = partitionTimes.get(partition);
-                    committableOffsets.put(partition, new OffsetAndMetadata(offset, encodeTimestamp(partitionTime)));
+                    committableOffsets.put(partition, new OffsetAndMetadata(offset,
+                        new TopicPartitionMetadata(partitionTime, processorContext.getProcessorMetadata()).encode()));
                 }
-
                 break;
 
             case CLOSED:
@@ -478,14 +481,14 @@ public void postCommit(final boolean enforceCheckpoint) {
 
             case RESTORING:
             case SUSPENDED:
-                maybeWriteCheckpoint(enforceCheckpoint);
+                maybeCheckpoint(enforceCheckpoint);
                 log.debug("Finalized commit for {} task with enforce checkpoint {}", state(), enforceCheckpoint);
 
                 break;
 
             case RUNNING:
                 if (enforceCheckpoint || !eosEnabled) {
-                    maybeWriteCheckpoint(enforceCheckpoint);
+                    maybeCheckpoint(enforceCheckpoint);
                 }
                 log.debug("Finalized commit for {} task with eos {} enforce checkpoint {}", state(), eosEnabled, enforceCheckpoint);
 
@@ -505,6 +508,7 @@ private void clearCommitStatuses() {
         commitNeeded = false;
         commitRequested = false;
         hasPendingTxCommit = false;
+        processorContext.getProcessorMetadata().setNeedsCommit(false);
     }
 
     private Map<TopicPartition, Long> extractPartitionTimes() {
@@ -536,16 +540,18 @@ public void closeDirty() {
     public void updateInputPartitions(final Set<TopicPartition> topicPartitions, final Map<String, List<String>> allTopologyNodesToSourceTopics) {
         super.updateInputPartitions(topicPartitions, allTopologyNodesToSourceTopics);
         partitionGroup.updatePartitions(topicPartitions, recordQueueCreator::createQueue);
+        processorContext.getProcessorMetadata().setNeedsCommit(true);
     }
 
     @Override
-    public void closeCleanAndRecycleState() {
+    public void prepareRecycle() {
         validateClean();
         removeAllSensors();
         clearCommitStatuses();
         switch (state()) {
             case SUSPENDED:
                 stateMgr.recycle();
+                partitionGroup.close();
                 recordCollector.closeClean();
 
                 break;
@@ -560,10 +566,9 @@ public void closeCleanAndRecycleState() {
         }
 
         closeTaskSensor.record();
-
         transitionTo(State.CLOSED);
 
-        log.info("Closed clean and recycled state");
+        log.info("Closed and recycled state, and converted type to standby");
     }
 
     /**
@@ -574,14 +579,14 @@ public void closeCleanAndRecycleState() {
      *                          or flushing state store get IO errors; such error should cause the thread to die
      */
     @Override
-    protected void maybeWriteCheckpoint(final boolean enforceCheckpoint) {
+    public void maybeCheckpoint(final boolean enforceCheckpoint) {
         // commitNeeded indicates we may have processed some records since last commit
         // and hence we need to refresh checkpointable offsets regardless whether we should checkpoint or not
         if (commitNeeded || enforceCheckpoint) {
             stateMgr.updateChangelogOffsets(checkpointableOffsets());
         }
 
-        super.maybeWriteCheckpoint(enforceCheckpoint);
+        super.maybeCheckpoint(enforceCheckpoint);
     }
 
     private void validateClean() {
@@ -607,6 +612,13 @@ private void removeAllSensors() {
     private void close(final boolean clean) {
         switch (state()) {
             case SUSPENDED:
+                TaskManager.executeAndMaybeSwallow(
+                    clean,
+                    partitionGroup::close,
+                    "partition group close",
+                    log
+                );
+
                 // first close state manager (which is idempotent) then close the record collector
                 // if the latter throws and we re-close dirty which would close the state manager again.
                 TaskManager.executeAndMaybeSwallow(
@@ -646,7 +658,6 @@ private void close(final boolean clean) {
         }
 
         record = null;
-        partitionGroup.clear();
         closeTaskSensor.record();
 
         transitionTo(State.CLOSED);
@@ -887,7 +898,7 @@ private void resetOffsetsIfNeededAndInitializeMetadata(final java.util.function.
             offsetResetter.accept(resetOffsetsForPartitions);
             resetOffsetsForPartitions.clear();
 
-            initializeTaskTime(offsetsAndMetadata.entrySet().stream()
+            initializeTaskTimeAndProcessorMetadata(offsetsAndMetadata.entrySet().stream()
                 .filter(e -> e.getValue() != null)
                 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
             );
@@ -905,20 +916,26 @@ private void resetOffsetsIfNeededAndInitializeMetadata(final java.util.function.
         }
     }
 
-    private void initializeTaskTime(final Map<TopicPartition, OffsetAndMetadata> offsetsAndMetadata) {
+    private void initializeTaskTimeAndProcessorMetadata(final Map<TopicPartition, OffsetAndMetadata> offsetsAndMetadata) {
+        final ProcessorMetadata finalProcessMetadata = new ProcessorMetadata();
         for (final Map.Entry<TopicPartition, OffsetAndMetadata> entry : offsetsAndMetadata.entrySet()) {
             final TopicPartition partition = entry.getKey();
             final OffsetAndMetadata metadata = entry.getValue();
 
             if (metadata != null) {
-                final long committedTimestamp = decodeTimestamp(metadata.metadata());
+                final TopicPartitionMetadata committedTimestampAndMeta = TopicPartitionMetadata.decode(metadata.metadata());
+                final long committedTimestamp = committedTimestampAndMeta.partitionTime();
                 partitionGroup.setPartitionTime(partition, committedTimestamp);
                 log.debug("A committed timestamp was detected: setting the partition time of partition {}"
                     + " to {} in stream task {}", partition, committedTimestamp, id);
+
+                final ProcessorMetadata processorMetadata = committedTimestampAndMeta.processorMetadata();
+                finalProcessMetadata.update(processorMetadata);
             } else {
                 log.debug("No committed timestamp was found in metadata for partition {}", partition);
             }
         }
+        processorContext.setProcessorMetadata(finalProcessMetadata);
 
         final Set<TopicPartition> nonCommitted = new HashSet<>(inputPartitions());
         nonCommitted.removeAll(offsetsAndMetadata.keySet());
@@ -1092,34 +1109,6 @@ public boolean commitRequested() {
         return commitRequested;
     }
 
-    static String encodeTimestamp(final long partitionTime) {
-        final ByteBuffer buffer = ByteBuffer.allocate(9);
-        buffer.put(LATEST_MAGIC_BYTE);
-        buffer.putLong(partitionTime);
-        return Base64.getEncoder().encodeToString(buffer.array());
-    }
-
-    long decodeTimestamp(final String encryptedString) {
-        if (encryptedString.isEmpty()) {
-            return RecordQueue.UNKNOWN;
-        }
-        try {
-            final ByteBuffer buffer = ByteBuffer.wrap(Base64.getDecoder().decode(encryptedString));
-            final byte version = buffer.get();
-            switch (version) {
-                case LATEST_MAGIC_BYTE:
-                    return buffer.getLong();
-                default:
-                    log.warn("Unsupported offset metadata version found. Supported version {}. Found version {}.",
-                            LATEST_MAGIC_BYTE, version);
-                    return RecordQueue.UNKNOWN;
-            }
-        } catch (final Exception exception) {
-            log.warn("Unsupported offset metadata found");
-            return RecordQueue.UNKNOWN;
-        }
-    }
-
     @SuppressWarnings("rawtypes")
     public InternalProcessorContext processorContext() {
         return processorContext;
@@ -1226,7 +1215,7 @@ public Map<TopicPartition, Long> highWaterMark() {
     }
 
     private void transitToSuspend() {
-        log.info("Suspended {}", state());
+        log.info("Suspended from {}", state());
         transitionTo(State.SUSPENDED);
         timeCurrentIdlingStarted = Optional.of(System.currentTimeMillis());
     }
@@ -1278,7 +1267,7 @@ public RecordQueue createQueue(final TopicPartition partition) {
             final SourceNode<?, ?> source = topology.source(partition.topic());
             if (source == null) {
                 throw new TopologyException(
-                        "Topic is unknown to the topology. " +
+                        "Topic " + partition.topic() + " is unknown to the topology. " +
                                 "This may happen if different KafkaStreams instances of the same application execute different Topologies. " +
                                 "Note that Topologies are only identical if all operators are added in the same order."
                 );
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java
index 8345a4ade921a..5afc747408f7b 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamThread.java
@@ -35,6 +35,7 @@
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.streams.KafkaClientSupplier;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TaskMetadata;
 import org.apache.kafka.streams.ThreadMetadata;
 import org.apache.kafka.streams.errors.StreamsException;
@@ -261,6 +262,7 @@ public boolean isRunning() {
     public final Object stateLock;
     private final Duration pollTime;
     private final long commitTimeMs;
+    private final long purgeTimeMs;
     private final int maxPollTimeMs;
     private final String originalReset;
     private final TaskManager taskManager;
@@ -288,6 +290,7 @@ public boolean isRunning() {
     private long now;
     private long lastPollMs;
     private long lastCommitMs;
+    private long lastPurgeMs;
     private long lastPartitionAssignedMs = -1L;
     private int numIterations;
     private volatile State state = State.CREATED;
@@ -343,6 +346,7 @@ public static StreamThread create(final TopologyMetadata topologyMetadata,
         referenceContainer.adminClient = adminClient;
         referenceContainer.streamsMetadataState = streamsMetadataState;
         referenceContainer.time = time;
+        referenceContainer.clientTags = config.getClientTags();
 
         log.info("Creating restore consumer client");
         final Map<String, Object> restoreConsumerConfigs = config.getRestoreConsumerConfigs(getRestoreConsumerClientId(threadId));
@@ -381,17 +385,18 @@ public static StreamThread create(final TopologyMetadata topologyMetadata,
             threadId,
             log
         );
+
         final TaskManager taskManager = new TaskManager(
             time,
             changelogReader,
             processId,
             logPrefix,
-            streamsMetrics,
             activeTaskCreator,
             standbyTaskCreator,
             topologyMetadata,
             adminClient,
-            stateDirectory
+            stateDirectory,
+            maybeCreateAndStartStateUpdater(config, changelogReader, time)
         );
         referenceContainer.taskManager = taskManager;
 
@@ -407,7 +412,6 @@ public static StreamThread create(final TopologyMetadata topologyMetadata,
         }
 
         final Consumer<byte[], byte[]> mainConsumer = clientSupplier.getConsumer(consumerConfigs);
-        changelogReader.setMainConsumer(mainConsumer);
         taskManager.setMainConsumer(mainConsumer);
         referenceContainer.mainConsumer = mainConsumer;
 
@@ -435,6 +439,20 @@ public static StreamThread create(final TopologyMetadata topologyMetadata,
         return streamThread.updateThreadMetadata(getSharedAdminClientId(clientId));
     }
 
+    private static StateUpdater maybeCreateAndStartStateUpdater(final StreamsConfig streamsConfig,
+                                                                final ChangelogReader changelogReader,
+                                                                final Time time) {
+        final boolean stateUpdaterEnabled =
+            InternalConfig.getBoolean(streamsConfig.originals(), InternalConfig.STATE_UPDATER_ENABLED, false);
+        if (stateUpdaterEnabled) {
+            final StateUpdater stateUpdater = new DefaultStateUpdater(streamsConfig, changelogReader, time);
+            stateUpdater.start();
+            return stateUpdater;
+        } else {
+            return null;
+        }
+    }
+
     public StreamThread(final Time time,
                         final StreamsConfig config,
                         final Admin adminClient,
@@ -517,6 +535,7 @@ public StreamThread(final Time time,
         this.maxPollTimeMs = new InternalConsumerConfig(config.getMainConsumerConfigs("dummyGroupId", "dummyClientId", dummyThreadIdx))
             .getInt(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG);
         this.commitTimeMs = config.getLong(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG);
+        this.purgeTimeMs = config.getLong(StreamsConfig.REPARTITION_PURGE_INTERVAL_MS_CONFIG);
 
         this.numIterations = 1;
         this.eosEnabled = eosEnabled(config);
@@ -585,7 +604,7 @@ boolean runLoop() {
                 runOnce();
                 if (nextProbingRebalanceMs.get() < time.milliseconds()) {
                     log.info("Triggering the followup rebalance scheduled for {} ms.", nextProbingRebalanceMs.get());
-                    mainConsumer.enforceRebalance();
+                    mainConsumer.enforceRebalance("Scheduled probing rebalance");
                     nextProbingRebalanceMs.set(Long.MAX_VALUE);
                 }
             } catch (final TaskCorruptedException e) {
@@ -597,7 +616,7 @@ boolean runLoop() {
                     final boolean enforceRebalance = taskManager.handleCorruption(e.corruptedTasks());
                     if (enforceRebalance && eosEnabled) {
                         log.info("Active task(s) got corrupted. Triggering a rebalance.");
-                        mainConsumer.enforceRebalance();
+                        mainConsumer.enforceRebalance("Active tasks corrupted");
                     }
                 } catch (final TaskMigratedException taskMigrated) {
                     handleTaskMigrated(taskMigrated);
@@ -639,7 +658,7 @@ public void maybeSendShutdown() {
         if (assignmentErrorCode.get() == AssignorError.SHUTDOWN_REQUESTED.code()) {
             log.warn("Detected that shutdown was requested. " +
                     "All clients in this app will now begin to shutdown");
-            mainConsumer.enforceRebalance();
+            mainConsumer.enforceRebalance("Shutdown requested");
         }
     }
 
@@ -796,10 +815,10 @@ void runOnce() {
 
                 final long beforeCommitMs = now;
                 final int committed = maybeCommit();
-                totalCommittedSinceLastSummary += committed;
                 final long commitLatency = Math.max(now - beforeCommitMs, 0);
                 totalCommitLatency += commitLatency;
                 if (committed > 0) {
+                    totalCommittedSinceLastSummary += committed;
                     commitSensor.record(commitLatency / (double) committed, now);
 
                     if (log.isDebugEnabled()) {
@@ -861,7 +880,7 @@ private void initializeAndRestorePhase() {
             if (taskManager.tryToCompleteRestoration(now, partitions -> resetOffsets(partitions, null))) {
                 changelogReader.transitToUpdateStandby();
                 log.info("Restoration took {} ms for all tasks {}", time.milliseconds() - lastPartitionAssignedMs,
-                    taskManager.tasks().keySet());
+                    taskManager.allTasks().keySet());
                 setState(State.RUNNING);
             }
 
@@ -875,7 +894,8 @@ private void initializeAndRestorePhase() {
         }
         // we can always let changelog reader try restoring in order to initialize the changelogs;
         // if there's no active restoring or standby updating it would not try to fetch any data
-        changelogReader.restore(taskManager.tasks());
+        // After KAFKA-13873, we only restore the not paused tasks.
+        changelogReader.restore(taskManager.notPausedTasks());
         log.debug("Idempotent restore call done. Thread state has not changed.");
     }
 
@@ -1060,16 +1080,17 @@ int maybeCommit() {
             }
 
             committed = taskManager.commit(
-                taskManager.tasks()
+                taskManager.allTasks()
                     .values()
                     .stream()
                     .filter(t -> t.state() == Task.State.RUNNING || t.state() == Task.State.RESTORING)
                     .collect(Collectors.toSet())
             );
 
-            if (committed > 0) {
+            if (committed > 0 && (now - lastPurgeMs) > purgeTimeMs) {
                 // try to purge the committed records for repartition topics if possible
                 taskManager.maybePurgeCommittedRecords();
+                lastPurgeMs = now;
             }
 
             if (committed == -1) {
@@ -1119,7 +1140,7 @@ private void completeShutdown(final boolean cleanRun) {
         // intentionally do not check the returned flag
         setState(State.PENDING_SHUTDOWN);
 
-        log.info("Shutting down");
+        log.info("Shutting down {}", cleanRun ? "clean" : "unclean");
 
         try {
             taskManager.shutdown(cleanRun);
@@ -1227,7 +1248,7 @@ public List<Task> activeTasks() {
     }
 
     public Map<TaskId, Task> allTasks() {
-        return taskManager.tasks();
+        return taskManager.allTasks();
     }
 
     /**
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsMetadataState.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsMetadataState.java
index e8ee3eacf69d5..6850715b0aa29 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsMetadataState.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsMetadataState.java
@@ -212,7 +212,7 @@ public synchronized <K> KeyQueryMetadata getKeyQueryMetadataForKey(final String
         }
         return getKeyQueryMetadataForKey(storeName,
                                          key,
-                                         new DefaultStreamPartitioner<>(keySerializer, clusterMetadata));
+                                         new DefaultStreamPartitioner<>(keySerializer));
     }
 
     /**
@@ -225,7 +225,7 @@ public synchronized <K> KeyQueryMetadata getKeyQueryMetadataForKey(final String
         Objects.requireNonNull(keySerializer, "keySerializer can't be null");
         return getKeyQueryMetadataForKey(storeName,
                                          key,
-                                         new DefaultStreamPartitioner<>(keySerializer, clusterMetadata),
+                                         new DefaultStreamPartitioner<>(keySerializer),
                                          topologyName);
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java
index d2fa90524f507..2af2fba718704 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignor.java
@@ -128,7 +128,7 @@ private static class ClientMetadata {
         private final ClientState state;
         private final SortedSet<String> consumers;
 
-        ClientMetadata(final String endPoint) {
+        ClientMetadata(final String endPoint, final Map<String, String> clientTags) {
 
             // get the host info, or null if no endpoint is configured (ie endPoint == null)
             hostInfo = HostInfo.buildFromEndpoint(endPoint);
@@ -136,8 +136,8 @@ private static class ClientMetadata {
             // initialize the consumer memberIds
             consumers = new TreeSet<>();
 
-            // initialize the client state
-            state = new ClientState();
+            // initialize the client state with client tags
+            state = new ClientState(clientTags);
         }
 
         void addConsumer(final String consumerMemberId, final List<TopicPartition> ownedPartitions) {
@@ -189,6 +189,7 @@ public String toString() {
 
     private Supplier<TaskAssignor> taskAssignorSupplier;
     private byte uniqueField;
+    private Map<String, String> clientTags;
 
     /**
      * We need to have the PartitionAssignor and its StreamThread to be mutually accessible since the former needs
@@ -223,6 +224,7 @@ public void configure(final Map<String, ?> configs) {
         taskAssignorSupplier = assignorConfiguration::taskAssignor;
         assignmentListener = assignorConfiguration.assignmentListener();
         uniqueField = 0;
+        clientTags = referenceContainer.clientTags;
     }
 
     @Override
@@ -265,7 +267,7 @@ public ByteBuffer subscriptionUserData(final Set<String> topics) {
             taskOffsetSums,
             uniqueField,
             assignmentErrorCode.get(),
-            Collections.emptyMap()
+            clientTags
         ).encode();
     }
 
@@ -338,7 +340,7 @@ public GroupAssignment assign(final Cluster metadata, final GroupSubscription gr
                 futureMetadataVersion = usedVersion;
                 processId = FUTURE_ID;
                 if (!clientMetadataMap.containsKey(FUTURE_ID)) {
-                    clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null));
+                    clientMetadataMap.put(FUTURE_ID, new ClientMetadata(null, Collections.emptyMap()));
                 }
             } else {
                 processId = info.processId();
@@ -348,7 +350,7 @@ public GroupAssignment assign(final Cluster metadata, final GroupSubscription gr
 
             // create the new client metadata if necessary
             if (clientMetadata == null) {
-                clientMetadata = new ClientMetadata(info.userEndPoint());
+                clientMetadata = new ClientMetadata(info.userEndPoint(), info.clientTags());
                 clientMetadataMap.put(info.processId(), clientMetadata);
             }
 
@@ -1462,6 +1464,10 @@ protected byte uniqueField() {
         return uniqueField;
     }
 
+    protected Map<String, String> clientTags() {
+        return clientTags;
+    }
+
     protected void handleRebalanceStart(final Set<String> topics) {
         taskManager.handleRebalanceStart(topics);
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsProducer.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsProducer.java
index 7c98189ff1b32..a1b68ff790856 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsProducer.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/StreamsProducer.java
@@ -223,7 +223,8 @@ private double totalBlockedTime(final Producer<?, ?> producer) {
             + getMetricValue(producer.metrics(), "txn-begin-time-ns-total")
             + getMetricValue(producer.metrics(), "txn-send-offsets-time-ns-total")
             + getMetricValue(producer.metrics(), "txn-commit-time-ns-total")
-            + getMetricValue(producer.metrics(), "txn-abort-time-ns-total");
+            + getMetricValue(producer.metrics(), "txn-abort-time-ns-total")
+            + getMetricValue(producer.metrics(), "metadata-wait-time-ns-total");
     }
 
     public double totalBlockedTime() {
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/Task.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/Task.java
index 3549ba2b18cf3..20f0343b2a786 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/Task.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/Task.java
@@ -141,6 +141,12 @@ default void addPartitionsForOffsetReset(final Set<TopicPartition> partitionsFor
      */
     void updateInputPartitions(final Set<TopicPartition> topicPartitions, final Map<String, List<String>> allTopologyNodesToSourceTopics);
 
+    /**
+     * @param enforceCheckpoint if true the task would always execute the checkpoint;
+     *                          otherwise it may skip if the state has not advanced much
+     */
+    void maybeCheckpoint(final boolean enforceCheckpoint);
+
     void markChangelogAsCorrupted(final Collection<TopicPartition> partitions);
 
     /**
@@ -149,10 +155,9 @@ default void addPartitionsForOffsetReset(final Set<TopicPartition> partitionsFor
     void revive();
 
     /**
-     * Attempt a clean close but do not close the underlying state
+     * Close the task except the state, so that the states can be later recycled
      */
-    void closeCleanAndRecycleState();
-
+    void prepareRecycle();
 
     // runtime methods (using in RUNNING state)
 
@@ -206,7 +211,7 @@ void maybeInitTaskTimeoutOrThrow(final long currentWallClockMs,
     /**
      * @return any changelog partitions associated with this task
      */
-    Collection<TopicPartition> changelogPartitions();
+    Set<TopicPartition> changelogPartitions();
 
     State state();
 
@@ -246,4 +251,5 @@ default boolean commitRequested() {
      * @return This returns the time the task started idling. If it is not idling it returns empty.
      */
     Optional<Long> timeCurrentIdlingStarted();
+
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskAndAction.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskAndAction.java
new file mode 100644
index 0000000000000..cc93321a29f42
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskAndAction.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.streams.processor.TaskId;
+
+import java.util.Objects;
+
+public class TaskAndAction {
+
+    enum Action {
+        ADD,
+        REMOVE,
+        PAUSE,
+        RESUME
+    }
+
+    private final Task task;
+    private final TaskId taskId;
+    private final Action action;
+
+    private TaskAndAction(final Task task, final TaskId taskId, final Action action) {
+        this.task = task;
+        this.taskId = taskId;
+        this.action = action;
+    }
+
+    public static TaskAndAction createAddTask(final Task task) {
+        Objects.requireNonNull(task, "Task to add is null!");
+        return new TaskAndAction(task, null, Action.ADD);
+    }
+
+    public static TaskAndAction createRemoveTask(final TaskId taskId) {
+        Objects.requireNonNull(taskId, "Task ID of task to remove is null!");
+        return new TaskAndAction(null, taskId, Action.REMOVE);
+    }
+
+    public static TaskAndAction createPauseTask(final TaskId taskId) {
+        Objects.requireNonNull(taskId, "Task ID of task to pause is null!");
+        return new TaskAndAction(null, taskId, Action.PAUSE);
+    }
+
+    public static TaskAndAction createResumeTask(final TaskId taskId) {
+        Objects.requireNonNull(taskId, "Task ID of task to resume is null!");
+        return new TaskAndAction(null, taskId, Action.RESUME);
+    }
+
+    public Task getTask() {
+        if (action != Action.ADD) {
+            throw new IllegalStateException("Action type " + action + " cannot have a task!");
+        }
+        return task;
+    }
+
+    public TaskId getTaskId() {
+        if (action != Action.REMOVE && action != Action.PAUSE && action != Action.RESUME) {
+            throw new IllegalStateException("Action type " + action + " cannot have a task ID!");
+        }
+        return taskId;
+    }
+
+    public Action getAction() {
+        return action;
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadata.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadata.java
index f3422537f991d..bd1515c7bce01 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadata.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadata.java
@@ -17,8 +17,11 @@
 package org.apache.kafka.streams.processor.internals;
 
 import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode;
 import org.apache.kafka.streams.processor.TaskId;
 
+import java.util.Collection;
+import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
@@ -35,21 +38,50 @@ public class TaskExecutionMetadata {
     private static final long CONSTANT_BACKOFF_MS = 5_000L;
 
     private final boolean hasNamedTopologies;
+    private final Set<String> pausedTopologies;
+    private final ProcessingMode processingMode;
+    private final Collection<Task> successfullyProcessed = new HashSet<>();
     // map of topologies experiencing errors/currently under backoff
     private final ConcurrentHashMap<String, NamedTopologyMetadata> topologyNameToErrorMetadata = new ConcurrentHashMap<>();
 
-    public TaskExecutionMetadata(final Set<String> allTopologyNames) {
+    public TaskExecutionMetadata(final Set<String> allTopologyNames,
+                                 final Set<String> pausedTopologies,
+                                 final ProcessingMode processingMode) {
         this.hasNamedTopologies = !(allTopologyNames.size() == 1 && allTopologyNames.contains(UNNAMED_TOPOLOGY));
+        this.pausedTopologies = pausedTopologies;
+        this.processingMode = processingMode;
+    }
+
+    public boolean hasNamedTopologies() {
+        return hasNamedTopologies;
+    }
+
+    public ProcessingMode processingMode() {
+        return processingMode;
     }
 
     public boolean canProcessTask(final Task task, final long now) {
         final String topologyName = task.id().topologyName();
         if (!hasNamedTopologies) {
             // TODO implement error handling/backoff for non-named topologies (needs KIP)
-            return true;
+            return !pausedTopologies.contains(UNNAMED_TOPOLOGY);
+        } else {
+            if (pausedTopologies.contains(topologyName)) {
+                return false;
+            } else {
+                final NamedTopologyMetadata metadata = topologyNameToErrorMetadata.get(topologyName);
+                return metadata == null || (metadata.canProcess() && metadata.canProcessTask(task, now));
+            }
+        }
+    }
+
+    public boolean canPunctuateTask(final Task task) {
+        final String topologyName = task.id().topologyName();
+
+        if (topologyName == null) {
+            return !pausedTopologies.contains(UNNAMED_TOPOLOGY);
         } else {
-            final NamedTopologyMetadata metadata = topologyNameToErrorMetadata.get(topologyName);
-            return metadata == null || (metadata.canProcess() && metadata.canProcessTask(task, now));
+            return !pausedTopologies.contains(topologyName);
         }
     }
 
@@ -61,6 +93,22 @@ public void registerTaskError(final Task task, final Throwable t, final long now
         }
     }
 
+    Collection<Task> successfullyProcessed() {
+        return successfullyProcessed;
+    }
+
+    void addToSuccessfullyProcessed(final Task task) {
+        successfullyProcessed.add(task);
+    }
+
+    void removeTaskFromSuccessfullyProcessedBeforeClosing(final Task task) {
+        successfullyProcessed.remove(task);
+    }
+
+    void clearSuccessfullyProcessed() {
+        successfullyProcessed.clear();
+    }
+
     private class NamedTopologyMetadata {
         private final Logger log;
         private final Map<TaskId, Long> tasksToErrorTime = new ConcurrentHashMap<>();
@@ -93,7 +141,7 @@ public boolean canProcessTask(final Task task, final long now) {
         }
 
         public synchronized void registerTaskError(final Task task, final Throwable t, final long now) {
-            log.info("Begin backoff for unhealthy task {} at t={}", task.id(), now);
+            log.info("Begin backoff for unhealthy task {} at t={} due to {}", task.id(), now, t.getClass().getName());
             tasksToErrorTime.put(task.id(), now);
         }
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutor.java
index cad03fbd1b3ec..2827fe1e906b6 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskExecutor.java
@@ -26,7 +26,6 @@
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.errors.TaskCorruptedException;
 import org.apache.kafka.streams.errors.TaskMigratedException;
-import org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode;
 import org.apache.kafka.streams.processor.TaskId;
 
 import java.util.Collection;
@@ -47,21 +46,17 @@
 public class TaskExecutor {
 
     private final Logger log;
-
-    private final boolean hasNamedTopologies;
-    private final ProcessingMode processingMode;
     private final Tasks tasks;
-    private final TaskExecutionMetadata taskExecutionMetadata;
+    private final TaskManager taskManager;
+    private final TaskExecutionMetadata executionMetadata;
 
     public TaskExecutor(final Tasks tasks,
-                        final TaskExecutionMetadata taskExecutionMetadata,
-                        final ProcessingMode processingMode,
-                        final boolean hasNamedTopologies,
+                        final TaskManager taskManager,
+                        final TaskExecutionMetadata executionMetadata,
                         final LogContext logContext) {
         this.tasks = tasks;
-        this.taskExecutionMetadata = taskExecutionMetadata;
-        this.processingMode = processingMode;
-        this.hasNamedTopologies = hasNamedTopologies;
+        this.taskManager = taskManager;
+        this.executionMetadata = executionMetadata;
         this.log = logContext.logger(getClass());
     }
 
@@ -76,13 +71,13 @@ int process(final int maxNumRecords, final Time time) {
         for (final Task task : tasks.activeTasks()) {
             final long now = time.milliseconds();
             try {
-                if (taskExecutionMetadata.canProcessTask(task, now)) {
+                if (executionMetadata.canProcessTask(task, now)) {
                     lastProcessed = task;
                     totalProcessed += processTask(task, maxNumRecords, now, time);
                 }
             } catch (final Throwable t) {
-                taskExecutionMetadata.registerTaskError(task, t, now);
-                tasks.removeTaskFromCuccessfullyProcessedBeforeClosing(lastProcessed);
+                executionMetadata.registerTaskError(task, t, now);
+                executionMetadata.removeTaskFromSuccessfullyProcessedBeforeClosing(lastProcessed);
                 commitSuccessfullyProcessedTasks();
                 throw t;
             }
@@ -102,9 +97,9 @@ private long processTask(final Task task, final int maxNumRecords, final long be
                 processed++;
             }
             // TODO: enable regardless of whether using named topologies
-            if (processed > 0 && hasNamedTopologies && processingMode != EXACTLY_ONCE_V2) {
+            if (processed > 0 && executionMetadata.hasNamedTopologies() && executionMetadata.processingMode() != EXACTLY_ONCE_V2) {
                 log.trace("Successfully processed task {}", task.id());
-                tasks.addToSuccessfullyProcessed(task);
+                executionMetadata.addToSuccessfullyProcessed(task);
             }
         } catch (final TimeoutException timeoutException) {
             // TODO consolidate TimeoutException retries with general error handling
@@ -163,6 +158,7 @@ int commitTasksAndMaybeUpdateCommittableOffsets(final Collection<Task> tasksToCo
                 task.postCommit(false);
             }
         }
+
         return committed;
     }
 
@@ -180,12 +176,12 @@ void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, OffsetAndMet
         final Set<TaskId> corruptedTasks = new HashSet<>();
 
         if (!offsetsPerTask.isEmpty()) {
-            if (processingMode == EXACTLY_ONCE_ALPHA) {
+            if (executionMetadata.processingMode() == EXACTLY_ONCE_ALPHA) {
                 for (final Map.Entry<Task, Map<TopicPartition, OffsetAndMetadata>> taskToCommit : offsetsPerTask.entrySet()) {
                     final Task task = taskToCommit.getKey();
                     try {
-                        tasks.streamsProducerForTask(task.id())
-                            .commitTransaction(taskToCommit.getValue(), tasks.mainConsumer().groupMetadata());
+                        taskManager.streamsProducerForTask(task.id())
+                            .commitTransaction(taskToCommit.getValue(), taskManager.mainConsumer().groupMetadata());
                         updateTaskCommitMetadata(taskToCommit.getValue());
                     } catch (final TimeoutException timeoutException) {
                         log.error(
@@ -199,9 +195,9 @@ void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, OffsetAndMet
                 final Map<TopicPartition, OffsetAndMetadata> allOffsets = offsetsPerTask.values().stream()
                     .flatMap(e -> e.entrySet().stream()).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
 
-                if (processingMode == EXACTLY_ONCE_V2) {
+                if (executionMetadata.processingMode() == EXACTLY_ONCE_V2) {
                     try {
-                        tasks.threadProducer().commitTransaction(allOffsets, tasks.mainConsumer().groupMetadata());
+                        taskManager.threadProducer().commitTransaction(allOffsets, taskManager.mainConsumer().groupMetadata());
                         updateTaskCommitMetadata(allOffsets);
                     } catch (final TimeoutException timeoutException) {
                         log.error(
@@ -219,7 +215,7 @@ void commitOffsetsOrTransaction(final Map<Task, Map<TopicPartition, OffsetAndMet
                     }
                 } else {
                     try {
-                        tasks.mainConsumer().commitSync(allOffsets);
+                        taskManager.mainConsumer().commitSync(allOffsets);
                         updateTaskCommitMetadata(allOffsets);
                     } catch (final CommitFailedException error) {
                         throw new TaskMigratedException("Consumer committing offsets failed, " +
@@ -260,13 +256,13 @@ private void updateTaskCommitMetadata(final Map<TopicPartition, OffsetAndMetadat
     }
 
     private void commitSuccessfullyProcessedTasks() {
-        if (!tasks.successfullyProcessed().isEmpty()) {
+        if (!executionMetadata.successfullyProcessed().isEmpty()) {
             log.info("Streams encountered an error when processing tasks." +
                 " Will commit all previously successfully processed tasks {}",
-                tasks.successfullyProcessed().stream().map(Task::id));
-            commitTasksAndMaybeUpdateCommittableOffsets(tasks.successfullyProcessed(), new HashMap<>());
+                executionMetadata.successfullyProcessed().stream().map(Task::id));
+            commitTasksAndMaybeUpdateCommittableOffsets(executionMetadata.successfullyProcessed(), new HashMap<>());
         }
-        tasks.clearSuccessfullyProcessed();
+        executionMetadata.clearSuccessfullyProcessed();
     }
 
     /**
@@ -277,11 +273,13 @@ int punctuate() {
 
         for (final Task task : tasks.activeTasks()) {
             try {
-                if (task.maybePunctuateStreamTime()) {
-                    punctuated++;
-                }
-                if (task.maybePunctuateSystemTime()) {
-                    punctuated++;
+                if (executionMetadata.canPunctuateTask(task)) {
+                    if (task.maybePunctuateStreamTime()) {
+                        punctuated++;
+                    }
+                    if (task.maybePunctuateSystemTime()) {
+                        punctuated++;
+                    }
                 }
             } catch (final TaskMigratedException e) {
                 log.info("Failed to punctuate stream task {} since it got migrated to another thread already. " +
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java
index f1585633cad97..cfd20d2299495 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TaskManager.java
@@ -38,9 +38,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.StateDirectory.TaskDirectory;
 import org.apache.kafka.streams.processor.internals.Task.State;
-import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.state.internals.OffsetCheckpoint;
-
 import org.slf4j.Logger;
 
 import java.io.File;
@@ -74,14 +72,15 @@ public class TaskManager {
     // by QueryableState
     private final Logger log;
     private final Time time;
-    private final ChangelogReader changelogReader;
+    private final Tasks tasks;
     private final UUID processId;
     private final String logPrefix;
-    private final TopologyMetadata topologyMetadata;
     private final Admin adminClient;
     private final StateDirectory stateDirectory;
     private final ProcessingMode processingMode;
-    private final Tasks tasks;
+    private final ChangelogReader changelogReader;
+    private final TopologyMetadata topologyMetadata;
+
     private final TaskExecutor taskExecutor;
 
     private Consumer<byte[], byte[]> mainConsumer;
@@ -93,45 +92,50 @@ public class TaskManager {
     // includes assigned & initialized tasks and unassigned tasks we locked temporarily during rebalance
     private final Set<TaskId> lockedTaskDirectories = new HashSet<>();
 
+    private final ActiveTaskCreator activeTaskCreator;
+    private final StandbyTaskCreator standbyTaskCreator;
+    private final StateUpdater stateUpdater;
+
     TaskManager(final Time time,
                 final ChangelogReader changelogReader,
                 final UUID processId,
                 final String logPrefix,
-                final StreamsMetricsImpl streamsMetrics,
                 final ActiveTaskCreator activeTaskCreator,
                 final StandbyTaskCreator standbyTaskCreator,
                 final TopologyMetadata topologyMetadata,
                 final Admin adminClient,
-                final StateDirectory stateDirectory) {
+                final StateDirectory stateDirectory,
+                final StateUpdater stateUpdater) {
         this.time = time;
-        this.changelogReader = changelogReader;
         this.processId = processId;
         this.logPrefix = logPrefix;
-        this.topologyMetadata = topologyMetadata;
         this.adminClient = adminClient;
         this.stateDirectory = stateDirectory;
+        this.changelogReader = changelogReader;
+        this.topologyMetadata = topologyMetadata;
+        this.activeTaskCreator = activeTaskCreator;
+        this.standbyTaskCreator = standbyTaskCreator;
         this.processingMode = topologyMetadata.processingMode();
 
         final LogContext logContext = new LogContext(logPrefix);
         this.log = logContext.logger(getClass());
 
-        this.tasks = new Tasks(logContext, topologyMetadata,  streamsMetrics, activeTaskCreator, standbyTaskCreator);
+        this.stateUpdater = stateUpdater;
+        this.tasks = new Tasks(logContext);
         this.taskExecutor = new TaskExecutor(
             tasks,
+            this,
             topologyMetadata.taskExecutionMetadata(),
-            processingMode,
-            topologyMetadata.hasNamedTopologies(),
             logContext
         );
     }
 
     void setMainConsumer(final Consumer<byte[], byte[]> mainConsumer) {
         this.mainConsumer = mainConsumer;
-        tasks.setMainConsumer(mainConsumer);
     }
 
     public double totalProducerBlockedTime() {
-        return tasks.totalProducerBlockedTime();
+        return activeTaskCreator.totalProducerBlockedTime();
     }
 
     public UUID processId() {
@@ -142,6 +146,18 @@ public TopologyMetadata topologyMetadata() {
         return topologyMetadata;
     }
 
+    Consumer<byte[], byte[]> mainConsumer() {
+        return mainConsumer;
+    }
+
+    StreamsProducer streamsProducerForTask(final TaskId taskId) {
+        return activeTaskCreator.streamsProducerForTask(taskId);
+    }
+
+    StreamsProducer threadProducer() {
+        return activeTaskCreator.threadProducer();
+    }
+
     boolean isRebalanceInProgress() {
         return rebalanceInProgress;
     }
@@ -186,7 +202,7 @@ boolean handleCorruption(final Set<TaskId> corruptedTasks) {
 
         // We need to commit before closing the corrupted active tasks since this will force the ongoing txn to abort
         try {
-            final Collection<Task> tasksToCommit = tasks()
+            final Collection<Task> tasksToCommit = allTasks()
                 .values()
                 .stream()
                 .filter(t -> t.state() == Task.State.RUNNING || t.state() == Task.State.RESTORING)
@@ -285,34 +301,28 @@ public void handleAssignment(final Map<TaskId, Set<TopicPartition>> activeTasks,
         final LinkedHashMap<TaskId, RuntimeException> taskCloseExceptions = new LinkedHashMap<>();
         final Map<TaskId, Set<TopicPartition>> activeTasksToCreate = new HashMap<>(activeTasks);
         final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate = new HashMap<>(standbyTasks);
-        final Comparator<Task> byId = Comparator.comparing(Task::id);
-        final Set<Task> tasksToRecycle = new TreeSet<>(byId);
-        final Set<Task> tasksToCloseClean = new TreeSet<>(byId);
-        final Set<Task> tasksToCloseDirty = new TreeSet<>(byId);
+        final Map<Task, Set<TopicPartition>> tasksToRecycle = new HashMap<>();
+        final Set<Task> tasksToCloseClean = new TreeSet<>(Comparator.comparing(Task::id));
 
-        // first rectify all existing tasks
-        for (final Task task : tasks.allTasks()) {
-            if (activeTasks.containsKey(task.id()) && task.isActive()) {
-                tasks.updateInputPartitionsAndResume(task, activeTasks.get(task.id()));
-                activeTasksToCreate.remove(task.id());
-            } else if (standbyTasks.containsKey(task.id()) && !task.isActive()) {
-                tasks.updateInputPartitionsAndResume(task, standbyTasks.get(task.id()));
-                standbyTasksToCreate.remove(task.id());
-            } else if (activeTasks.containsKey(task.id()) || standbyTasks.containsKey(task.id())) {
-                // check for tasks that were owned previously but have changed active/standby status
-                tasksToRecycle.add(task);
-            } else {
-                tasksToCloseClean.add(task);
-            }
+        tasks.purgePendingTasks(activeTasks.keySet(), standbyTasks.keySet());
+
+        // first rectify all existing tasks:
+        // 1. for tasks that are already owned, just update input partitions / resume and skip re-creating them
+        // 2. for tasks that have changed active/standby status, just recycle and skip re-creating them
+        // 3. otherwise, close them since they are no longer owned
+        if (stateUpdater == null) {
+            classifyTasksWithoutStateUpdater(activeTasksToCreate, standbyTasksToCreate, tasksToRecycle, tasksToCloseClean);
+        } else {
+            classifyTasksWithStateUpdater(activeTasksToCreate, standbyTasksToCreate, tasksToRecycle, tasksToCloseClean);
         }
 
+        tasks.addPendingActiveTasks(pendingTasksToCreate(activeTasksToCreate));
+        tasks.addPendingStandbyTasks(pendingTasksToCreate(standbyTasksToCreate));
+
         // close and recycle those tasks
-        handleCloseAndRecycle(
+        closeAndRecycleTasks(
             tasksToRecycle,
             tasksToCloseClean,
-            tasksToCloseDirty,
-            activeTasksToCreate,
-            standbyTasksToCreate,
             taskCloseExceptions
         );
 
@@ -346,22 +356,143 @@ public void handleAssignment(final Map<TaskId, Set<TopicPartition>> activeTasks,
             throw first.getValue();
         }
 
-        tasks.handleNewAssignmentAndCreateTasks(activeTasksToCreate, standbyTasksToCreate, activeTasks.keySet(), standbyTasks.keySet());
+        createNewTasks(activeTasksToCreate, standbyTasksToCreate);
+    }
+
+    private void createNewTasks(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
+                                final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate) {
+        final Collection<Task> newActiveTasks = activeTaskCreator.createTasks(mainConsumer, activeTasksToCreate);
+        final Collection<Task> newStandbyTask = standbyTaskCreator.createTasks(standbyTasksToCreate);
+
+        if (stateUpdater == null) {
+            tasks.addNewActiveTasks(newActiveTasks);
+            tasks.addNewStandbyTasks(newStandbyTask);
+        } else {
+            tasks.addPendingTaskToRestore(newActiveTasks);
+            tasks.addPendingTaskToRestore(newStandbyTask);
+        }
+    }
+
+    private void classifyTasksWithoutStateUpdater(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
+                                                  final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate,
+                                                  final Map<Task, Set<TopicPartition>> tasksToRecycle,
+                                                  final Set<Task> tasksToCloseClean) {
+        for (final Task task : tasks.allTasks()) {
+            final TaskId taskId = task.id();
+            if (activeTasksToCreate.containsKey(taskId)) {
+                if (task.isActive()) {
+                    final Set<TopicPartition> topicPartitions = activeTasksToCreate.get(taskId);
+                    if (tasks.updateActiveTaskInputPartitions(task, topicPartitions)) {
+                        task.updateInputPartitions(topicPartitions, topologyMetadata.nodeToSourceTopics(task.id()));
+                    }
+                    task.resume();
+                } else {
+                    tasksToRecycle.put(task, activeTasksToCreate.get(taskId));
+                }
+                activeTasksToCreate.remove(taskId);
+            } else if (standbyTasksToCreate.containsKey(taskId)) {
+                if (!task.isActive()) {
+                    final Set<TopicPartition> topicPartitions = standbyTasksToCreate.get(taskId);
+                    task.updateInputPartitions(topicPartitions, topologyMetadata.nodeToSourceTopics(task.id()));
+                    task.resume();
+                } else {
+                    tasksToRecycle.put(task, standbyTasksToCreate.get(taskId));
+                }
+                standbyTasksToCreate.remove(taskId);
+            } else {
+                tasksToCloseClean.add(task);
+            }
+        }
+    }
+
+    private void classifyRunningTasks(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
+                                      final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate,
+                                      final Map<Task, Set<TopicPartition>> tasksToRecycle,
+                                      final Set<Task> tasksToCloseClean) {
+        for (final Task task : tasks.allTasks()) {
+            final TaskId taskId = task.id();
+            if (activeTasksToCreate.containsKey(taskId)) {
+                if (task.isActive()) {
+                    final Set<TopicPartition> topicPartitions = activeTasksToCreate.get(taskId);
+                    if (tasks.updateActiveTaskInputPartitions(task, topicPartitions)) {
+                        task.updateInputPartitions(topicPartitions, topologyMetadata.nodeToSourceTopics(task.id()));
+                    }
+                    task.resume();
+                } else {
+                    throw new IllegalStateException("Standby tasks should only be managed by the state updater");
+                }
+                activeTasksToCreate.remove(taskId);
+            } else if (standbyTasksToCreate.containsKey(taskId)) {
+                if (!task.isActive()) {
+                    throw new IllegalStateException("Standby tasks should only be managed by the state updater");
+                } else {
+                    tasksToRecycle.put(task, standbyTasksToCreate.get(taskId));
+                }
+                standbyTasksToCreate.remove(taskId);
+            } else {
+                tasksToCloseClean.add(task);
+            }
+        }
+    }
+
+    private void classifyTasksWithStateUpdater(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
+                                               final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate,
+                                               final Map<Task, Set<TopicPartition>> tasksToRecycle,
+                                               final Set<Task> tasksToCloseClean) {
+        classifyRunningTasks(activeTasksToCreate, standbyTasksToCreate, tasksToRecycle, tasksToCloseClean);
+        for (final Task task : stateUpdater.getTasks()) {
+            final TaskId taskId = task.id();
+            if (activeTasksToCreate.containsKey(taskId)) {
+                if (task.isActive()) {
+                    final Set<TopicPartition> topicPartitions = activeTasksToCreate.get(taskId);
+                    if (!task.inputPartitions().equals(topicPartitions)) {
+                        tasks.addPendingTaskThatNeedsInputPartitionsUpdate(taskId);
+                    }
+                } else {
+                    stateUpdater.remove(taskId);
+                    tasks.addPendingStandbyTaskToRecycle(taskId);
+                }
+                activeTasksToCreate.remove(taskId);
+            } else if (standbyTasksToCreate.containsKey(taskId)) {
+                if (!task.isActive()) {
+                    final Set<TopicPartition> topicPartitions = standbyTasksToCreate.get(taskId);
+                    if (!task.inputPartitions().equals(topicPartitions)) {
+                        tasks.addPendingTaskThatNeedsInputPartitionsUpdate(taskId);
+                    }
+                } else {
+                    stateUpdater.remove(taskId);
+                    tasks.addPendingActiveTaskToRecycle(taskId);
+                }
+                standbyTasksToCreate.remove(taskId);
+            } else {
+                stateUpdater.remove(taskId);
+                tasks.addPendingTaskToClose(taskId);
+            }
+        }
     }
 
-    private void handleCloseAndRecycle(final Set<Task> tasksToRecycle,
-                                       final Set<Task> tasksToCloseClean,
-                                       final Set<Task> tasksToCloseDirty,
-                                       final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
-                                       final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate,
-                                       final LinkedHashMap<TaskId, RuntimeException> taskCloseExceptions) {
-        if (!tasksToCloseDirty.isEmpty()) {
-            throw new IllegalArgumentException("Tasks to close-dirty should be empty");
+    private Map<TaskId, Set<TopicPartition>> pendingTasksToCreate(final Map<TaskId, Set<TopicPartition>> tasksToCreate) {
+        final Map<TaskId, Set<TopicPartition>> pendingTasks = new HashMap<>();
+        final Iterator<Map.Entry<TaskId, Set<TopicPartition>>> iter = tasksToCreate.entrySet().iterator();
+        while (iter.hasNext()) {
+            final Map.Entry<TaskId, Set<TopicPartition>> entry = iter.next();
+            final TaskId taskId = entry.getKey();
+            if (taskId.topologyName() != null && !topologyMetadata.namedTopologiesView().contains(taskId.topologyName())) {
+                pendingTasks.put(taskId, entry.getValue());
+                iter.remove();
+            }
         }
+        return pendingTasks;
+    }
+
+    private void closeAndRecycleTasks(final Map<Task, Set<TopicPartition>> tasksToRecycle,
+                                      final Set<Task> tasksToCloseClean,
+                                      final LinkedHashMap<TaskId, RuntimeException> taskCloseExceptions) {
+        final Set<Task> tasksToCloseDirty = new TreeSet<>(Comparator.comparing(Task::id));
 
         // for all tasks to close or recycle, we should first write a checkpoint as in post-commit
         final List<Task> tasksToCheckpoint = new ArrayList<>(tasksToCloseClean);
-        tasksToCheckpoint.addAll(tasksToRecycle);
+        tasksToCheckpoint.addAll(tasksToRecycle.keySet());
         for (final Task task : tasksToCheckpoint) {
             try {
                 // Note that we are not actually committing here but just check if we need to write checkpoint file:
@@ -399,32 +530,32 @@ private void handleCloseAndRecycle(final Set<Task> tasksToRecycle,
         tasksToCloseClean.removeAll(tasksToCloseDirty);
         for (final Task task : tasksToCloseClean) {
             try {
-                completeTaskCloseClean(task);
-                if (task.isActive()) {
-                    tasks.cleanUpTaskProducerAndRemoveTask(task.id(), taskCloseExceptions);
+                final RuntimeException removeTaskException = completeTaskCloseClean(task);
+                if (removeTaskException != null) {
+                    taskCloseExceptions.putIfAbsent(task.id(), removeTaskException);
                 }
-            } catch (final RuntimeException e) {
+            } catch (final RuntimeException closeTaskException) {
                 final String uncleanMessage = String.format(
                         "Failed to close task %s cleanly. Attempting to close remaining tasks before re-throwing:",
                         task.id());
-                log.error(uncleanMessage, e);
-                taskCloseExceptions.putIfAbsent(task.id(), e);
+                log.error(uncleanMessage, closeTaskException);
+                taskCloseExceptions.putIfAbsent(task.id(), closeTaskException);
                 tasksToCloseDirty.add(task);
             }
         }
 
-        tasksToRecycle.removeAll(tasksToCloseDirty);
-        for (final Task oldTask : tasksToRecycle) {
+        tasksToRecycle.keySet().removeAll(tasksToCloseDirty);
+        for (final Map.Entry<Task, Set<TopicPartition>> entry : tasksToRecycle.entrySet()) {
+            final Task oldTask = entry.getKey();
             try {
                 if (oldTask.isActive()) {
-                    final Set<TopicPartition> partitions = standbyTasksToCreate.remove(oldTask.id());
-                    tasks.convertActiveToStandby((StreamTask) oldTask, partitions, taskCloseExceptions);
+                    convertActiveToStandby((StreamTask) oldTask, entry.getValue());
                 } else {
-                    final Set<TopicPartition> partitions = activeTasksToCreate.remove(oldTask.id());
-                    tasks.convertStandbyToActive((StandbyTask) oldTask, partitions);
+                    convertStandbyToActive((StandbyTask) oldTask, entry.getValue());
                 }
             } catch (final RuntimeException e) {
-                final String uncleanMessage = String.format("Failed to recycle task %s cleanly. Attempting to close remaining tasks before re-throwing:", oldTask.id());
+                final String uncleanMessage = String.format("Failed to recycle task %s cleanly. " +
+                    "Attempting to close remaining tasks before re-throwing:", oldTask.id());
                 log.error(uncleanMessage, e);
                 taskCloseExceptions.putIfAbsent(oldTask.id(), e);
                 tasksToCloseDirty.add(oldTask);
@@ -434,10 +565,22 @@ private void handleCloseAndRecycle(final Set<Task> tasksToRecycle,
         // for tasks that cannot be cleanly closed or recycled, close them dirty
         for (final Task task : tasksToCloseDirty) {
             closeTaskDirty(task);
-            tasks.cleanUpTaskProducerAndRemoveTask(task.id(), taskCloseExceptions);
         }
     }
 
+    private void convertActiveToStandby(final StreamTask activeTask,
+                                        final Set<TopicPartition> partitions) {
+        final StandbyTask standbyTask = standbyTaskCreator.createStandbyTaskFromActive(activeTask, partitions);
+        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(activeTask.id());
+        tasks.replaceActiveWithStandby(standbyTask);
+    }
+
+    private void convertStandbyToActive(final StandbyTask standbyTask,
+                                        final Set<TopicPartition> partitions) {
+        final StreamTask activeTask = activeTaskCreator.createActiveTaskFromStandby(standbyTask, partitions, mainConsumer);
+        tasks.replaceStandbyWithActive(activeTask);
+    }
+
     /**
      * Tries to initialize any new or still-uninitialized tasks, then checks if they can/have completed restoration.
      *
@@ -448,53 +591,61 @@ private void handleCloseAndRecycle(final Set<Task> tasksToRecycle,
     boolean tryToCompleteRestoration(final long now, final java.util.function.Consumer<Set<TopicPartition>> offsetResetter) {
         boolean allRunning = true;
 
-        final List<Task> activeTasks = new LinkedList<>();
-        for (final Task task : tasks.allTasks()) {
-            try {
-                task.initializeIfNeeded();
-                task.clearTaskTimeout();
-            } catch (final LockException lockException) {
-                // it is possible that if there are multiple threads within the instance that one thread
-                // trying to grab the task from the other, while the other has not released the lock since
-                // it did not participate in the rebalance. In this case we can just retry in the next iteration
-                log.debug("Could not initialize task {} since: {}; will retry", task.id(), lockException.getMessage());
-                allRunning = false;
-            } catch (final TimeoutException timeoutException) {
-                task.maybeInitTaskTimeoutOrThrow(now, timeoutException);
-                allRunning = false;
-            }
+        if (stateUpdater == null) {
+            final List<Task> activeTasks = new LinkedList<>();
+            for (final Task task : tasks.allTasks()) {
+                try {
+                    task.initializeIfNeeded();
+                    task.clearTaskTimeout();
+                } catch (final LockException lockException) {
+                    // it is possible that if there are multiple threads within the instance that one thread
+                    // trying to grab the task from the other, while the other has not released the lock since
+                    // it did not participate in the rebalance. In this case we can just retry in the next iteration
+                    log.debug("Could not initialize task {} since: {}; will retry", task.id(), lockException.getMessage());
+                    allRunning = false;
+                } catch (final TimeoutException timeoutException) {
+                    task.maybeInitTaskTimeoutOrThrow(now, timeoutException);
+                    allRunning = false;
+                }
 
-            if (task.isActive()) {
-                activeTasks.add(task);
+                if (task.isActive()) {
+                    activeTasks.add(task);
+                }
             }
-        }
-
-        if (allRunning && !activeTasks.isEmpty()) {
-
-            final Set<TopicPartition> restored = changelogReader.completedChangelogs();
-
-            for (final Task task : activeTasks) {
-                if (restored.containsAll(task.changelogPartitions())) {
-                    try {
-                        task.completeRestoration(offsetResetter);
-                        task.clearTaskTimeout();
-                    } catch (final TimeoutException timeoutException) {
-                        task.maybeInitTaskTimeoutOrThrow(now, timeoutException);
-                        log.debug(
-                            String.format(
-                                "Could not complete restoration for %s due to the following exception; will retry",
-                                task.id()),
-                            timeoutException
-                        );
 
+            if (allRunning && !activeTasks.isEmpty()) {
+
+                final Set<TopicPartition> restored = changelogReader.completedChangelogs();
+
+                for (final Task task : activeTasks) {
+                    if (restored.containsAll(task.changelogPartitions())) {
+                        try {
+                            task.completeRestoration(offsetResetter);
+                            task.clearTaskTimeout();
+                        } catch (final TimeoutException timeoutException) {
+                            task.maybeInitTaskTimeoutOrThrow(now, timeoutException);
+                            log.debug(
+                                String.format(
+                                    "Could not complete restoration for %s due to the following exception; will retry",
+                                    task.id()),
+                                timeoutException
+                            );
+
+                            allRunning = false;
+                        }
+                    } else {
+                        // we found a restoring task that isn't done restoring, which is evidence that
+                        // not all tasks are running
                         allRunning = false;
                     }
-                } else {
-                    // we found a restoring task that isn't done restoring, which is evidence that
-                    // not all tasks are running
-                    allRunning = false;
                 }
             }
+        } else {
+            for (final Task task : tasks.drainPendingTaskToRestore()) {
+                stateUpdater.add(task);
+            }
+
+            // TODO: should add logic for checking and resuming when all active tasks have been restored
         }
 
         if (allRunning) {
@@ -646,19 +797,17 @@ private void prepareCommitAndAddOffsetsToMap(final Set<Task> tasksToPrepare,
     void handleLostAll() {
         log.debug("Closing lost active tasks as zombies.");
 
-        final Set<Task> allTask = new HashSet<>(tasks.allTasks());
+        final Set<Task> allTask = tasks.allTasks();
         for (final Task task : allTask) {
             // Even though we've apparently dropped out of the group, we can continue safely to maintain our
             // standby tasks while we rejoin.
             if (task.isActive()) {
                 closeTaskDirty(task);
-
-                tasks.cleanUpTaskProducerAndRemoveTask(task.id(), new HashMap<>());
             }
         }
 
         if (processingMode == EXACTLY_ONCE_V2) {
-            tasks.reInitializeThreadProducer();
+            activeTaskCreator.reInitializeThreadProducer();
         }
     }
 
@@ -673,7 +822,7 @@ public Map<TaskId, Long> getTaskOffsetSums() {
         // Not all tasks will create directories, and there may be directories for tasks we don't currently own,
         // so we consider all tasks that are either owned or on disk. This includes stateless tasks, which should
         // just have an empty changelogOffsets map.
-        for (final TaskId id : union(HashSet::new, lockedTaskDirectories, tasks.tasksPerId().keySet())) {
+        for (final TaskId id : union(HashSet::new, lockedTaskDirectories, tasks.allTaskIds())) {
             final Task task = tasks.owned(id) ? tasks.task(id) : null;
             // Closed and uninitialized tasks don't have any offsets so we should read directly from the checkpoint
             if (task != null && task.state() != State.CREATED && task.state() != State.CLOSED) {
@@ -796,15 +945,35 @@ private void closeTaskDirty(final Task task) {
         try {
             task.suspend();
         } catch (final RuntimeException swallow) {
-            log.error("Error suspending dirty task {} ", task.id(), swallow);
+            log.error("Error suspending dirty task {}: {}", task.id(), swallow.getMessage());
         }
-        tasks.removeTaskBeforeClosing(task.id());
+
         task.closeDirty();
+
+        try {
+            tasks.removeTask(task);
+
+            if (task.isActive()) {
+                activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(task.id());
+            }
+        } catch (final RuntimeException swallow) {
+            log.error("Error removing dirty task {}: {}", task.id(), swallow.getMessage());
+        }
     }
 
-    private void completeTaskCloseClean(final Task task) {
-        tasks.removeTaskBeforeClosing(task.id());
+    private RuntimeException completeTaskCloseClean(final Task task) {
         task.closeClean();
+        try {
+            tasks.removeTask(task);
+
+            if (task.isActive()) {
+                activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(task.id());
+            }
+        } catch (final RuntimeException e) {
+            log.error("Error removing active task {}: {}", task.id(), e.getMessage());
+            return e;
+        }
+        return null;
     }
 
     void shutdown(final boolean clean) {
@@ -823,7 +992,7 @@ void shutdown(final boolean clean) {
 
         executeAndMaybeSwallow(
             clean,
-            tasks::closeThreadProducerIfNeeded,
+            activeTaskCreator::closeThreadProducerIfNeeded,
             e -> firstException.compareAndSet(null, e),
             e -> log.warn("Ignoring an exception while closing thread producer.", e)
         );
@@ -859,16 +1028,6 @@ void closeAndCleanUpTasks(final Collection<Task> activeTasks, final Collection<T
             closeTaskDirty(task);
         }
 
-        // TODO: change type to `StreamTask`
-        for (final Task activeTask : activeTasks) {
-            executeAndMaybeSwallow(
-                clean,
-                () -> tasks.closeAndRemoveTaskProducerIfNeeded(activeTask),
-                e -> firstException.compareAndSet(null, e),
-                e -> log.warn("Ignoring an exception while closing task " + activeTask.id() + " producer.", e)
-            );
-        }
-
         final RuntimeException exception = firstException.get();
         if (exception != null) {
             throw exception;
@@ -957,7 +1116,10 @@ private Collection<Task> tryCloseCleanActiveTasks(final Collection<Task> activeT
         for (final Task task : tasksToCloseClean) {
             try {
                 task.suspend();
-                completeTaskCloseClean(task);
+                final RuntimeException exception = completeTaskCloseClean(task);
+                if (exception != null) {
+                    firstException.compareAndSet(null, exception);
+                }
             } catch (final StreamsException e) {
                 log.error("Exception caught while clean-closing task " + task.id(), e);
                 e.setTaskId(task.id());
@@ -988,7 +1150,10 @@ private Collection<Task> tryCloseCleanStandbyTasks(final Collection<Task> standb
                 task.prepareCommit();
                 task.postCommit(true);
                 task.suspend();
-                completeTaskCloseClean(task);
+                final RuntimeException exception = completeTaskCloseClean(task);
+                if (exception != null) {
+                    maybeWrapAndSetFirstException(firstException, exception, task.id());
+                }
             } catch (final TaskMigratedException e) {
                 // just ignore the exception as it doesn't matter during shutdown
                 tasksToCloseDirty.add(task);
@@ -1012,10 +1177,17 @@ Set<TaskId> standbyTaskIds() {
             .collect(Collectors.toSet());
     }
 
-    Map<TaskId, Task> tasks() {
+    Map<TaskId, Task> allTasks() {
         // not bothering with an unmodifiable map, since the tasks themselves are mutable, but
         // if any outside code modifies the map or the tasks, it would be a severe transgression.
-        return tasks.tasksPerId();
+        return tasks.allTasksPerId();
+    }
+
+    Map<TaskId, Task> notPausedTasks() {
+        return Collections.unmodifiableMap(tasks.allTasks()
+            .stream()
+            .filter(t -> !topologyMetadata.isPaused(t.id().topologyName()))
+            .collect(Collectors.toMap(Task::id, v -> v)));
     }
 
     Map<TaskId, Task> activeTaskMap() {
@@ -1044,7 +1216,7 @@ private Stream<Task> standbyTaskStream() {
 
     // For testing only.
     int commitAll() {
-        return commit(new HashSet<>(tasks.allTasks()));
+        return commit(tasks.allTasks());
     }
 
     /**
@@ -1130,7 +1302,7 @@ public void updateTaskEndMetadata(final TopicPartition topicPartition, final Lon
      */
     void handleTopologyUpdates() {
         topologyMetadata.executeTopologyUpdatesAndBumpThreadVersion(
-            tasks::maybeCreateTasksFromNewTopologies,
+            this::createPendingTasks,
             this::maybeCloseTasksFromRemovedTopologies
         );
 
@@ -1156,11 +1328,9 @@ void maybeCloseTasksFromRemovedTopologies(final Set<String> currentNamedTopologi
                 }
             }
 
-            final Set<TaskId> allRemovedTasks =
-                union(HashSet::new, activeTasksToRemove, standbyTasksToRemove).stream().map(Task::id).collect(Collectors.toSet());
+            final Set<Task> allTasksToRemove = union(HashSet::new, activeTasksToRemove, standbyTasksToRemove);
             closeAndCleanUpTasks(activeTasksToRemove, standbyTasksToRemove, true);
-            allRemovedTasks.forEach(tasks::removeTaskBeforeClosing);
-            releaseLockedDirectoriesForTasks(allRemovedTasks);
+            releaseLockedDirectoriesForTasks(allTasksToRemove.stream().map(Task::id).collect(Collectors.toSet()));
         } catch (final Exception e) {
             // TODO KAFKA-12648: for now just swallow the exception to avoid interfering with the other topologies
             //  that are running alongside, but eventually we should be able to rethrow up to the handler to inform
@@ -1169,6 +1339,13 @@ void maybeCloseTasksFromRemovedTopologies(final Set<String> currentNamedTopologi
         }
     }
 
+    void createPendingTasks(final Set<String> currentNamedTopologies) {
+        final Map<TaskId, Set<TopicPartition>> activeTasksToCreate = tasks.pendingActiveTasksForTopologies(currentNamedTopologies);
+        final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate = tasks.pendingStandbyTasksForTopologies(currentNamedTopologies);
+
+        createNewTasks(activeTasksToCreate, standbyTasksToCreate);
+    }
+
     /**
      * @throws TaskMigratedException if the task producer got fenced (EOS only)
      * @throws StreamsException      if any task threw an exception while processing
@@ -1244,11 +1421,11 @@ public String toString(final String indent) {
     }
 
     Map<MetricName, Metric> producerMetrics() {
-        return tasks.producerMetrics();
+        return activeTaskCreator.producerMetrics();
     }
 
     Set<String> producerClientIds() {
-        return tasks.producerClientIds();
+        return activeTaskCreator.producerClientIds();
     }
 
     Set<TaskId> lockedTaskDirectories() {
@@ -1295,7 +1472,7 @@ public static void executeAndMaybeSwallow(final boolean clean,
     }
 
     boolean needsInitializationOrRestoration() {
-        return tasks().values().stream().anyMatch(Task::needsInitializationOrRestoration);
+        return activeTaskIterable().stream().anyMatch(Task::needsInitializationOrRestoration);
     }
 
     // for testing only
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/Tasks.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/Tasks.java
index c4aec35d4e970..e360556658135 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/Tasks.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/Tasks.java
@@ -16,224 +16,225 @@
  */
 package org.apache.kafka.streams.processor.internals;
 
-import org.apache.kafka.clients.consumer.Consumer;
-import org.apache.kafka.common.Metric;
-import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.streams.processor.TaskId;
-import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
-
-import java.util.HashSet;
 import org.slf4j.Logger;
 
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.stream.Collectors;
 
+import static org.apache.kafka.common.utils.Utils.filterMap;
+import static org.apache.kafka.common.utils.Utils.union;
+
+/**
+ * All tasks contained by the Streams instance.
+ *
+ * Note that these tasks are shared between the TaskManager (stream thread) and the StateUpdater (restore thread),
+ * i.e. all running active tasks are processed by the former and all restoring active tasks and standby tasks are
+ * processed by the latter.
+ */
 class Tasks {
     private final Logger log;
-    private final TopologyMetadata topologyMetadata;
-    private final StreamsMetricsImpl streamsMetrics;
-
-    private final Map<TaskId, Task> allTasksPerId = new TreeMap<>();
-    private final Map<TaskId, Task> readOnlyTasksPerId = Collections.unmodifiableMap(allTasksPerId);
-    private final Collection<Task> readOnlyTasks = Collections.unmodifiableCollection(allTasksPerId.values());
 
     // TODO: change type to `StreamTask`
     private final Map<TaskId, Task> activeTasksPerId = new TreeMap<>();
+    // TODO: change type to `StandbyTask`
+    private final Map<TaskId, Task> standbyTasksPerId = new TreeMap<>();
+
+    // Tasks may have been assigned for a NamedTopology that is not yet known by this host. When that occurs we stash
+    // these unknown tasks until either the corresponding NamedTopology is added and we can create them at last, or
+    // we receive a new assignment and they are revoked from the thread.
+
+    // Tasks may have been assigned but not yet created because:
+    // 1. They are for a NamedTopology that is yet known by this host.
+    // 2. They are to be recycled from an existing restoring task yet to be returned from the state updater.
+    //
+    // When that occurs we stash these pending tasks until either they are finally clear to be created,
+    // or they are revoked from a new assignment.
+    private final Map<TaskId, Set<TopicPartition>> pendingActiveTasksToCreate = new HashMap<>();
+    private final Map<TaskId, Set<TopicPartition>> pendingStandbyTasksToCreate = new HashMap<>();
+
+    private final Set<Task> pendingTasksToRestore = new HashSet<>();
+
+    private final Set<TaskId> pendingActiveTasksToRecycle = new HashSet<>();
+    private final Set<TaskId> pendingStandbyTasksToRecycle = new HashSet<>();
+    private final Set<TaskId> pendingTasksThatNeedInputPartitionUpdate = new HashSet<>();
+    private final Set<TaskId> pendingTasksToClose = new HashSet<>();
+
     // TODO: change type to `StreamTask`
     private final Map<TopicPartition, Task> activeTasksPerPartition = new HashMap<>();
-    // TODO: change type to `StreamTask`
-    private final Map<TaskId, Task> readOnlyActiveTasksPerId = Collections.unmodifiableMap(activeTasksPerId);
-    private final Set<TaskId> readOnlyActiveTaskIds = Collections.unmodifiableSet(activeTasksPerId.keySet());
-    // TODO: change type to `StreamTask`
-    private final Collection<Task> readOnlyActiveTasks = Collections.unmodifiableCollection(activeTasksPerId.values());
 
-    // TODO: change type to `StandbyTask`
-    private final Map<TaskId, Task> standbyTasksPerId = new TreeMap<>();
-    // TODO: change type to `StandbyTask`
-    private final Map<TaskId, Task> readOnlyStandbyTasksPerId = Collections.unmodifiableMap(standbyTasksPerId);
-    private final Set<TaskId> readOnlyStandbyTaskIds = Collections.unmodifiableSet(standbyTasksPerId.keySet());
-    private final Collection<Task> successfullyProcessed = new HashSet<>();
+    Tasks(final LogContext logContext) {
+        this.log = logContext.logger(getClass());
+    }
 
-    private final ActiveTaskCreator activeTaskCreator;
-    private final StandbyTaskCreator standbyTaskCreator;
+    void purgePendingTasks(final Set<TaskId> assignedActiveTasks, final Set<TaskId> assignedStandbyTasks) {
+        pendingActiveTasksToCreate.keySet().retainAll(assignedActiveTasks);
+        pendingStandbyTasksToCreate.keySet().retainAll(assignedStandbyTasks);
+    }
+
+    void addPendingActiveTasks(final Map<TaskId, Set<TopicPartition>> pendingTasks) {
+        pendingActiveTasksToCreate.putAll(pendingTasks);
+    }
 
-    private Consumer<byte[], byte[]> mainConsumer;
+    void addPendingStandbyTasks(final Map<TaskId, Set<TopicPartition>> pendingTasks) {
+        pendingStandbyTasksToCreate.putAll(pendingTasks);
+    }
 
-    Tasks(final LogContext logContext,
-          final TopologyMetadata topologyMetadata,
-          final StreamsMetricsImpl streamsMetrics,
-          final ActiveTaskCreator activeTaskCreator,
-          final StandbyTaskCreator standbyTaskCreator) {
+    void addPendingActiveTaskToRecycle(final TaskId taskId) {
+        pendingActiveTasksToRecycle.add(taskId);
+    }
 
-        log = logContext.logger(getClass());
+    void addPendingStandbyTaskToRecycle(final TaskId taskId) {
+        pendingStandbyTasksToRecycle.add(taskId);
+    }
 
-        this.topologyMetadata = topologyMetadata;
-        this.streamsMetrics = streamsMetrics;
-        this.activeTaskCreator = activeTaskCreator;
-        this.standbyTaskCreator = standbyTaskCreator;
+    void addPendingTaskThatNeedsInputPartitionsUpdate(final TaskId taskId) {
+        pendingTasksThatNeedInputPartitionUpdate.add(taskId);
     }
 
-    void setMainConsumer(final Consumer<byte[], byte[]> mainConsumer) {
-        this.mainConsumer = mainConsumer;
+    void addPendingTaskToClose(final TaskId taskId) {
+        pendingTasksToClose.add(taskId);
     }
 
-    void handleNewAssignmentAndCreateTasks(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
-                                           final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate,
-                                           final Set<TaskId> assignedActiveTasks,
-                                           final Set<TaskId> assignedStandbyTasks) {
-        activeTaskCreator.removeRevokedUnknownTasks(assignedActiveTasks);
-        standbyTaskCreator.removeRevokedUnknownTasks(assignedStandbyTasks);
-        createTasks(activeTasksToCreate, standbyTasksToCreate);
+    void addPendingTaskToRestore(final Collection<Task> tasks) {
+        pendingTasksToRestore.addAll(tasks);
     }
 
-    void maybeCreateTasksFromNewTopologies(final Set<String> currentNamedTopologies) {
-        createTasks(
-            activeTaskCreator.uncreatedTasksForTopologies(currentNamedTopologies),
-            standbyTaskCreator.uncreatedTasksForTopologies(currentNamedTopologies)
-        );
+    Set<Task> drainPendingTaskToRestore() {
+        final Set<Task> result = new HashSet<>(pendingTasksToRestore);
+        pendingTasksToRestore.clear();
+        return result;
     }
 
-    double totalProducerBlockedTime() {
-        return activeTaskCreator.totalProducerBlockedTime();
+    Map<TaskId, Set<TopicPartition>> pendingActiveTasksForTopologies(final Set<String> currentTopologies) {
+        return filterMap(pendingActiveTasksToCreate, t -> currentTopologies.contains(t.getKey().topologyName()));
     }
 
-    void createTasks(final Map<TaskId, Set<TopicPartition>> activeTasksToCreate,
-                     final Map<TaskId, Set<TopicPartition>> standbyTasksToCreate) {
-        for (final Map.Entry<TaskId, Set<TopicPartition>> taskToBeCreated : activeTasksToCreate.entrySet()) {
-            final TaskId taskId = taskToBeCreated.getKey();
+    Map<TaskId, Set<TopicPartition>> pendingStandbyTasksForTopologies(final Set<String> currentTopologies) {
+        return filterMap(pendingStandbyTasksToCreate, t -> currentTopologies.contains(t.getKey().topologyName()));
+    }
 
-            if (activeTasksPerId.containsKey(taskId)) {
-                throw new IllegalStateException("Attempted to create an active task that we already own: " + taskId);
-            }
-        }
+    void addNewActiveTasks(final Collection<Task> newTasks) {
+        if (!newTasks.isEmpty()) {
+            for (final Task activeTask : newTasks) {
+                final TaskId taskId = activeTask.id();
 
-        for (final Map.Entry<TaskId, Set<TopicPartition>> taskToBeCreated : standbyTasksToCreate.entrySet()) {
-            final TaskId taskId = taskToBeCreated.getKey();
+                if (activeTasksPerId.containsKey(taskId)) {
+                    throw new IllegalStateException("Attempted to create an active task that we already own: " + taskId);
+                }
 
-            if (standbyTasksPerId.containsKey(taskId)) {
-                throw new IllegalStateException("Attempted to create a standby task that we already own: " + taskId);
-            }
-        }
+                if (standbyTasksPerId.containsKey(taskId)) {
+                    throw new IllegalStateException("Attempted to create an active task while we already own its standby: " + taskId);
+                }
 
-        // keep this check to simplify testing (ie, no need to mock `activeTaskCreator`)
-        if (!activeTasksToCreate.isEmpty()) {
-            // TODO: change type to `StreamTask`
-            for (final Task activeTask : activeTaskCreator.createTasks(mainConsumer, activeTasksToCreate)) {
                 activeTasksPerId.put(activeTask.id(), activeTask);
-                allTasksPerId.put(activeTask.id(), activeTask);
+                pendingActiveTasksToCreate.remove(activeTask.id());
                 for (final TopicPartition topicPartition : activeTask.inputPartitions()) {
                     activeTasksPerPartition.put(topicPartition, activeTask);
                 }
             }
         }
+    }
+
+    void addNewStandbyTasks(final Collection<Task> newTasks) {
+        if (!newTasks.isEmpty()) {
+            for (final Task standbyTask : newTasks) {
+                final TaskId taskId = standbyTask.id();
+
+                if (standbyTasksPerId.containsKey(taskId)) {
+                    throw new IllegalStateException("Attempted to create an standby task that we already own: " + taskId);
+                }
+
+                if (activeTasksPerId.containsKey(taskId)) {
+                    throw new IllegalStateException("Attempted to create an standby task while we already own its active: " + taskId);
+                }
 
-        // keep this check to simplify testing (ie, no need to mock `standbyTaskCreator`)
-        if (!standbyTasksToCreate.isEmpty()) {
-            // TODO: change type to `StandbyTask`
-            for (final Task standbyTask : standbyTaskCreator.createTasks(standbyTasksToCreate)) {
                 standbyTasksPerId.put(standbyTask.id(), standbyTask);
-                allTasksPerId.put(standbyTask.id(), standbyTask);
+                pendingStandbyTasksToCreate.remove(standbyTask.id());
             }
         }
     }
 
-    void convertActiveToStandby(final StreamTask activeTask,
-                                final Set<TopicPartition> partitions,
-                                final Map<TaskId, RuntimeException> taskCloseExceptions) {
-        if (activeTasksPerId.remove(activeTask.id()) == null) {
-            throw new IllegalStateException("Attempted to convert unknown active task to standby task: " + activeTask.id());
+    void removeTask(final Task taskToRemove) {
+        final TaskId taskId = taskToRemove.id();
+
+        if (taskToRemove.state() != Task.State.CLOSED) {
+            throw new IllegalStateException("Attempted to remove a task that is not closed: " + taskId);
         }
-        final Set<TopicPartition> toBeRemoved = activeTasksPerPartition.entrySet().stream()
-            .filter(e -> e.getValue().id().equals(activeTask.id()))
-            .map(Map.Entry::getKey)
-            .collect(Collectors.toSet());
-        toBeRemoved.forEach(activeTasksPerPartition::remove);
 
-        cleanUpTaskProducerAndRemoveTask(activeTask.id(), taskCloseExceptions);
+        if (taskToRemove.isActive()) {
+            if (activeTasksPerId.remove(taskId) == null) {
+                throw new IllegalArgumentException("Attempted to remove an active task that is not owned: " + taskId);
+            }
+            removePartitionsForActiveTask(taskId);
+            pendingActiveTasksToCreate.remove(taskId);
+        } else {
+            if (standbyTasksPerId.remove(taskId) == null) {
+                throw new IllegalArgumentException("Attempted to remove a standby task that is not owned: " + taskId);
+            }
+            pendingStandbyTasksToCreate.remove(taskId);
+        }
+    }
+
+    void replaceActiveWithStandby(final StandbyTask standbyTask) {
+        final TaskId taskId = standbyTask.id();
+        if (activeTasksPerId.remove(taskId) == null) {
+            throw new IllegalStateException("Attempted to replace unknown active task with standby task: " + taskId);
+        }
+        removePartitionsForActiveTask(taskId);
 
-        final StandbyTask standbyTask = standbyTaskCreator.createStandbyTaskFromActive(activeTask, partitions);
         standbyTasksPerId.put(standbyTask.id(), standbyTask);
-        allTasksPerId.put(standbyTask.id(), standbyTask);
     }
 
-    void convertStandbyToActive(final StandbyTask standbyTask, final Set<TopicPartition> partitions) {
-        if (standbyTasksPerId.remove(standbyTask.id()) == null) {
-            throw new IllegalStateException("Attempted to convert unknown standby task to stream task: " + standbyTask.id());
+    void replaceStandbyWithActive(final StreamTask activeTask) {
+        final TaskId taskId = activeTask.id();
+        if (standbyTasksPerId.remove(taskId) == null) {
+            throw new IllegalStateException("Attempted to convert unknown standby task to stream task: " + taskId);
         }
 
-        final StreamTask activeTask = activeTaskCreator.createActiveTaskFromStandby(standbyTask, partitions, mainConsumer);
         activeTasksPerId.put(activeTask.id(), activeTask);
         for (final TopicPartition topicPartition : activeTask.inputPartitions()) {
             activeTasksPerPartition.put(topicPartition, activeTask);
         }
-        allTasksPerId.put(activeTask.id(), activeTask);
     }
 
-    void updateInputPartitionsAndResume(final Task task, final Set<TopicPartition> topicPartitions) {
+    boolean updateActiveTaskInputPartitions(final Task task, final Set<TopicPartition> topicPartitions) {
         final boolean requiresUpdate = !task.inputPartitions().equals(topicPartitions);
         if (requiresUpdate) {
             log.debug("Update task {} inputPartitions: current {}, new {}", task, task.inputPartitions(), topicPartitions);
-            for (final TopicPartition inputPartition : task.inputPartitions()) {
-                activeTasksPerPartition.remove(inputPartition);
-            }
             if (task.isActive()) {
+                for (final TopicPartition inputPartition : task.inputPartitions()) {
+                    activeTasksPerPartition.remove(inputPartition);
+                }
                 for (final TopicPartition topicPartition : topicPartitions) {
                     activeTasksPerPartition.put(topicPartition, task);
                 }
             }
-            task.updateInputPartitions(topicPartitions, topologyMetadata.nodeToSourceTopics(task.id()));
-        }
-        task.resume();
-    }
-
-    void cleanUpTaskProducerAndRemoveTask(final TaskId taskId,
-                                          final Map<TaskId, RuntimeException> taskCloseExceptions) {
-        try {
-            activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(taskId);
-        } catch (final RuntimeException e) {
-            final String uncleanMessage = String.format("Failed to close task %s cleanly. Attempting to close remaining tasks before re-throwing:", taskId);
-            log.error(uncleanMessage, e);
-            taskCloseExceptions.putIfAbsent(taskId, e);
         }
-        removeTaskBeforeClosing(taskId);
-    }
 
-    void reInitializeThreadProducer() {
-        activeTaskCreator.reInitializeThreadProducer();
+        return requiresUpdate;
     }
 
-    void closeThreadProducerIfNeeded() {
-        activeTaskCreator.closeThreadProducerIfNeeded();
-    }
-
-    // TODO: change type to `StreamTask`
-    void closeAndRemoveTaskProducerIfNeeded(final Task activeTask) {
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(activeTask.id());
-    }
-
-    void removeTaskBeforeClosing(final TaskId taskId) {
-        activeTasksPerId.remove(taskId);
+    private void removePartitionsForActiveTask(final TaskId taskId) {
         final Set<TopicPartition> toBeRemoved = activeTasksPerPartition.entrySet().stream()
             .filter(e -> e.getValue().id().equals(taskId))
             .map(Map.Entry::getKey)
             .collect(Collectors.toSet());
         toBeRemoved.forEach(activeTasksPerPartition::remove);
-        standbyTasksPerId.remove(taskId);
-        allTasksPerId.remove(taskId);
     }
 
     void clear() {
         activeTasksPerId.clear();
-        activeTasksPerPartition.clear();
         standbyTasksPerId.clear();
-        allTasksPerId.clear();
+        activeTasksPerPartition.clear();
     }
 
     // TODO: change return type to `StreamTask`
@@ -241,19 +242,23 @@ Task activeTasksForInputPartition(final TopicPartition partition) {
         return activeTasksPerPartition.get(partition);
     }
 
-    // TODO: change return type to `StandbyTask`
-    Task standbyTask(final TaskId taskId) {
-        if (!standbyTasksPerId.containsKey(taskId)) {
-            throw new IllegalStateException("Standby task unknown: " + taskId);
+    private Task getTask(final TaskId taskId) {
+        if (activeTasksPerId.containsKey(taskId)) {
+            return activeTasksPerId.get(taskId);
         }
-        return standbyTasksPerId.get(taskId);
+        if (standbyTasksPerId.containsKey(taskId)) {
+            return standbyTasksPerId.get(taskId);
+        }
+        return null;
     }
 
     Task task(final TaskId taskId) {
-        if (!allTasksPerId.containsKey(taskId)) {
+        final Task task = getTask(taskId);
+
+        if (task != null)
+            return task;
+        else
             throw new IllegalStateException("Task unknown: " + taskId);
-        }
-        return allTasksPerId.get(taskId);
     }
 
     Collection<Task> tasks(final Collection<TaskId> taskIds) {
@@ -266,73 +271,30 @@ Collection<Task> tasks(final Collection<TaskId> taskIds) {
 
     // TODO: change return type to `StreamTask`
     Collection<Task> activeTasks() {
-        return readOnlyActiveTasks;
-    }
-
-    Collection<Task> allTasks() {
-        return readOnlyTasks;
+        return Collections.unmodifiableCollection(activeTasksPerId.values());
     }
 
-    Set<TaskId> activeTaskIds() {
-        return readOnlyActiveTaskIds;
+    /**
+     * All tasks returned by any of the getters are read-only and should NOT be modified;
+     * and the returned task could be modified by other threads concurrently
+     */
+    Set<Task> allTasks() {
+        return union(HashSet::new, new HashSet<>(activeTasksPerId.values()), new HashSet<>(standbyTasksPerId.values()));
     }
 
-    Set<TaskId> standbyTaskIds() {
-        return readOnlyStandbyTaskIds;
+    Set<TaskId> allTaskIds() {
+        return union(HashSet::new, activeTasksPerId.keySet(), standbyTasksPerId.keySet());
     }
 
-    // TODO: change return type to `StreamTask`
-    Map<TaskId, Task> activeTaskMap() {
-        return readOnlyActiveTasksPerId;
-    }
-
-    // TODO: change return type to `StandbyTask`
-    Map<TaskId, Task> standbyTaskMap() {
-        return readOnlyStandbyTasksPerId;
-    }
-
-    Map<TaskId, Task> tasksPerId() {
-        return readOnlyTasksPerId;
+    Map<TaskId, Task> allTasksPerId() {
+        final Map<TaskId, Task> ret = new HashMap<>();
+        ret.putAll(activeTasksPerId);
+        ret.putAll(standbyTasksPerId);
+        return ret;
     }
 
     boolean owned(final TaskId taskId) {
-        return allTasksPerId.containsKey(taskId);
-    }
-
-    StreamsProducer streamsProducerForTask(final TaskId taskId) {
-        return activeTaskCreator.streamsProducerForTask(taskId);
-    }
-
-    StreamsProducer threadProducer() {
-        return activeTaskCreator.threadProducer();
-    }
-
-    Map<MetricName, Metric> producerMetrics() {
-        return activeTaskCreator.producerMetrics();
-    }
-
-    Set<String> producerClientIds() {
-        return activeTaskCreator.producerClientIds();
-    }
-
-    Consumer<byte[], byte[]> mainConsumer() {
-        return mainConsumer;
-    }
-
-    Collection<Task> successfullyProcessed() {
-        return successfullyProcessed;
-    }
-
-    void addToSuccessfullyProcessed(final Task task) {
-        successfullyProcessed.add(task);
-    }
-
-    void removeTaskFromCuccessfullyProcessedBeforeClosing(final Task task) {
-        successfullyProcessed.remove(task);
-    }
-
-    void clearSuccessfullyProcessed() {
-        successfullyProcessed.clear();
+        return getTask(taskId) != null;
     }
 
     // for testing only
@@ -342,6 +304,5 @@ void addTask(final Task task) {
         } else {
             standbyTasksPerId.put(task.id(), task);
         }
-        allTasksPerId.put(task.id(), task);
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadata.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadata.java
new file mode 100644
index 0000000000000..dae072d25bbac
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadata.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import java.util.Objects;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Metadata to be committed together with TopicPartition offset
+ */
+public class TopicPartitionMetadata {
+
+    private static final Logger LOG = LoggerFactory.getLogger(TopicPartitionMetadata.class);
+
+    // visible for testing
+    static final byte LATEST_MAGIC_BYTE = 2;
+
+    private final long partitionTime;
+    private final ProcessorMetadata processorMetadata;
+
+    public TopicPartitionMetadata(final long partitionTime, final ProcessorMetadata processorMetadata) {
+        Objects.requireNonNull(processorMetadata);
+        this.partitionTime = partitionTime;
+        this.processorMetadata = processorMetadata;
+    }
+
+    public long partitionTime() {
+        return partitionTime;
+    }
+
+    public ProcessorMetadata processorMetadata() {
+        return processorMetadata;
+    }
+
+    public String encode() {
+        final byte[] serializedMeta = processorMetadata.serialize();
+        // Format: MAGIC_BYTE(1) + PartitionTime(8) + processMeta
+        final ByteBuffer buffer = ByteBuffer.allocate(Byte.BYTES + Long.BYTES + serializedMeta.length);
+        buffer.put(LATEST_MAGIC_BYTE);
+        buffer.putLong(partitionTime);
+        buffer.put(serializedMeta);
+        return Base64.getEncoder().encodeToString(buffer.array());
+    }
+
+    public static TopicPartitionMetadata decode(final String encryptedString) {
+        long timestamp = RecordQueue.UNKNOWN;
+        ProcessorMetadata metadata = new ProcessorMetadata();
+
+        if (encryptedString.isEmpty()) {
+            return new TopicPartitionMetadata(timestamp, metadata);
+        }
+        try {
+            final ByteBuffer buffer = ByteBuffer.wrap(Base64.getDecoder().decode(encryptedString));
+            final byte version = buffer.get();
+            switch (version) {
+                case (byte) 1:
+                    timestamp = buffer.getLong();
+                    break;
+                case LATEST_MAGIC_BYTE:
+                    timestamp = buffer.getLong();
+                    if (buffer.remaining() > 0) {
+                        final byte[] metaBytes = new byte[buffer.remaining()];
+                        buffer.get(metaBytes);
+                        metadata = ProcessorMetadata.deserialize(metaBytes);
+                    }
+                    break;
+                default:
+                    LOG.warn(
+                        "Unsupported offset metadata version found. Supported version <= {}. Found version {}.",
+                        LATEST_MAGIC_BYTE, version);
+            }
+        } catch (final Exception exception) {
+            LOG.warn("Unsupported offset metadata found");
+        }
+        return new TopicPartitionMetadata(timestamp, metadata);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(partitionTime, processorMetadata);
+    }
+
+    @Override
+    public boolean equals(final Object obj) {
+        if (obj == null || obj.getClass() != getClass()) {
+            return false;
+        }
+
+        if (obj == this) {
+            return true;
+        }
+
+        return partitionTime == ((TopicPartitionMetadata) obj).partitionTime
+            && Objects.equals(processorMetadata, ((TopicPartitionMetadata) obj).processorMetadata);
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopologyMetadata.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopologyMetadata.java
index 91f1768d86fc6..0e5d68cd759b5 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopologyMetadata.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/TopologyMetadata.java
@@ -29,8 +29,8 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.TopicsInfo;
+import org.apache.kafka.streams.TopologyConfig.TaskConfig;
 import org.apache.kafka.streams.processor.internals.namedtopology.NamedTopology;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig.TaskConfig;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -73,6 +73,7 @@ public class TopologyMetadata {
     private final ProcessingMode processingMode;
     private final TopologyVersion version;
     private final TaskExecutionMetadata taskExecutionMetadata;
+    private final Set<String> pausedTopologies;
 
     private final ConcurrentNavigableMap<String, InternalTopologyBuilder> builders; // Keep sorted by topology name for readability
 
@@ -104,6 +105,7 @@ public TopologyMetadata(final InternalTopologyBuilder builder,
         this.processingMode = StreamsConfigUtils.processingMode(config);
         this.config = config;
         this.log = LoggerFactory.getLogger(getClass());
+        this.pausedTopologies = ConcurrentHashMap.newKeySet();
 
         builders = new ConcurrentSkipListMap<>();
         if (builder.hasNamedTopology()) {
@@ -111,7 +113,7 @@ public TopologyMetadata(final InternalTopologyBuilder builder,
         } else {
             builders.put(UNNAMED_TOPOLOGY, builder);
         }
-        this.taskExecutionMetadata = new TaskExecutionMetadata(builders.keySet());
+        this.taskExecutionMetadata = new TaskExecutionMetadata(builders.keySet(), pausedTopologies, processingMode);
     }
 
     public TopologyMetadata(final ConcurrentNavigableMap<String, InternalTopologyBuilder> builders,
@@ -120,12 +122,13 @@ public TopologyMetadata(final ConcurrentNavigableMap<String, InternalTopologyBui
         this.processingMode = StreamsConfigUtils.processingMode(config);
         this.config = config;
         this.log = LoggerFactory.getLogger(getClass());
+        this.pausedTopologies = ConcurrentHashMap.newKeySet();
 
         this.builders = builders;
         if (builders.isEmpty()) {
             log.info("Created an empty KafkaStreams app with no topology");
         }
-        this.taskExecutionMetadata = new TaskExecutionMetadata(builders.keySet());
+        this.taskExecutionMetadata = new TaskExecutionMetadata(builders.keySet(), pausedTopologies, processingMode);
     }
 
     // Need to (re)set the log here to pick up the `processId` part of the clientId in the prefix
@@ -257,6 +260,35 @@ public void registerAndBuildNewTopology(final KafkaFutureImpl<Void> future, fina
         }
     }
 
+    /**
+     * Pauses a topology by name
+     * @param topologyName Name of the topology to pause
+     */
+    public void pauseTopology(final String topologyName) {
+        pausedTopologies.add(topologyName);
+    }
+
+    /**
+     * Checks if a given topology is paused.
+     * @param topologyName If null, assume that we are checking the `UNNAMED_TOPOLOGY`.
+     * @return A boolean indicating if the topology is paused.
+     */
+    public boolean isPaused(final String topologyName) {
+        if (topologyName == null) {
+            return pausedTopologies.contains(UNNAMED_TOPOLOGY);
+        } else {
+            return pausedTopologies.contains(topologyName);
+        }
+    }
+
+    /**
+     * Resumes a topology by name
+     * @param topologyName Name of the topology to resume
+     */
+    public void resumeTopology(final String topologyName) {
+        pausedTopologies.remove(topologyName);
+    }
+
     /**
      * Removes the topology and registers a future that listens for all threads on the older version to see the update
      */
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java
index 65cc7ae1930bd..4a3d46cfc3b96 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/AssignorConfiguration.java
@@ -29,7 +29,6 @@
 import org.apache.kafka.streams.processor.internals.InternalTopicManager;
 import org.slf4j.Logger;
 
-import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
@@ -115,6 +114,17 @@ public RebalanceProtocol rebalanceProtocol() {
                     log.warn("The eager rebalancing protocol is deprecated and will stop being supported in a future release." +
                         " Please be prepared to remove the 'upgrade.from' config soon.");
                     return RebalanceProtocol.EAGER;
+                case StreamsConfig.UPGRADE_FROM_24:
+                case StreamsConfig.UPGRADE_FROM_25:
+                case StreamsConfig.UPGRADE_FROM_26:
+                case StreamsConfig.UPGRADE_FROM_27:
+                case StreamsConfig.UPGRADE_FROM_28:
+                case StreamsConfig.UPGRADE_FROM_30:
+                case StreamsConfig.UPGRADE_FROM_31:
+                case StreamsConfig.UPGRADE_FROM_32:
+                    // This config is for explicitly sending FK response to a requested partition
+                    // and should not affect the rebalance protocol
+                    break;
                 default:
                     throw new IllegalArgumentException("Unknown configuration value for parameter 'upgrade.from': " + upgradeFrom);
             }
@@ -158,6 +168,17 @@ public int configuredMetadataVersion(final int priorVersion) {
                 case StreamsConfig.UPGRADE_FROM_23:
                     // These configs are for cooperative rebalancing and should not affect the metadata version
                     break;
+                case StreamsConfig.UPGRADE_FROM_24:
+                case StreamsConfig.UPGRADE_FROM_25:
+                case StreamsConfig.UPGRADE_FROM_26:
+                case StreamsConfig.UPGRADE_FROM_27:
+                case StreamsConfig.UPGRADE_FROM_28:
+                case StreamsConfig.UPGRADE_FROM_30:
+                case StreamsConfig.UPGRADE_FROM_31:
+                case StreamsConfig.UPGRADE_FROM_32:
+                    // This config is for explicitly sending FK response to a requested partition
+                    // and should not affect the metadata version
+                    break;
                 default:
                     throw new IllegalArgumentException(
                         "Unknown configuration value for parameter 'upgrade.from': " + upgradeFrom
@@ -250,7 +271,7 @@ private AssignmentConfigs(final StreamsConfig configs) {
             maxWarmupReplicas = configs.getInt(StreamsConfig.MAX_WARMUP_REPLICAS_CONFIG);
             numStandbyReplicas = configs.getInt(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG);
             probingRebalanceIntervalMs = configs.getLong(StreamsConfig.PROBING_REBALANCE_INTERVAL_MS_CONFIG);
-            rackAwareAssignmentTags = Collections.emptyList();
+            rackAwareAssignmentTags = configs.getList(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG);
         }
 
         AssignmentConfigs(final Long acceptableRecoveryLag,
@@ -262,7 +283,7 @@ private AssignmentConfigs(final StreamsConfig configs) {
             this.maxWarmupReplicas = validated(StreamsConfig.MAX_WARMUP_REPLICAS_CONFIG, maxWarmupReplicas);
             this.numStandbyReplicas = validated(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, numStandbyReplicas);
             this.probingRebalanceIntervalMs = validated(StreamsConfig.PROBING_REBALANCE_INTERVAL_MS_CONFIG, probingRebalanceIntervalMs);
-            this.rackAwareAssignmentTags = rackAwareAssignmentTags;
+            this.rackAwareAssignmentTags = validated(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, rackAwareAssignmentTags);
         }
 
         private static <T> T validated(final String configKey, final T value) {
@@ -280,6 +301,7 @@ public String toString() {
                 "\n  maxWarmupReplicas=" + maxWarmupReplicas +
                 "\n  numStandbyReplicas=" + numStandbyReplicas +
                 "\n  probingRebalanceIntervalMs=" + probingRebalanceIntervalMs +
+                "\n  rackAwareAssignmentTags=" + rackAwareAssignmentTags +
                 "\n}";
         }
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java
index 5ee0e93e6aa42..b8ba4ce27e10a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientState.java
@@ -29,17 +29,16 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
-import java.util.stream.Collectors;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.UUID;
+import java.util.stream.Collectors;
 
 import static java.util.Collections.emptyMap;
 import static java.util.Collections.unmodifiableMap;
 import static java.util.Collections.unmodifiableSet;
 import static java.util.Comparator.comparing;
 import static java.util.Comparator.comparingLong;
-
 import static org.apache.kafka.common.utils.Utils.union;
 import static org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo.UNKNOWN_OFFSET_SUM;
 
@@ -65,6 +64,10 @@ public ClientState() {
         this(0);
     }
 
+    public ClientState(final Map<String, String> clientTags) {
+        this(0, clientTags);
+    }
+
     ClientState(final int capacity) {
         this(capacity, Collections.emptyMap());
     }
@@ -422,6 +425,7 @@ public String toString() {
                ") prevStandbyTasks: (" + previousStandbyTasks.taskIds() +
                ") changelogOffsetTotalsByTask: (" + taskOffsetSums.entrySet() +
                ") taskLagTotals: (" + taskLagTotals.entrySet() +
+               ") clientTags: (" + clientTags.entrySet() +
                ") capacity: " + capacity +
                " assigned: " + assignedTaskCount() +
                "]";
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignor.java
index c7399d7ed8a5b..cabfa545b101d 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignor.java
@@ -60,7 +60,7 @@ public boolean assign(final Map<UUID, ClientState> clients,
 
         final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(
             numStandbyReplicas,
-            allTaskIds
+            statefulTaskIds
         );
 
         final Map<String, Set<String>> tagKeyToValues = new HashMap<>();
@@ -79,6 +79,7 @@ public boolean assign(final Map<UUID, ClientState> clients,
 
                 if (clientState.activeTasks().contains(statefulTaskId)) {
                     assignStandbyTasksToClientsWithDifferentTags(
+                        numStandbyReplicas,
                         standbyTaskClientsByTaskLoad,
                         statefulTaskId,
                         clientId,
@@ -94,17 +95,10 @@ public boolean assign(final Map<UUID, ClientState> clients,
         }
 
         if (!tasksToRemainingStandbys.isEmpty()) {
-            log.debug("Rack aware standby task assignment was not able to assign all standby tasks. " +
-                      "tasksToRemainingStandbys=[{}], pendingStandbyTasksToClientId=[{}]. " +
-                      "Will distribute the remaining standby tasks to least loaded clients.",
-                      tasksToRemainingStandbys, pendingStandbyTasksToClientId);
-
             assignPendingStandbyTasksToLeastLoadedClients(clients,
                                                           numStandbyReplicas,
-                                                          rackAwareAssignmentTags,
                                                           standbyTaskClientsByTaskLoad,
-                                                          tasksToRemainingStandbys,
-                                                          pendingStandbyTasksToClientId);
+                                                          tasksToRemainingStandbys);
         }
 
         // returning false, because standby task assignment will never require a follow-up probing rebalance.
@@ -113,34 +107,22 @@ public boolean assign(final Map<UUID, ClientState> clients,
 
     private static void assignPendingStandbyTasksToLeastLoadedClients(final Map<UUID, ClientState> clients,
                                                                       final int numStandbyReplicas,
-                                                                      final Set<String> rackAwareAssignmentTags,
                                                                       final ConstrainedPrioritySet standbyTaskClientsByTaskLoad,
-                                                                      final Map<TaskId, Integer> pendingStandbyTaskToNumberRemainingStandbys,
-                                                                      final Map<TaskId, UUID> pendingStandbyTaskToClientId) {
+                                                                      final Map<TaskId, Integer> pendingStandbyTaskToNumberRemainingStandbys) {
         // We need to re offer all the clients to find the least loaded ones
         standbyTaskClientsByTaskLoad.offerAll(clients.keySet());
 
         for (final Entry<TaskId, Integer> pendingStandbyTaskAssignmentEntry : pendingStandbyTaskToNumberRemainingStandbys.entrySet()) {
             final TaskId activeTaskId = pendingStandbyTaskAssignmentEntry.getKey();
-            final UUID clientId = pendingStandbyTaskToClientId.get(activeTaskId);
 
-            final int numberOfRemainingStandbys = pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(
+            pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(
+                numStandbyReplicas,
                 clients,
                 pendingStandbyTaskToNumberRemainingStandbys,
                 standbyTaskClientsByTaskLoad,
-                activeTaskId
+                activeTaskId,
+                log
             );
-
-            if (numberOfRemainingStandbys > 0) {
-                log.warn("Unable to assign {} of {} standby tasks for task [{}] with client tags [{}]. " +
-                         "There is not enough available capacity. You should " +
-                         "increase the number of application instances " +
-                         "on different client tag dimensions " +
-                         "to maintain the requested number of standby replicas. " +
-                         "Rack awareness is configured with [{}] tags.",
-                         numberOfRemainingStandbys, numStandbyReplicas, activeTaskId,
-                         clients.get(clientId).clientTags(), rackAwareAssignmentTags);
-            }
         }
     }
 
@@ -174,7 +156,8 @@ static void fillClientsTagStatistics(final Map<UUID, ClientState> clientStates,
     }
 
     // Visible for testing
-    static void assignStandbyTasksToClientsWithDifferentTags(final ConstrainedPrioritySet standbyTaskClientsByTaskLoad,
+    static void assignStandbyTasksToClientsWithDifferentTags(final int numberOfStandbyClients,
+                                                             final ConstrainedPrioritySet standbyTaskClientsByTaskLoad,
                                                              final TaskId activeTaskId,
                                                              final UUID activeTaskClient,
                                                              final Set<String> rackAwareAssignmentTags,
@@ -211,17 +194,32 @@ static void assignStandbyTasksToClientsWithDifferentTags(final ConstrainedPriori
                 break;
             }
 
-            clientStates.get(clientOnUnusedTagDimensions).assignStandby(activeTaskId);
-
+            final ClientState clientStateOnUsedTagDimensions = clientStates.get(clientOnUnusedTagDimensions);
             countOfUsedClients++;
             numRemainingStandbys--;
 
+            log.debug("Assigning {} out of {} standby tasks for an active task [{}] with client tags {}. " +
+                      "Standby task client tags are {}.",
+                      numberOfStandbyClients - numRemainingStandbys, numberOfStandbyClients, activeTaskId,
+                      clientStates.get(activeTaskClient).clientTags(), clientStateOnUsedTagDimensions.clientTags());
+
+            clientStateOnUsedTagDimensions.assignStandby(activeTaskId);
             lastUsedClient = clientOnUnusedTagDimensions;
         } while (numRemainingStandbys > 0);
 
         if (numRemainingStandbys > 0) {
             pendingStandbyTasksToClientId.put(activeTaskId, activeTaskClient);
             tasksToRemainingStandbys.put(activeTaskId, numRemainingStandbys);
+            log.warn("Rack aware standby task assignment was not able to assign {} of {} standby tasks for the " +
+                     "active task [{}] with the rack aware assignment tags {}. " +
+                     "This may happen when there aren't enough application instances on different tag " +
+                     "dimensions compared to an active and corresponding standby task. " +
+                     "Consider launching application instances on different tag dimensions than [{}]. " +
+                     "Standby task assignment will fall back to assigning standby tasks to the least loaded clients.",
+                     numRemainingStandbys, numberOfStandbyClients,
+                     activeTaskId, rackAwareAssignmentTags,
+                     clientStates.get(activeTaskClient).clientTags());
+
         } else {
             tasksToRemainingStandbys.remove(activeTaskId);
         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/DefaultStandbyTaskAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/DefaultStandbyTaskAssignor.java
index db6cb4e26cef8..680a056a826ff 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/DefaultStandbyTaskAssignor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/DefaultStandbyTaskAssignor.java
@@ -50,18 +50,12 @@ public boolean assign(final Map<UUID, ClientState> clients,
         standbyTaskClientsByTaskLoad.offerAll(clients.keySet());
 
         for (final TaskId task : statefulTaskIds) {
-            final int numRemainingStandbys = pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(clients,
-                                                                                                    tasksToRemainingStandbys,
-                                                                                                    standbyTaskClientsByTaskLoad,
-                                                                                                    task);
-
-            if (numRemainingStandbys > 0) {
-                log.warn("Unable to assign {} of {} standby tasks for task [{}]. " +
-                         "There is not enough available capacity. You should " +
-                         "increase the number of application instances " +
-                         "to maintain the requested number of standby replicas.",
-                         numRemainingStandbys, numStandbyReplicas, task);
-            }
+            pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(numStandbyReplicas,
+                                                                   clients,
+                                                                   tasksToRemainingStandbys,
+                                                                   standbyTaskClientsByTaskLoad,
+                                                                   task,
+                                                                   log);
         }
 
         // returning false, because standby task assignment will never require a follow-up probing rebalance.
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignor.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignor.java
index d0bb50b66ff60..c54199ad1773f 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignor.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignor.java
@@ -22,6 +22,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
@@ -68,6 +69,8 @@ public boolean assign(final Map<UUID, ClientState> clients,
             configs.acceptableRecoveryLag
         );
 
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = tasksToClientByLag(statefulTasks, clientStates);
+
         // We temporarily need to know which standby tasks were intended as warmups
         // for active tasks, so that we don't move them (again) when we plan standby
         // task movements. We can then immediately treat warmups exactly the same as
@@ -77,6 +80,7 @@ public boolean assign(final Map<UUID, ClientState> clients,
 
         final int neededActiveTaskMovements = assignActiveTaskMovements(
             tasksToCaughtUpClients,
+            tasksToClientByLag,
             clientStates,
             warmups,
             remainingWarmupReplicas
@@ -84,6 +88,7 @@ public boolean assign(final Map<UUID, ClientState> clients,
 
         final int neededStandbyTaskMovements = assignStandbyTaskMovements(
             tasksToCaughtUpClients,
+            tasksToClientByLag,
             clientStates,
             remainingWarmupReplicas,
             warmups
@@ -129,7 +134,7 @@ private void assignStandbyReplicaTasks(final TreeMap<UUID, ClientState> clientSt
             return;
         }
 
-        final StandbyTaskAssignor standbyTaskAssignor = createStandbyTaskAssignor(configs);
+        final StandbyTaskAssignor standbyTaskAssignor = StandbyTaskAssignorFactory.create(configs);
 
         standbyTaskAssignor.assign(clientStates, allTaskIds, statefulTasks, configs);
 
@@ -142,15 +147,6 @@ private void assignStandbyReplicaTasks(final TreeMap<UUID, ClientState> clientSt
         );
     }
 
-    // Visible for testing
-    static StandbyTaskAssignor createStandbyTaskAssignor(final AssignmentConfigs configs) {
-        if (!configs.rackAwareAssignmentTags.isEmpty()) {
-            return new ClientTagAwareStandbyTaskAssignor();
-        } else {
-            return new DefaultStandbyTaskAssignor();
-        }
-    }
-
     private static void balanceTasksOverThreads(final SortedMap<UUID, ClientState> clientStates,
                                                 final Function<ClientState, Set<TaskId>> currentAssignmentAccessor,
                                                 final BiConsumer<ClientState, TaskId> taskUnassignor,
@@ -247,6 +243,18 @@ private static Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients(final Set<Tas
         return taskToCaughtUpClients;
     }
 
+    private static Map<TaskId, SortedSet<UUID>> tasksToClientByLag(final Set<TaskId> statefulTasks,
+                                                              final Map<UUID, ClientState> clientStates) {
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = new HashMap<>();
+        for (final TaskId task : statefulTasks) {
+            final SortedSet<UUID> clientLag = new TreeSet<>(Comparator.<UUID>comparingLong(a ->
+                    clientStates.get(a).lagFor(task)).thenComparing(a -> a));
+            clientLag.addAll(clientStates.keySet());
+            tasksToClientByLag.put(task, clientLag);
+        }
+        return tasksToClientByLag;
+    }
+
     private static boolean unbounded(final long acceptableRecoveryLag) {
         return acceptableRecoveryLag == Long.MAX_VALUE;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ReferenceContainer.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ReferenceContainer.java
index 9b46eeb710802..19011d865a1c8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ReferenceContainer.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/ReferenceContainer.java
@@ -24,6 +24,7 @@
 import org.apache.kafka.streams.processor.internals.TaskManager;
 
 import java.util.LinkedList;
+import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
@@ -37,4 +38,5 @@ public class ReferenceContainer {
     public final AtomicLong nextScheduledRebalanceMs = new AtomicLong(Long.MAX_VALUE);
     public final Queue<StreamsException> nonFatalExceptionsToHandle = new LinkedList<>();
     public Time time;
+    public Map<String, String> clientTags;
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtils.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtils.java
index 7ed6f5dec1b19..3f34e5ef8c17a 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtils.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtils.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.streams.processor.internals.assignment;
 
 import org.apache.kafka.streams.processor.TaskId;
+import org.slf4j.Logger;
 
 import java.util.Map;
 import java.util.Set;
@@ -33,10 +34,12 @@ static ConstrainedPrioritySet createLeastLoadedPrioritySetConstrainedByAssignedT
                                           client -> clients.get(client).assignedTaskLoad());
     }
 
-    static int pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(final Map<UUID, ClientState> clients,
-                                                                      final Map<TaskId, Integer> tasksToRemainingStandbys,
-                                                                      final ConstrainedPrioritySet standbyTaskClientsByTaskLoad,
-                                                                      final TaskId activeTaskId) {
+    static void pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(final int numStandbyReplicas,
+                                                                       final Map<UUID, ClientState> clients,
+                                                                       final Map<TaskId, Integer> tasksToRemainingStandbys,
+                                                                       final ConstrainedPrioritySet standbyTaskClientsByTaskLoad,
+                                                                       final TaskId activeTaskId,
+                                                                       final Logger log) {
         int numRemainingStandbys = tasksToRemainingStandbys.get(activeTaskId);
         while (numRemainingStandbys > 0) {
             final UUID client = standbyTaskClientsByTaskLoad.poll(activeTaskId);
@@ -49,7 +52,13 @@ static int pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(final Map<UUID
             tasksToRemainingStandbys.put(activeTaskId, numRemainingStandbys);
         }
 
-        return numRemainingStandbys;
+        if (numRemainingStandbys > 0) {
+            log.warn("Unable to assign {} of {} standby tasks for task [{}]. " +
+                     "There is not enough available capacity. You should " +
+                     "increase the number of application instances " +
+                     "to maintain the requested number of standby replicas.",
+                     numRemainingStandbys, numStandbyReplicas, activeTaskId);
+        }
     }
 
     static Map<TaskId, Integer> computeTasksToRemainingStandbys(final int numStandbyReplicas,
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactory.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactory.java
new file mode 100644
index 0000000000000..30c78f33f38da
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals.assignment;
+
+class StandbyTaskAssignorFactory {
+    private StandbyTaskAssignorFactory() {}
+
+    static StandbyTaskAssignor create(final AssignorConfiguration.AssignmentConfigs configs) {
+        if (!configs.rackAwareAssignmentTags.isEmpty()) {
+            return new ClientTagAwareStandbyTaskAssignor();
+        } else {
+            return new DefaultStandbyTaskAssignor();
+        }
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovement.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovement.java
index cbfa3daaedf90..38e64276ba0a8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovement.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovement.java
@@ -29,6 +29,7 @@
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.BiFunction;
+import java.util.function.Function;
 
 import static java.util.Arrays.asList;
 import static java.util.Objects.requireNonNull;
@@ -42,10 +43,6 @@ private TaskMovement(final TaskId task, final UUID destination, final SortedSet<
         this.task = task;
         this.destination = destination;
         this.caughtUpClients = caughtUpClients;
-
-        if (caughtUpClients == null || caughtUpClients.isEmpty()) {
-            throw new IllegalStateException("Should not attempt to move a task if no caught up clients exist");
-        }
     }
 
     private TaskId task() {
@@ -56,25 +53,34 @@ private int numCaughtUpClients() {
         return caughtUpClients.size();
     }
 
-    private static boolean taskIsNotCaughtUpOnClientAndOtherCaughtUpClientsExist(final TaskId task,
-                                                                                 final UUID client,
-                                                                                 final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients) {
-        return !taskIsCaughtUpOnClientOrNoCaughtUpClientsExist(task, client, tasksToCaughtUpClients);
+    private static boolean taskIsNotCaughtUpOnClientAndOtherMoreCaughtUpClientsExist(final TaskId task,
+                                                                                     final UUID client,
+                                                                                     final Map<UUID, ClientState> clientStates,
+                                                                                     final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients,
+                                                                                     final Map<TaskId, SortedSet<UUID>> tasksToClientByLag) {
+        final SortedSet<UUID> taskClients = requireNonNull(tasksToClientByLag.get(task), "uninitialized set");
+        if (taskIsCaughtUpOnClient(task, client, tasksToCaughtUpClients)) {
+            return false;
+        }
+        final long mostCaughtUpLag = clientStates.get(taskClients.first()).lagFor(task);
+        final long clientLag = clientStates.get(client).lagFor(task);
+        return mostCaughtUpLag < clientLag;
     }
 
-    private static boolean taskIsCaughtUpOnClientOrNoCaughtUpClientsExist(final TaskId task,
-                                                                          final UUID client,
-                                                                          final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients) {
+    private static boolean taskIsCaughtUpOnClient(final TaskId task,
+                                                  final UUID client,
+                                                  final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients) {
         final Set<UUID> caughtUpClients = requireNonNull(tasksToCaughtUpClients.get(task), "uninitialized set");
-        return caughtUpClients.isEmpty() || caughtUpClients.contains(client);
+        return caughtUpClients.contains(client);
     }
 
     static int assignActiveTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients,
+                                         final Map<TaskId, SortedSet<UUID>> tasksToClientByLag,
                                          final Map<UUID, ClientState> clientStates,
                                          final Map<UUID, Set<TaskId>> warmups,
                                          final AtomicInteger remainingWarmupReplicas) {
         final BiFunction<UUID, TaskId, Boolean> caughtUpPredicate =
-            (client, task) -> taskIsCaughtUpOnClientOrNoCaughtUpClientsExist(task, client, tasksToCaughtUpClients);
+            (client, task) -> taskIsCaughtUpOnClient(task, client, tasksToCaughtUpClients);
 
         final ConstrainedPrioritySet caughtUpClientsByTaskLoad = new ConstrainedPrioritySet(
             caughtUpPredicate,
@@ -89,10 +95,10 @@ static int assignActiveTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksToC
             final UUID client = clientStateEntry.getKey();
             final ClientState state = clientStateEntry.getValue();
             for (final TaskId task : state.activeTasks()) {
-                // if the desired client is not caught up, and there is another client that _is_ caught up, then
-                // we schedule a movement, so we can move the active task to the caught-up client. We'll try to
+                // if the desired client is not caught up, and there is another client that _is_ more caught up, then
+                // we schedule a movement, so we can move the active task to a more caught-up client. We'll try to
                 // assign a warm-up to the desired client so that we can move it later on.
-                if (taskIsNotCaughtUpOnClientAndOtherCaughtUpClientsExist(task, client, tasksToCaughtUpClients)) {
+                if (taskIsNotCaughtUpOnClientAndOtherMoreCaughtUpClientsExist(task, client, clientStates, tasksToCaughtUpClients, tasksToClientByLag)) {
                     taskMovements.add(new TaskMovement(task, client, tasksToCaughtUpClients.get(task)));
                 }
             }
@@ -102,33 +108,14 @@ static int assignActiveTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksToC
         final int movementsNeeded = taskMovements.size();
 
         for (final TaskMovement movement : taskMovements) {
-            final UUID standbySourceClient = caughtUpClientsByTaskLoad.poll(
-                movement.task,
-                c -> clientStates.get(c).hasStandbyTask(movement.task)
-            );
-            if (standbySourceClient == null) {
-                // there's not a caught-up standby available to take over the task, so we'll schedule a warmup instead
-                final UUID sourceClient = requireNonNull(
-                    caughtUpClientsByTaskLoad.poll(movement.task),
-                    "Tried to move task to caught-up client but none exist"
-                );
-
-                moveActiveAndTryToWarmUp(
-                    remainingWarmupReplicas,
-                    movement.task,
-                    clientStates.get(sourceClient),
-                    clientStates.get(movement.destination),
-                    warmups.computeIfAbsent(movement.destination, x -> new TreeSet<>())
-                );
-                caughtUpClientsByTaskLoad.offerAll(asList(sourceClient, movement.destination));
-            } else {
-                // we found a candidate to trade standby/active state with our destination, so we don't need a warmup
-                swapStandbyAndActive(
-                    movement.task,
-                    clientStates.get(standbySourceClient),
-                    clientStates.get(movement.destination)
-                );
-                caughtUpClientsByTaskLoad.offerAll(asList(standbySourceClient, movement.destination));
+            // Attempt to find a caught up standby, otherwise find any caught up client, failing that use the most
+            // caught up client.
+            final boolean moved = tryToSwapStandbyAndActiveOnCaughtUpClient(clientStates, caughtUpClientsByTaskLoad, movement) ||
+                    tryToMoveActiveToCaughtUpClientAndTryToWarmUp(clientStates, warmups, remainingWarmupReplicas, caughtUpClientsByTaskLoad, movement) ||
+                    tryToMoveActiveToMostCaughtUpClient(tasksToClientByLag, clientStates, warmups, remainingWarmupReplicas, caughtUpClientsByTaskLoad, movement);
+
+            if (!moved) {
+                throw new IllegalStateException("Tried to move task to more caught-up client as scheduled before but none exist");
             }
         }
 
@@ -136,11 +123,12 @@ static int assignActiveTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksToC
     }
 
     static int assignStandbyTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients,
+                                          final Map<TaskId, SortedSet<UUID>> tasksToClientByLag,
                                           final Map<UUID, ClientState> clientStates,
                                           final AtomicInteger remainingWarmupReplicas,
                                           final Map<UUID, Set<TaskId>> warmups) {
         final BiFunction<UUID, TaskId, Boolean> caughtUpPredicate =
-            (client, task) -> taskIsCaughtUpOnClientOrNoCaughtUpClientsExist(task, client, tasksToCaughtUpClients);
+            (client, task) -> taskIsCaughtUpOnClient(task, client, tasksToCaughtUpClients);
 
         final ConstrainedPrioritySet caughtUpClientsByTaskLoad = new ConstrainedPrioritySet(
             caughtUpPredicate,
@@ -157,8 +145,8 @@ static int assignStandbyTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksTo
             for (final TaskId task : state.standbyTasks()) {
                 if (warmups.getOrDefault(destination, Collections.emptySet()).contains(task)) {
                     // this is a warmup, so we won't move it.
-                } else if (taskIsNotCaughtUpOnClientAndOtherCaughtUpClientsExist(task, destination, tasksToCaughtUpClients)) {
-                    // if the desired client is not caught up, and there is another client that _is_ caught up, then
+                } else if (taskIsNotCaughtUpOnClientAndOtherMoreCaughtUpClientsExist(task, destination, clientStates, tasksToCaughtUpClients, tasksToClientByLag)) {
+                    // if the desired client is not caught up, and there is another client that _is_ more caught up, then
                     // we schedule a movement, so we can move the active task to the caught-up client. We'll try to
                     // assign a warm-up to the desired client so that we can move it later on.
                     taskMovements.add(new TaskMovement(task, destination, tasksToCaughtUpClients.get(task)));
@@ -170,11 +158,17 @@ static int assignStandbyTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksTo
         int movementsNeeded = 0;
 
         for (final TaskMovement movement : taskMovements) {
-            final UUID sourceClient = caughtUpClientsByTaskLoad.poll(
+            final Function<UUID, Boolean> eligibleClientPredicate =
+                    clientId -> !clientStates.get(clientId).hasAssignedTask(movement.task);
+            UUID sourceClient = caughtUpClientsByTaskLoad.poll(
                 movement.task,
-                clientId -> !clientStates.get(clientId).hasAssignedTask(movement.task)
+                eligibleClientPredicate
             );
 
+            if (sourceClient == null) {
+                sourceClient = mostCaughtUpEligibleClient(tasksToClientByLag, eligibleClientPredicate, movement.task, movement.destination);
+            }
+
             if (sourceClient == null) {
                 // then there's no caught-up client that doesn't already have a copy of this task, so there's
                 // nowhere to move it.
@@ -193,6 +187,74 @@ static int assignStandbyTaskMovements(final Map<TaskId, SortedSet<UUID>> tasksTo
         return movementsNeeded;
     }
 
+    private static boolean tryToSwapStandbyAndActiveOnCaughtUpClient(final Map<UUID, ClientState> clientStates,
+                                                                     final ConstrainedPrioritySet caughtUpClientsByTaskLoad,
+                                                                     final TaskMovement movement) {
+        final UUID caughtUpStandbySourceClient = caughtUpClientsByTaskLoad.poll(
+                movement.task,
+                c -> clientStates.get(c).hasStandbyTask(movement.task)
+        );
+        if (caughtUpStandbySourceClient != null) {
+            swapStandbyAndActive(
+                    movement.task,
+                    clientStates.get(caughtUpStandbySourceClient),
+                    clientStates.get(movement.destination)
+            );
+            caughtUpClientsByTaskLoad.offerAll(asList(caughtUpStandbySourceClient, movement.destination));
+            return true;
+        }
+        return false;
+    }
+
+    private static boolean tryToMoveActiveToCaughtUpClientAndTryToWarmUp(final Map<UUID, ClientState> clientStates,
+                                                                         final Map<UUID, Set<TaskId>> warmups,
+                                                                         final AtomicInteger remainingWarmupReplicas,
+                                                                         final ConstrainedPrioritySet caughtUpClientsByTaskLoad,
+                                                                         final TaskMovement movement) {
+        final UUID caughtUpSourceClient = caughtUpClientsByTaskLoad.poll(movement.task);
+        if (caughtUpSourceClient != null) {
+            moveActiveAndTryToWarmUp(
+                    remainingWarmupReplicas,
+                    movement.task,
+                    clientStates.get(caughtUpSourceClient),
+                    clientStates.get(movement.destination),
+                    warmups.computeIfAbsent(movement.destination, x -> new TreeSet<>())
+            );
+            caughtUpClientsByTaskLoad.offerAll(asList(caughtUpSourceClient, movement.destination));
+            return true;
+        }
+        return false;
+    }
+
+    private static boolean tryToMoveActiveToMostCaughtUpClient(final Map<TaskId, SortedSet<UUID>> tasksToClientByLag,
+                                                               final Map<UUID, ClientState> clientStates,
+                                                               final Map<UUID, Set<TaskId>> warmups,
+                                                               final AtomicInteger remainingWarmupReplicas,
+                                                               final ConstrainedPrioritySet caughtUpClientsByTaskLoad,
+                                                               final TaskMovement movement) {
+        final UUID mostCaughtUpSourceClient = mostCaughtUpEligibleClient(tasksToClientByLag, movement.task, movement.destination);
+        if (mostCaughtUpSourceClient != null) {
+            if (clientStates.get(mostCaughtUpSourceClient).hasStandbyTask(movement.task)) {
+                swapStandbyAndActive(
+                        movement.task,
+                        clientStates.get(mostCaughtUpSourceClient),
+                        clientStates.get(movement.destination)
+                );
+            } else {
+                moveActiveAndTryToWarmUp(
+                        remainingWarmupReplicas,
+                        movement.task,
+                        clientStates.get(mostCaughtUpSourceClient),
+                        clientStates.get(movement.destination),
+                        warmups.computeIfAbsent(movement.destination, x -> new TreeSet<>())
+                );
+            }
+            caughtUpClientsByTaskLoad.offerAll(asList(mostCaughtUpSourceClient, movement.destination));
+            return true;
+        }
+        return false;
+    }
+
     private static void moveActiveAndTryToWarmUp(final AtomicInteger remainingWarmupReplicas,
                                                  final TaskId task,
                                                  final ClientState sourceClientState,
@@ -235,4 +297,24 @@ private static void swapStandbyAndActive(final TaskId task,
         destinationClientState.assignStandby(task);
     }
 
+    private static UUID mostCaughtUpEligibleClient(final Map<TaskId, SortedSet<UUID>> tasksToClientByLag,
+                                                   final TaskId task,
+                                                   final UUID destinationClient) {
+        return mostCaughtUpEligibleClient(tasksToClientByLag, client -> true, task, destinationClient);
+    }
+
+    private static UUID mostCaughtUpEligibleClient(final Map<TaskId, SortedSet<UUID>> tasksToClientByLag,
+                                                   final Function<UUID, Boolean> constraint,
+                                                   final TaskId task,
+                                                   final UUID destinationClient) {
+        for (final UUID client : tasksToClientByLag.get(task)) {
+            if (destinationClient.equals(client)) {
+                break;
+            } else if (constraint.apply(client)) {
+                return client;
+            }
+        }
+        return null;
+    }
+
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetrics.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetrics.java
index 231d9a6277cfb..8dcd265a244ac 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetrics.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetrics.java
@@ -21,6 +21,9 @@
 
 import java.util.Map;
 
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.AVG_LATENCY_DESCRIPTION;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.LATENCY_SUFFIX;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.MAX_LATENCY_DESCRIPTION;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.PROCESSOR_NODE_LEVEL_GROUP;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.RECORD_E2E_LATENCY;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.RECORD_E2E_LATENCY_AVG_DESCRIPTION;
@@ -31,6 +34,8 @@
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOTAL_DESCRIPTION;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addAvgAndMinAndMaxToSensor;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addInvocationRateAndCountToSensor;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addRateOfSumAndSumMetricsToSensor;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addAvgAndMaxToSensor;
 
 public class ProcessorNodeMetrics {
     private ProcessorNodeMetrics() {}
@@ -62,6 +67,17 @@ private ProcessorNodeMetrics() {}
     private static final String FORWARD_RATE_DESCRIPTION =
         RATE_DESCRIPTION_PREFIX + FORWARD_DESCRIPTION + RATE_DESCRIPTION_SUFFIX;
 
+    private static final String EMITTED_RECORDS = "window-aggregate-final-emit";
+    private static final String EMITTED_RECORDS_DESCRIPTION = "emit final records";
+    private static final String EMITTED_RECORDS_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + EMITTED_RECORDS_DESCRIPTION;
+    private static final String EMITTED_RECORDS_RATE_DESCRIPTION =
+        RATE_DESCRIPTION_PREFIX + EMITTED_RECORDS_DESCRIPTION + RATE_DESCRIPTION_SUFFIX;
+
+    private static final String EMIT_FINAL_LATENCY = EMITTED_RECORDS + LATENCY_SUFFIX;
+    private static final String EMIT_FINAL_DESCRIPTION = "calls to emit final";
+    private static final String EMIT_FINAL_AVG_LATENCY_DESCRIPTION = AVG_LATENCY_DESCRIPTION + EMIT_FINAL_DESCRIPTION;
+    private static final String EMIT_FINAL_MAX_LATENCY_DESCRIPTION = MAX_LATENCY_DESCRIPTION + EMIT_FINAL_DESCRIPTION;
+
     public static Sensor suppressionEmitSensor(final String threadId,
                                                final String taskId,
                                                final String processorNodeId,
@@ -165,6 +181,42 @@ public static Sensor e2ELatencySensor(final String threadId,
         return sensor;
     }
 
+    public static Sensor emitFinalLatencySensor(final String threadId,
+                                                final String taskId,
+                                                final String processorNodeId,
+                                                final StreamsMetricsImpl streamsMetrics) {
+        final String sensorName = processorNodeId + "-" + EMIT_FINAL_LATENCY;
+        final Sensor sensor = streamsMetrics.nodeLevelSensor(threadId, taskId, processorNodeId, sensorName, RecordingLevel.DEBUG);
+        final Map<String, String> tagMap = streamsMetrics.nodeLevelTagMap(threadId, taskId, processorNodeId);
+        addAvgAndMaxToSensor(
+            sensor,
+            PROCESSOR_NODE_LEVEL_GROUP,
+            tagMap,
+            EMIT_FINAL_LATENCY,
+            EMIT_FINAL_AVG_LATENCY_DESCRIPTION,
+            EMIT_FINAL_MAX_LATENCY_DESCRIPTION
+        );
+        return sensor;
+    }
+
+    public static Sensor emittedRecordsSensor(final String threadId,
+                                              final String taskId,
+                                              final String processorNodeId,
+                                              final StreamsMetricsImpl streamsMetrics) {
+        final String sensorName = processorNodeId + "-" + EMITTED_RECORDS;
+        final Sensor sensor = streamsMetrics.nodeLevelSensor(threadId, taskId, processorNodeId, sensorName, RecordingLevel.DEBUG);
+        final Map<String, String> tagMap = streamsMetrics.nodeLevelTagMap(threadId, taskId, processorNodeId);
+        addRateOfSumAndSumMetricsToSensor(
+            sensor,
+            PROCESSOR_NODE_LEVEL_GROUP,
+            tagMap,
+            EMITTED_RECORDS,
+            EMITTED_RECORDS_RATE_DESCRIPTION,
+            EMITTED_RECORDS_TOTAL_DESCRIPTION
+        );
+        return sensor;
+    }
+
     private static Sensor throughputParentSensor(final String threadId,
                                                  final String taskId,
                                                  final String metricNamePrefix,
@@ -207,4 +259,6 @@ private static Sensor throughputSensor(final String threadId,
         );
         return sensor;
     }
+
+
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImpl.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImpl.java
index dea23993d23a2..3260bfc1b8276 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImpl.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImpl.java
@@ -95,6 +95,7 @@ public int hashCode() {
     private final Map<String, Deque<String>> threadLevelSensors = new HashMap<>();
     private final Map<String, Deque<String>> taskLevelSensors = new HashMap<>();
     private final Map<String, Deque<String>> nodeLevelSensors = new HashMap<>();
+    private final Map<String, Deque<String>> topicLevelSensors = new HashMap<>();
     private final Map<String, Deque<String>> cacheLevelSensors = new HashMap<>();
     private final ConcurrentMap<String, Deque<String>> storeLevelSensors = new ConcurrentHashMap<>();
     private final ConcurrentMap<String, Deque<MetricName>> storeLevelMetrics = new ConcurrentHashMap<>();
@@ -105,6 +106,7 @@ public int hashCode() {
     private static final String SENSOR_NAME_DELIMITER = ".s.";
     private static final String SENSOR_TASK_LABEL = "task";
     private static final String SENSOR_NODE_LABEL = "node";
+    private static final String SENSOR_TOPIC_LABEL = "topic";
     private static final String SENSOR_CACHE_LABEL = "cache";
     private static final String SENSOR_STORE_LABEL = "store";
     private static final String SENSOR_ENTITY_LABEL = "entity";
@@ -115,6 +117,7 @@ public int hashCode() {
     public static final String THREAD_ID_TAG = "thread-id";
     public static final String TASK_ID_TAG = "task-id";
     public static final String PROCESSOR_NODE_ID_TAG = "processor-node-id";
+    public static final String TOPIC_NAME_TAG = "topic";
     public static final String STORE_ID_TAG = "state-id";
     public static final String RECORD_CACHE_ID_TAG = "record-cache-id";
 
@@ -136,6 +139,7 @@ public int hashCode() {
     public static final String THREAD_LEVEL_GROUP = GROUP_PREFIX + "thread" + GROUP_SUFFIX;
     public static final String TASK_LEVEL_GROUP = GROUP_PREFIX + "task" + GROUP_SUFFIX;
     public static final String PROCESSOR_NODE_LEVEL_GROUP = GROUP_PREFIX + "processor-node" + GROUP_SUFFIX;
+    public static final String TOPIC_LEVEL_GROUP = GROUP_PREFIX + "topic" + GROUP_SUFFIX;
     public static final String STATE_STORE_LEVEL_GROUP = GROUP_PREFIX + "state" + GROUP_SUFFIX;
     public static final String CACHE_LEVEL_GROUP = GROUP_PREFIX + "record-cache" + GROUP_SUFFIX;
 
@@ -325,6 +329,15 @@ public Map<String, String> nodeLevelTagMap(final String threadId,
         return tagMap;
     }
 
+    public Map<String, String> topicLevelTagMap(final String threadId,
+                                                final String taskName,
+                                                final String processorNodeName,
+                                                final String topicName) {
+        final Map<String, String> tagMap = nodeLevelTagMap(threadId, taskName, processorNodeName);
+        tagMap.put(TOPIC_NAME_TAG, topicName);
+        return tagMap;
+    }
+
     public Map<String, String> storeLevelTagMap(final String taskName,
                                                 final String storeType,
                                                 final String storeName) {
@@ -388,6 +401,40 @@ private String nodeSensorPrefix(final String threadId, final String taskId, fina
             + SENSOR_PREFIX_DELIMITER + SENSOR_NODE_LABEL + SENSOR_PREFIX_DELIMITER + processorNodeName;
     }
 
+    public Sensor topicLevelSensor(final String threadId,
+                                   final String taskId,
+                                   final String processorNodeName,
+                                   final String topicName,
+                                   final String sensorName,
+                                   final Sensor.RecordingLevel recordingLevel,
+                                   final Sensor... parents) {
+        final String key = topicSensorPrefix(threadId, taskId, processorNodeName, topicName);
+        synchronized (topicLevelSensors) {
+            return getSensors(topicLevelSensors, sensorName, key, recordingLevel, parents);
+        }
+    }
+
+    public final void removeAllTopicLevelSensors(final String threadId,
+                                                 final String taskId,
+                                                 final String processorNodeName,
+                                                 final String topicName) {
+        final String key = topicSensorPrefix(threadId, taskId, processorNodeName, topicName);
+        synchronized (topicLevelSensors) {
+            final Deque<String> sensors = topicLevelSensors.remove(key);
+            while (sensors != null && !sensors.isEmpty()) {
+                metrics.removeSensor(sensors.pop());
+            }
+        }
+    }
+
+    private String topicSensorPrefix(final String threadId,
+                                     final String taskId,
+                                     final String processorNodeName,
+                                     final String topicName) {
+        return nodeSensorPrefix(threadId, taskId, processorNodeName)
+            + SENSOR_PREFIX_DELIMITER + SENSOR_TOPIC_LABEL + SENSOR_PREFIX_DELIMITER + topicName;
+    }
+
     public Sensor cacheLevelSensor(final String threadId,
                                    final String taskName,
                                    final String storeName,
@@ -455,9 +502,8 @@ public <T> void addStoreLevelMutableMetric(final String taskId,
             storeLevelTagMap(taskId, metricsScope, storeName)
         );
         if (metrics.metric(metricName) == null) {
-            final MetricConfig metricConfig = new MetricConfig().recordLevel(recordingLevel);
+            metrics.addMetricIfAbsent(metricName, new MetricConfig().recordLevel(recordingLevel), valueProvider);
             final String key = storeSensorPrefix(Thread.currentThread().getName(), taskId, storeName);
-            metrics.addMetric(metricName, metricConfig, valueProvider);
             storeLevelMetrics.computeIfAbsent(key, ignored -> new LinkedList<>()).push(metricName);
         }
     }
@@ -795,6 +841,23 @@ public static void addAvgAndSumMetricsToSensor(final Sensor sensor,
         );
     }
 
+    public static void addTotalCountAndSumMetricsToSensor(final Sensor sensor,
+                                                          final String group,
+                                                          final Map<String, String> tags,
+                                                          final String countMetricNamePrefix,
+                                                          final String sumMetricNamePrefix,
+                                                          final String descriptionOfCount,
+                                                          final String descriptionOfTotal) {
+        sensor.add(
+            new MetricName(countMetricNamePrefix + TOTAL_SUFFIX, group, descriptionOfCount, tags),
+            new CumulativeCount()
+        );
+        sensor.add(
+            new MetricName(sumMetricNamePrefix + TOTAL_SUFFIX, group, descriptionOfTotal, tags),
+            new CumulativeSum()
+        );
+    }
+
     public static void maybeMeasureLatency(final Runnable actionToMeasure,
                                            final Time time,
                                            final Sensor sensor) {
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetrics.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetrics.java
index 9c3e809497189..eda173e532fa8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetrics.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetrics.java
@@ -45,7 +45,6 @@ private ThreadMetrics() {}
     private static final String PUNCTUATE = "punctuate";
     private static final String CREATE_TASK = "task-created";
     private static final String CLOSE_TASK = "task-closed";
-    private static final String SKIP_RECORD = "skipped-records";
     private static final String BLOCKED_TIME = "blocked-time-ns-total";
     private static final String THREAD_START_TIME = "thread-start-time";
 
@@ -79,9 +78,6 @@ private ThreadMetrics() {}
     private static final String PUNCTUATE_RATE_DESCRIPTION = RATE_DESCRIPTION + PUNCTUATE_DESCRIPTION;
     private static final String PUNCTUATE_AVG_LATENCY_DESCRIPTION = "The average punctuate latency";
     private static final String PUNCTUATE_MAX_LATENCY_DESCRIPTION = "The maximum punctuate latency";
-    private static final String SKIP_RECORDS_DESCRIPTION = "skipped records";
-    private static final String SKIP_RECORD_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + SKIP_RECORDS_DESCRIPTION;
-    private static final String SKIP_RECORD_RATE_DESCRIPTION = RATE_DESCRIPTION + SKIP_RECORDS_DESCRIPTION;
     private static final String COMMIT_OVER_TASKS_DESCRIPTION =
         "calls to commit over all tasks assigned to one stream thread";
     private static final String COMMIT_OVER_TASKS_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + COMMIT_OVER_TASKS_DESCRIPTION;
@@ -123,18 +119,6 @@ public static Sensor closeTaskSensor(final String threadId,
         );
     }
 
-    public static Sensor skipRecordSensor(final String threadId,
-                                          final StreamsMetricsImpl streamsMetrics) {
-        return invocationRateAndCountSensor(
-            threadId,
-            SKIP_RECORD,
-            SKIP_RECORD_RATE_DESCRIPTION,
-            SKIP_RECORD_TOTAL_DESCRIPTION,
-            RecordingLevel.INFO,
-            streamsMetrics
-        );
-    }
-
     public static Sensor commitSensor(final String threadId,
                                       final StreamsMetricsImpl streamsMetrics) {
         return invocationRateAndCountAndAvgAndMaxLatencySensor(
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetrics.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetrics.java
new file mode 100644
index 0000000000000..85b438d969618
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetrics.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals.metrics;
+
+import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
+
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOPIC_LEVEL_GROUP;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOTAL_DESCRIPTION;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addTotalCountAndSumMetricsToSensor;
+
+public class TopicMetrics {
+
+    private static final String CONSUMED = "consumed";
+    private static final String BYTES_CONSUMED = "bytes-consumed";
+    private static final String BYTES_CONSUMED_DESCRIPTION = "bytes consumed from this topic";
+    private static final String BYTES_CONSUMED_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + BYTES_CONSUMED_DESCRIPTION;
+    private static final String RECORDS_CONSUMED = "records-consumed";
+    private static final String RECORDS_CONSUMED_DESCRIPTION = "records consumed from this topic";
+    private static final String RECORDS_CONSUMED_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + RECORDS_CONSUMED_DESCRIPTION;
+
+    private static final String PRODUCED = "produced";
+    private static final String BYTES_PRODUCED = "bytes-produced";
+    private static final String BYTES_PRODUCED_DESCRIPTION = "bytes produced to this topic";
+    private static final String BYTES_PRODUCED_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + BYTES_PRODUCED_DESCRIPTION;
+    private static final String RECORDS_PRODUCED = "records-produced";
+    private static final String RECORDS_PRODUCED_DESCRIPTION = "records produced to this topic";
+    private static final String RECORDS_PRODUCED_TOTAL_DESCRIPTION = TOTAL_DESCRIPTION + RECORDS_PRODUCED_DESCRIPTION;
+
+    public static Sensor consumedSensor(final String threadId,
+                                        final String taskId,
+                                        final String processorNodeId,
+                                        final String topic,
+                                        final StreamsMetricsImpl streamsMetrics) {
+        final Sensor sensor = streamsMetrics.topicLevelSensor(
+            threadId,
+            taskId,
+            processorNodeId,
+            topic,
+            CONSUMED,
+            RecordingLevel.INFO);
+        addTotalCountAndSumMetricsToSensor(
+            sensor,
+            TOPIC_LEVEL_GROUP,
+            streamsMetrics.topicLevelTagMap(threadId, taskId, processorNodeId, topic),
+            RECORDS_CONSUMED,
+            BYTES_CONSUMED,
+            RECORDS_CONSUMED_TOTAL_DESCRIPTION,
+            BYTES_CONSUMED_TOTAL_DESCRIPTION
+        );
+        return sensor;
+    }
+
+    public static Sensor producedSensor(final String threadId,
+                                        final String taskId,
+                                        final String processorNodeId,
+                                        final String topic,
+                                        final StreamsMetricsImpl streamsMetrics) {
+        final Sensor sensor = streamsMetrics.topicLevelSensor(
+            threadId,
+            taskId,
+            processorNodeId,
+            topic,
+            PRODUCED,
+            RecordingLevel.INFO);
+        addTotalCountAndSumMetricsToSensor(
+            sensor,
+            TOPIC_LEVEL_GROUP,
+            streamsMetrics.topicLevelTagMap(threadId, taskId, processorNodeId, topic),
+            RECORDS_PRODUCED,
+            BYTES_PRODUCED,
+            RECORDS_PRODUCED_TOTAL_DESCRIPTION,
+            BYTES_PRODUCED_TOTAL_DESCRIPTION
+        );
+        return sensor;
+    }
+
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/KafkaStreamsNamedTopologyWrapper.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/KafkaStreamsNamedTopologyWrapper.java
index 0e29f4d694a8c..3d22c583373c2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/KafkaStreamsNamedTopologyWrapper.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/KafkaStreamsNamedTopologyWrapper.java
@@ -253,6 +253,31 @@ public RemoveNamedTopologyResult removeNamedTopology(final String topologyToRemo
         }
     }
 
+    /**
+     * Pauses a topology by name
+     * @param topologyName Name of the topology to pause
+     */
+    public void pauseNamedTopology(final String topologyName) {
+        topologyMetadata.pauseTopology(topologyName);
+    }
+
+    /**
+     * Checks if a given topology is paused.
+     * @param topologyName If null, assume that we are checking the `UNNAMED_TOPOLOGY`.
+     * @return A boolean indicating if the topology is paused.
+     */
+    public boolean isNamedTopologyPaused(final String topologyName) {
+        return topologyMetadata.isPaused(topologyName);
+    }
+
+    /**
+     * Resumes a topology by name
+     * @param topologyName Name of the topology to resume
+     */
+    public void resumeNamedTopology(final String topologyName) {
+        topologyMetadata.resumeTopology(topologyName);
+    }
+
     /**
      * @return  true iff the application is still in CREATED and the future was completed
      */
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopology.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopology.java
index a1debbe4b5620..208aa9f6e03a1 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopology.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopology.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.streams.processor.internals.namedtopology;
 
 import org.apache.kafka.streams.Topology;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
 
 import java.util.List;
diff --git a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopologyBuilder.java b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopologyBuilder.java
index ddd9192d53e55..42af69e25f50d 100644
--- a/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopologyBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/processor/internals/namedtopology/NamedTopologyBuilder.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
 
 import java.util.Properties;
diff --git a/streams/src/main/java/org/apache/kafka/streams/query/QueryResult.java b/streams/src/main/java/org/apache/kafka/streams/query/QueryResult.java
index 5dba681b92503..045077609d47e 100644
--- a/streams/src/main/java/org/apache/kafka/streams/query/QueryResult.java
+++ b/streams/src/main/java/org/apache/kafka/streams/query/QueryResult.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.streams.query;
 
-
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.query.internals.FailedQueryResult;
 import org.apache.kafka.streams.query.internals.SucceededQueryResult;
@@ -31,7 +30,7 @@
 public interface QueryResult<R> {
     /**
      * Static factory method to create a result object for a successful query. Used by StateStores
-     * to respond to a {@link StateStore#query(Query, PositionBound, boolean)}.
+     * to respond to a {@link StateStore#query(Query, PositionBound, QueryConfig)}.
      */
     static <R> QueryResult<R> forResult(final R result) {
         return new SucceededQueryResult<>(result);
@@ -39,7 +38,7 @@ static <R> QueryResult<R> forResult(final R result) {
 
     /**
      * Static factory method to create a result object for a failed query. Used by StateStores to
-     * respond to a {@link StateStore#query(Query, PositionBound, boolean)}.
+     * respond to a {@link StateStore#query(Query, PositionBound, QueryConfig)}.
      */
     static <R> QueryResult<R> forFailure(
         final FailureReason failureReason,
@@ -52,7 +51,7 @@ static <R> QueryResult<R> forFailure(
      * Static factory method to create a failed query result object to indicate that the store does
      * not know how to handle the query.
      * <p>
-     * Used by StateStores to respond to a {@link StateStore#query(Query, PositionBound, boolean)}.
+     * Used by StateStores to respond to a {@link StateStore#query(Query, PositionBound, QueryConfig)}.
      */
     static <R> QueryResult<R> forUnknownQueryType(
         final Query<R> query,
@@ -69,7 +68,7 @@ static <R> QueryResult<R> forUnknownQueryType(
      * Static factory method to create a failed query result object to indicate that the store has
      * not yet caught up to the requested position bound.
      * <p>
-     * Used by StateStores to respond to a {@link StateStore#query(Query, PositionBound, boolean)}.
+     * Used by StateStores to respond to a {@link StateStore#query(Query, PositionBound, QueryConfig)}.
      */
     static <R> QueryResult<R> notUpToBound(
         final Position currentPosition,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/ReadOnlySessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/ReadOnlySessionStore.java
index 4d44691992303..7fe11a6bea0e7 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/ReadOnlySessionStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/ReadOnlySessionStore.java
@@ -322,9 +322,9 @@ default KeyValueIterator<Windowed<K>, AGG> backwardFindSessions(final K keyFrom,
     /**
      * Get the value of key from a single session.
      *
-     * @param key                    the key to fetch
+     * @param key              the key to fetch
      * @param sessionStartTime start timestamp of the session
-     * @param sessionEndTime end timestamp of the session
+     * @param sessionEndTime   end timestamp of the session
      * @return The value or {@code null} if no session with the exact start and end timestamp exists
      *         for the given key
      * @throws NullPointerException If {@code null} is used for any key.
@@ -339,9 +339,9 @@ default AGG fetchSession(final K key,
     /**
      * Get the value of key from a single session.
      *
-     * @param key                    the key to fetch
+     * @param key              the key to fetch
      * @param sessionStartTime start timestamp of the session
-     * @param sessionEndTime end timestamp of the session
+     * @param sessionEndTime   end timestamp of the session
      * @return The value or {@code null} if no session with the exact start and end timestamp exists
      *         for the given key
      * @throws NullPointerException If {@code null} is used for any key.
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/SessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/SessionStore.java
index 926cddc4d2a43..76a43173946e4 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/SessionStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/SessionStore.java
@@ -39,6 +39,19 @@
  */
 public interface SessionStore<K, AGG> extends StateStore, ReadOnlySessionStore<K, AGG> {
 
+    /**
+     * Return all the session window entries that ends between the specified range (both ends are inclusive).
+     * This function would be used to retrieve all closed and immutable windows.
+     *
+     * @param earliestSessionEndTime earliest session end time to search from, inclusive
+     * @param latestSessionEndTime latest session end time to search to, inclusive
+     */
+    default KeyValueIterator<Windowed<K>, AGG> findSessions(final long earliestSessionEndTime,
+                                                            final long latestSessionEndTime) {
+        throw new UnsupportedOperationException(
+                "This API is not supported by this implementation of SessionStore.");
+    }
+
     @Override
     default KeyValueIterator<Windowed<K>, AGG> findSessions(final K key,
                                                             final Instant earliestSessionEndTime,
@@ -89,12 +102,14 @@ default KeyValueIterator<Windowed<K>, AGG> backwardFindSessions(final K keyFrom,
                 prepareMillisCheckFailMsgPrefix(latestSessionStartTime, "latestSessionStartTime")));
     }
 
-    default AGG fetchSession(final K key, final Instant earliestSessionEndTime, final Instant latestSessionStartTime) {
+    default AGG fetchSession(final K key,
+                             final Instant sessionStartTime,
+                             final Instant sessionEndTime) {
         return fetchSession(key,
-            ApiUtils.validateMillisecondInstant(earliestSessionEndTime,
-                prepareMillisCheckFailMsgPrefix(earliestSessionEndTime, "startTime")),
-            ApiUtils.validateMillisecondInstant(latestSessionStartTime,
-                prepareMillisCheckFailMsgPrefix(latestSessionStartTime, "endTime")));
+            ApiUtils.validateMillisecondInstant(sessionStartTime,
+                prepareMillisCheckFailMsgPrefix(sessionStartTime, "sessionStartTime")),
+            ApiUtils.validateMillisecondInstant(sessionEndTime,
+                prepareMillisCheckFailMsgPrefix(sessionEndTime, "sessionEndTime")));
     }
 
     /**
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStore.java
index 39bfa6a2ba036..95c1d8d8c81e9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStore.java
@@ -50,7 +50,6 @@ public abstract class AbstractDualSchemaRocksDBSegmentedBytesStore<S extends Seg
 
     private final String name;
     protected final AbstractSegments<S> segments;
-    private final String metricScope;
     protected final KeySchema baseKeySchema;
     protected final Optional<KeySchema> indexKeySchema;
 
@@ -65,12 +64,10 @@ public abstract class AbstractDualSchemaRocksDBSegmentedBytesStore<S extends Seg
     private volatile boolean open;
 
     AbstractDualSchemaRocksDBSegmentedBytesStore(final String name,
-                                                 final String metricScope,
                                                  final KeySchema baseKeySchema,
                                                  final Optional<KeySchema> indexKeySchema,
                                                  final AbstractSegments<S> segments) {
         this.name = name;
-        this.metricScope = metricScope;
         this.baseKeySchema = baseKeySchema;
         this.indexKeySchema = indexKeySchema;
         this.segments = segments;
@@ -179,12 +176,16 @@ public void put(final Bytes rawBaseKey,
             LOG.warn("Skipping record for expired segment.");
         } else {
             StoreQueryUtils.updatePosition(position, stateStoreContext);
-            segment.put(rawBaseKey, value);
 
+            // Put to index first so that if put to base failed, when we iterate index, we will
+            // find no base value. If put to base first but putting to index fails, when we iterate
+            // index, we can't find the key but if we iterate over base store, we can find the key
+            // which lead to inconsistency.
             if (hasIndex()) {
                 final KeyValue<Bytes, byte[]> indexKeyValue = getIndexKeyValue(rawBaseKey, value);
                 segment.put(indexKeyValue.key, indexKeyValue.value);
             }
+            segment.put(rawBaseKey, value);
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractRocksDBTimeOrderedSegmentedBytesStore.java
similarity index 64%
rename from streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStore.java
rename to streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractRocksDBTimeOrderedSegmentedBytesStore.java
index e87af877fba30..0398f0ca06005 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/AbstractRocksDBTimeOrderedSegmentedBytesStore.java
@@ -16,22 +16,12 @@
  */
 package org.apache.kafka.streams.state.internals;
 
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Optional;
-import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
-import org.apache.kafka.streams.errors.ProcessorStateException;
-import org.apache.kafka.streams.processor.internals.ChangelogRecordDeserializationHelper;
 import org.apache.kafka.streams.state.KeyValueIterator;
-import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
-import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
-import org.rocksdb.RocksDBException;
-import org.rocksdb.WriteBatch;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,13 +30,15 @@
  * lookup for a specific key.
  *
  * Schema for first SegmentedBytesStore (base store) is as below:
- *     Key schema: | timestamp + recordkey |
+ *     Key schema: | timestamp + [timestamp] + recordkey |
  *     Value schema: | value |. Value here is determined by caller.
  *
  * Schema for second SegmentedBytesStore (index store) is as below:
- *     Key schema: | record + timestamp |
+ *     Key schema: | record + timestamp + [timestamp]|
  *     Value schema: ||
  *
+ * Note there could be two timestamps if we store both window end time and window start time.
+ *
  * Operations:
  *     Put: 1. Put to index store. 2. Put to base store.
  *     Delete: 1. Delete from base store. 2. Delete from index store.
@@ -59,11 +51,13 @@
  *     Index store can be optional if we can construct the timestamp in base store instead of looking
  *     them up from index store.
  *
+ * @see RocksDBTimeOrderedSessionSegmentedBytesStore
+ * @see RocksDBTimeOrderedWindowSegmentedBytesStore
  */
-public class RocksDBTimeOrderedSegmentedBytesStore extends AbstractDualSchemaRocksDBSegmentedBytesStore<KeyValueSegment> {
+public abstract class AbstractRocksDBTimeOrderedSegmentedBytesStore extends AbstractDualSchemaRocksDBSegmentedBytesStore<KeyValueSegment> {
     private static final Logger LOG = LoggerFactory.getLogger(AbstractDualSchemaRocksDBSegmentedBytesStore.class);
 
-    private class IndexToBaseStoreIterator implements KeyValueIterator<Bytes, byte[]> {
+    abstract class IndexToBaseStoreIterator implements KeyValueIterator<Bytes, byte[]> {
         private final KeyValueIterator<Bytes, byte[]> indexIterator;
         private byte[] cachedValue;
 
@@ -95,7 +89,7 @@ public boolean hasNext() {
                 if (cachedValue == null) {
                     // Key not in base store, inconsistency happened and remove from index.
                     indexIterator.next();
-                    RocksDBTimeOrderedSegmentedBytesStore.this.removeIndex(key);
+                    AbstractRocksDBTimeOrderedSegmentedBytesStore.this.removeIndex(key);
                 } else {
                     return true;
                 }
@@ -114,84 +108,19 @@ public KeyValue<Bytes, byte[]> next() {
             return KeyValue.pair(getBaseKey(ret.key), value);
         }
 
-        private Bytes getBaseKey(final Bytes indexKey) {
-            final byte[] keyBytes = KeyFirstWindowKeySchema.extractStoreKeyBytes(indexKey.get());
-            final long timestamp = KeyFirstWindowKeySchema.extractStoreTimestamp(indexKey.get());
-            final int seqnum = KeyFirstWindowKeySchema.extractStoreSequence(indexKey.get());
-            return TimeFirstWindowKeySchema.toStoreKeyBinary(keyBytes, timestamp, seqnum);
-        }
+        abstract protected Bytes getBaseKey(final Bytes indexKey);
     }
 
-    RocksDBTimeOrderedSegmentedBytesStore(final String name,
-                                          final String metricsScope,
-                                          final long retention,
-                                          final long segmentInterval,
-                                          final boolean withIndex) {
-        super(name, metricsScope, new TimeFirstWindowKeySchema(),
-            Optional.ofNullable(withIndex ? new KeyFirstWindowKeySchema() : null),
+    AbstractRocksDBTimeOrderedSegmentedBytesStore(final String name,
+                                                  final String metricsScope,
+                                                  final long retention,
+                                                  final long segmentInterval,
+                                                  final KeySchema baseKeySchema,
+                                                  final Optional<KeySchema> indexKeySchema) {
+        super(name, baseKeySchema, indexKeySchema,
             new KeyValueSegments(name, metricsScope, retention, segmentInterval));
     }
 
-    public void put(final Bytes key, final long timestamp, final int seqnum, final byte[] value) {
-        final Bytes baseKey = TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum);
-        put(baseKey, value);
-    }
-
-    byte[] fetch(final Bytes key, final long timestamp, final int seqnum) {
-        return get(TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum));
-    }
-
-    @Override
-    protected KeyValue<Bytes, byte[]> getIndexKeyValue(final Bytes baseKey, final byte[] baseValue) {
-        final byte[] key = TimeFirstWindowKeySchema.extractStoreKeyBytes(baseKey.get());
-        final long timestamp = TimeFirstWindowKeySchema.extractStoreTimestamp(baseKey.get());
-        final int seqnum = TimeFirstWindowKeySchema.extractStoreSequence(baseKey.get());
-
-        return KeyValue.pair(KeyFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum), new byte[0]);
-    }
-
-    @Override
-    Map<KeyValueSegment, WriteBatch> getWriteBatches(
-        final Collection<ConsumerRecord<byte[], byte[]>> records) {
-        // advance stream time to the max timestamp in the batch
-        for (final ConsumerRecord<byte[], byte[]> record : records) {
-            final long timestamp = WindowKeySchema.extractStoreTimestamp(record.key());
-            observedStreamTime = Math.max(observedStreamTime, timestamp);
-        }
-
-        final Map<KeyValueSegment, WriteBatch> writeBatchMap = new HashMap<>();
-        for (final ConsumerRecord<byte[], byte[]> record : records) {
-            final long timestamp = WindowKeySchema.extractStoreTimestamp(record.key());
-            final long segmentId = segments.segmentId(timestamp);
-            final KeyValueSegment segment = segments.getOrCreateSegmentIfLive(segmentId, context, observedStreamTime);
-            if (segment != null) {
-                ChangelogRecordDeserializationHelper.applyChecksAndUpdatePosition(
-                    record,
-                    consistencyEnabled,
-                    position
-                );
-                try {
-                    final WriteBatch batch = writeBatchMap.computeIfAbsent(segment, s -> new WriteBatch());
-
-                    // Assuming changelog record is serialized using WindowKeySchema
-                    // from ChangeLoggingTimestampedWindowBytesStore. Reconstruct key/value to restore
-                    if (hasIndex()) {
-                        final byte[] indexKey = KeyFirstWindowKeySchema.fromNonPrefixWindowKey(record.key());
-                        // Take care of tombstone
-                        final byte[] value = record.value() == null ? null : new byte[0];
-                        segment.addToBatch(new KeyValue<>(indexKey, value), batch);
-                    }
-
-                    final byte[] baseKey = TimeFirstWindowKeySchema.fromNonPrefixWindowKey(record.key());
-                    segment.addToBatch(new KeyValue<>(baseKey, record.value()), batch);
-                } catch (final RocksDBException e) {
-                    throw new ProcessorStateException("Error restoring batch to store " + name(), e);
-                }
-            }
-        }
-        return writeBatchMap;
-    }
-
     @Override
     public KeyValueIterator<Bytes, byte[]> fetch(final Bytes key,
                                                  final long from,
@@ -206,18 +135,19 @@ public KeyValueIterator<Bytes, byte[]> backwardFetch(final Bytes key,
         return fetch(key, from, to, false);
     }
 
+    abstract protected IndexToBaseStoreIterator getIndexToBaseStoreIterator(final SegmentIterator<KeyValueSegment> segmentIterator);
+
     KeyValueIterator<Bytes, byte[]> fetch(final Bytes key,
                                           final long from,
                                           final long to,
                                           final boolean forward) {
         if (indexKeySchema.isPresent()) {
-            final List<KeyValueSegment> searchSpace = indexKeySchema.get().segmentsToSearch(segments, from, to,
-                forward);
+            final List<KeyValueSegment> searchSpace = indexKeySchema.get().segmentsToSearch(segments, from, to, forward);
 
             final Bytes binaryFrom = indexKeySchema.get().lowerRangeFixedSize(key, from);
             final Bytes binaryTo = indexKeySchema.get().upperRangeFixedSize(key, to);
 
-            return new IndexToBaseStoreIterator(new SegmentIterator<>(
+            return getIndexToBaseStoreIterator(new SegmentIterator<>(
                 searchSpace.iterator(),
                 indexKeySchema.get().hasNextCondition(key, key, from, to, forward),
                 binaryFrom,
@@ -225,8 +155,7 @@ KeyValueIterator<Bytes, byte[]> fetch(final Bytes key,
                 forward));
         }
 
-        final List<KeyValueSegment> searchSpace = baseKeySchema.segmentsToSearch(segments, from, to,
-            forward);
+        final List<KeyValueSegment> searchSpace = baseKeySchema.segmentsToSearch(segments, from, to, forward);
 
         final Bytes binaryFrom = baseKeySchema.lowerRangeFixedSize(key, from);
         final Bytes binaryTo = baseKeySchema.upperRangeFixedSize(key, to);
@@ -275,7 +204,7 @@ KeyValueIterator<Bytes, byte[]> fetch(final Bytes keyFrom,
             final Bytes binaryFrom = indexKeySchema.get().lowerRange(keyFrom, from);
             final Bytes binaryTo = indexKeySchema.get().upperRange(keyTo, to);
 
-            return new IndexToBaseStoreIterator(new SegmentIterator<>(
+            return getIndexToBaseStoreIterator(new SegmentIterator<>(
                 searchSpace.iterator(),
                 indexKeySchema.get().hasNextCondition(keyFrom, keyTo, from, to, forward),
                 binaryFrom,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingKeyValueStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingKeyValueStore.java
index 1d08d20ec2387..04f2a0c6230f2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingKeyValueStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingKeyValueStore.java
@@ -226,10 +226,9 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry,
             if (rawNewValue != null || rawOldValue != null) {
                 // we need to get the old values if needed, and then put to store, and then flush
                 final ProcessorRecordContext current = context.recordContext();
-                context.setRecordContext(entry.entry().context());
-                wrapped().put(entry.key(), entry.newValue());
-
                 try {
+                    context.setRecordContext(entry.entry().context());
+                    wrapped().put(entry.key(), entry.newValue());
                     flushListener.apply(
                         new Record<>(
                             entry.key().get(),
@@ -241,7 +240,13 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry,
                 }
             }
         } else {
-            wrapped().put(entry.key(), entry.newValue());
+            final ProcessorRecordContext current = context.recordContext();
+            try {
+                context.setRecordContext(entry.entry().context());
+                wrapped().put(entry.key(), entry.newValue());
+            } finally {
+                context.setRecordContext(current);
+            }
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingSessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingSessionStore.java
index 59d2a0e729436..cff10da5f8736 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingSessionStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingSessionStore.java
@@ -105,11 +105,11 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry, final Intern
             // we can skip flushing to downstream as well as writing to underlying store
             if (newValueBytes != null || oldValueBytes != null) {
                 // we need to get the old values if needed, and then put to store, and then flush
-                wrapped().put(bytesKey, entry.newValue());
 
                 final ProcessorRecordContext current = context.recordContext();
-                context.setRecordContext(entry.entry().context());
                 try {
+                    context.setRecordContext(entry.entry().context());
+                    wrapped().put(bytesKey, entry.newValue());
                     flushListener.apply(
                         new Record<>(
                             binaryKey.get(),
@@ -121,7 +121,13 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry, final Intern
                 }
             }
         } else {
-            wrapped().put(bytesKey, entry.newValue());
+            final ProcessorRecordContext current = context.recordContext();
+            try {
+                context.setRecordContext(entry.entry().context());
+                wrapped().put(bytesKey, entry.newValue());
+            } finally {
+                context.setRecordContext(current);
+            }
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingWindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingWindowStore.java
index 8a1f8865fb697..5477e57c71353 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingWindowStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/CachingWindowStore.java
@@ -19,7 +19,6 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.internals.Change;
 import org.apache.kafka.streams.processor.ProcessorContext;
@@ -27,8 +26,8 @@
 import org.apache.kafka.streams.processor.StateStoreContext;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.ProcessorContextUtils;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
-import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.processor.internals.RecordQueue;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.StateSerdes;
@@ -75,27 +74,22 @@ class CachingWindowStore
     @Deprecated
     @Override
     public void init(final ProcessorContext context, final StateStore root) {
-        initInternal(asInternalProcessorContext(context));
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, name(), Boolean.TRUE);
+        initInternal(asInternalProcessorContext(context), changelogTopic);
         super.init(context, root);
     }
 
     @Override
     public void init(final StateStoreContext context, final StateStore root) {
-        initInternal(asInternalProcessorContext(context));
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, name(), Boolean.TRUE);
+        initInternal(asInternalProcessorContext(context), changelogTopic);
         super.init(context, root);
     }
 
-    private void initInternal(final InternalProcessorContext<?, ?> context) {
-        final String prefix = StreamsConfig.InternalConfig.getString(
-            context.appConfigs(),
-            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-            context.applicationId()
-        );
+    private void initInternal(final InternalProcessorContext<?, ?> context, final String changelogTopic) {
         this.context = context;
-        final String topic = ProcessorStateManager.storeChangelogTopic(prefix, name(),  context.taskId().topologyName());
-
         bytesSerdes = new StateSerdes<>(
-            topic,
+            changelogTopic,
             Serdes.Bytes(),
             Serdes.ByteArray());
         cacheName = context.taskId() + "-" + name();
@@ -122,11 +116,11 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry,
             // we can skip flushing to downstream as well as writing to underlying store
             if (rawNewValue != null || rawOldValue != null) {
                 // we need to get the old values if needed, and then put to store, and then flush
-                wrapped().put(binaryKey, entry.newValue(), windowStartTimestamp);
 
                 final ProcessorRecordContext current = context.recordContext();
-                context.setRecordContext(entry.entry().context());
                 try {
+                    context.setRecordContext(entry.entry().context());
+                    wrapped().put(binaryKey, entry.newValue(), windowStartTimestamp);
                     flushListener.apply(
                         new Record<>(
                             binaryWindowKey,
@@ -138,7 +132,13 @@ private void putAndMaybeForward(final ThreadCache.DirtyEntry entry,
                 }
             }
         } else {
-            wrapped().put(binaryKey, entry.newValue(), windowStartTimestamp);
+            final ProcessorRecordContext current = context.recordContext();
+            try {
+                context.setRecordContext(entry.entry().context());
+                wrapped().put(binaryKey, entry.newValue(), windowStartTimestamp);
+            } finally {
+                context.setRecordContext(current);
+            }
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/ChangeLoggingSessionBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/ChangeLoggingSessionBytesStore.java
index ff387ef38e9ce..fd3279880115b 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/ChangeLoggingSessionBytesStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/ChangeLoggingSessionBytesStore.java
@@ -31,9 +31,9 @@
  * Simple wrapper around a {@link SessionStore} to support writing
  * updates to a changelog
  */
-class ChangeLoggingSessionBytesStore
-        extends WrappedStateStore<SessionStore<Bytes, byte[]>, byte[], byte[]>
-        implements SessionStore<Bytes, byte[]> {
+public class ChangeLoggingSessionBytesStore
+    extends WrappedStateStore<SessionStore<Bytes, byte[]>, byte[], byte[]>
+    implements SessionStore<Bytes, byte[]> {
 
     private InternalProcessorContext context;
 
@@ -95,6 +95,12 @@ public byte[] fetchSession(final Bytes key, final long earliestSessionEndTime, f
         return wrapped().fetchSession(key, earliestSessionEndTime, latestSessionStartTime);
     }
 
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final long earliestSessionEndTime,
+                                                                  final long latestSessionEndTime) {
+        return wrapped().findSessions(earliestSessionEndTime, latestSessionEndTime);
+    }
+
     @Override
     public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetch(final Bytes key) {
         return wrapped().backwardFetch(key);
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemorySessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemorySessionStore.java
index 97984dd156ead..579abc3678275 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemorySessionStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemorySessionStore.java
@@ -202,25 +202,36 @@ public void remove(final Windowed<Bytes> sessionKey) {
 
     @Override
     public byte[] fetchSession(final Bytes key,
-                               final long earliestSessionEndTime,
-                               final long latestSessionStartTime) {
+                               final long sessionStartTime,
+                               final long sessionEndTime) {
         removeExpiredSegments();
 
         Objects.requireNonNull(key, "key cannot be null");
 
         // Only need to search if the record hasn't expired yet
-        if (latestSessionStartTime > observedStreamTime - retentionPeriod) {
-            final ConcurrentNavigableMap<Bytes, ConcurrentNavigableMap<Long, byte[]>> keyMap = endTimeMap.get(latestSessionStartTime);
+        if (sessionEndTime > observedStreamTime - retentionPeriod) {
+            final ConcurrentNavigableMap<Bytes, ConcurrentNavigableMap<Long, byte[]>> keyMap = endTimeMap.get(sessionEndTime);
             if (keyMap != null) {
                 final ConcurrentNavigableMap<Long, byte[]> startTimeMap = keyMap.get(key);
                 if (startTimeMap != null) {
-                    return startTimeMap.get(earliestSessionEndTime);
+                    return startTimeMap.get(sessionStartTime);
                 }
             }
         }
         return null;
     }
 
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final long earliestSessionEndTime,
+                                                                  final long latestSessionEndTime) {
+        removeExpiredSegments();
+
+        final ConcurrentNavigableMap<Long, ConcurrentNavigableMap<Bytes, ConcurrentNavigableMap<Long, byte[]>>> endTimSubMap
+            = endTimeMap.subMap(earliestSessionEndTime, true, latestSessionEndTime, true);
+
+        return registerNewIterator(null, null, Long.MAX_VALUE, endTimSubMap.entrySet().iterator(), true);
+    }
+
     @Override
     public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final Bytes key,
                                                                   final long earliestSessionEndTime,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryTimeOrderedKeyValueBuffer.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryTimeOrderedKeyValueBuffer.java
index ba80c35b66931..5403f9e7035bd 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryTimeOrderedKeyValueBuffer.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/InMemoryTimeOrderedKeyValueBuffer.java
@@ -25,7 +25,6 @@
 import org.apache.kafka.common.serialization.BytesSerializer;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.utils.Bytes;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.internals.Change;
 import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
 import org.apache.kafka.streams.processor.ProcessorContext;
@@ -35,7 +34,6 @@
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.ProcessorContextUtils;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
-import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.processor.internals.RecordBatchingStateRestoreCallback;
 import org.apache.kafka.streams.processor.internals.RecordCollector;
 import org.apache.kafka.streams.processor.internals.RecordQueue;
@@ -203,12 +201,14 @@ public void setSerdesIfNull(final SerdeGetter getter) {
     @Override
     public void init(final ProcessorContext context, final StateStore root) {
         this.context = ProcessorContextUtils.asInternalProcessorContext(context);
+        changelogTopic = ProcessorContextUtils.changelogFor(context, name(), Boolean.TRUE);
         init(root);
     }
 
     @Override
     public void init(final StateStoreContext context, final StateStore root) {
         this.context = ProcessorContextUtils.asInternalProcessorContext(context);
+        changelogTopic = ProcessorContextUtils.changelogFor(context, name(), Boolean.TRUE);
         init(root);
     }
 
@@ -229,13 +229,7 @@ private void init(final StateStore root) {
             streamsMetrics
         );
 
-        context.register(root, (RecordBatchingStateRestoreCallback) this::restoreBatch);
-        final String prefix = StreamsConfig.InternalConfig.getString(
-            context.appConfigs(),
-            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-            context.applicationId()
-        );
-        changelogTopic = ProcessorStateManager.storeChangelogTopic(prefix, storeName, context.taskId().topologyName());
+        this.context.register(root, (RecordBatchingStateRestoreCallback) this::restoreBatch);
         updateBufferMetrics();
         open = true;
         partition = context.taskId().partition();
@@ -298,8 +292,9 @@ private void logValue(final Bytes key, final BufferKey bufferKey, final BufferVa
             partition,
             null,
             KEY_SERIALIZER,
-            VALUE_SERIALIZER
-        );
+            VALUE_SERIALIZER,
+            null,
+            null);
     }
 
     private void logTombstone(final Bytes key) {
@@ -311,8 +306,9 @@ private void logTombstone(final Bytes key) {
             partition,
             null,
             KEY_SERIALIZER,
-            VALUE_SERIALIZER
-        );
+            VALUE_SERIALIZER,
+            null,
+            null);
     }
 
     private void restoreBatch(final Collection<ConsumerRecord<byte[], byte[]>> batch) {
@@ -427,7 +423,9 @@ public void evictWhile(final Supplier<Boolean> predicate,
                 delegate.remove();
                 index.remove(next.getKey().key());
 
-                dirtyKeys.add(next.getKey().key());
+                if (loggingEnabled) {
+                    dirtyKeys.add(next.getKey().key());
+                }
 
                 memBufferSize -= computeRecordSize(next.getKey().key(), bufferValue);
 
@@ -501,7 +499,9 @@ public void put(final long time,
             serializedKey,
             new BufferValue(serializedPriorValue, serialChange.oldValue, serialChange.newValue, recordContext)
         );
-        dirtyKeys.add(serializedKey);
+        if (loggingEnabled) {
+            dirtyKeys.add(serializedKey);
+        }
         updateBufferMetrics();
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreIterator.java
index 46004f57b89d0..482a02b5afd26 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreIterator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreIterator.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.streams.state.internals;
 
+import java.util.function.Function;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.state.KeyValueIterator;
@@ -29,11 +30,20 @@
  */
 class MergedSortedCacheWindowStoreIterator extends AbstractMergedSortedCacheStoreIterator<Long, Long, byte[], byte[]> implements WindowStoreIterator<byte[]> {
 
+    private final Function<byte[], Long> timestampExtractor;
 
     MergedSortedCacheWindowStoreIterator(final PeekingKeyValueIterator<Bytes, LRUCacheEntry> cacheIterator,
                                          final KeyValueIterator<Long, byte[]> storeIterator,
                                          final boolean forward) {
+        this(cacheIterator, storeIterator, forward, WindowKeySchema::extractStoreTimestamp);
+    }
+
+    MergedSortedCacheWindowStoreIterator(final PeekingKeyValueIterator<Bytes, LRUCacheEntry> cacheIterator,
+                                         final KeyValueIterator<Long, byte[]> storeIterator,
+                                         final boolean forward,
+                                         final Function<byte[], Long> tsExtractor) {
         super(cacheIterator, storeIterator, forward);
+        this.timestampExtractor = tsExtractor;
     }
 
     @Override
@@ -44,7 +54,7 @@ public KeyValue<Long, byte[]> deserializeStorePair(final KeyValue<Long, byte[]>
     @Override
     Long deserializeCacheKey(final Bytes cacheKey) {
         final byte[] binaryKey = bytesFromCacheKey(cacheKey);
-        return WindowKeySchema.extractStoreTimestamp(binaryKey);
+        return timestampExtractor.apply(binaryKey);
     }
 
     @Override
@@ -61,7 +71,7 @@ public Long deserializeStoreKey(final Long key) {
     public int compare(final Bytes cacheKey, final Long storeKey) {
         final byte[] binaryKey = bytesFromCacheKey(cacheKey);
 
-        final Long cacheTimestamp = WindowKeySchema.extractStoreTimestamp(binaryKey);
+        final Long cacheTimestamp = timestampExtractor.apply(binaryKey);
         return cacheTimestamp.compareTo(storeKey);
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreKeyValueIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreKeyValueIterator.java
index afc6a042fed84..28df5a50538b2 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreKeyValueIterator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWindowStoreKeyValueIterator.java
@@ -17,6 +17,7 @@
 
 package org.apache.kafka.streams.state.internals;
 
+import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.kstream.Windowed;
@@ -29,6 +30,8 @@ class MergedSortedCacheWindowStoreKeyValueIterator
     private final StateSerdes<Bytes, byte[]> serdes;
     private final long windowSize;
     private final SegmentedCacheFunction cacheFunction;
+    private final StoreKeyToWindowKey storeKeyToWindowKey;
+    private final WindowKeyToBytes windowKeyToBytes;
 
     MergedSortedCacheWindowStoreKeyValueIterator(
         final PeekingKeyValueIterator<Bytes, LRUCacheEntry> filteredCacheIterator,
@@ -37,11 +40,27 @@ class MergedSortedCacheWindowStoreKeyValueIterator
         final long windowSize,
         final SegmentedCacheFunction cacheFunction,
         final boolean forward
+    ) {
+        this(filteredCacheIterator, underlyingIterator, serdes,
+            windowSize, cacheFunction, forward, WindowKeySchema::fromStoreKey, WindowKeySchema::toStoreKeyBinary);
+    }
+
+    MergedSortedCacheWindowStoreKeyValueIterator(
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> filteredCacheIterator,
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator,
+        final StateSerdes<Bytes, byte[]> serdes,
+        final long windowSize,
+        final SegmentedCacheFunction cacheFunction,
+        final boolean forward,
+        final StoreKeyToWindowKey storeKeyToWindowKey,
+        final WindowKeyToBytes windowKeyToBytes
     ) {
         super(filteredCacheIterator, underlyingIterator, forward);
         this.serdes = serdes;
         this.windowSize = windowSize;
         this.cacheFunction = cacheFunction;
+        this.storeKeyToWindowKey = storeKeyToWindowKey;
+        this.windowKeyToBytes = windowKeyToBytes;
     }
 
     @Override
@@ -57,7 +76,7 @@ KeyValue<Windowed<Bytes>, byte[]> deserializeStorePair(final KeyValue<Windowed<B
     @Override
     Windowed<Bytes> deserializeCacheKey(final Bytes cacheKey) {
         final byte[] binaryKey = cacheFunction.key(cacheKey).get();
-        return WindowKeySchema.fromStoreKey(binaryKey, windowSize, serdes.keyDeserializer(), serdes.topic());
+        return storeKeyToWindowKey.toWindowKey(binaryKey, windowSize, serdes.keyDeserializer(), serdes.topic());
     }
 
     @Override
@@ -67,7 +86,17 @@ byte[] deserializeCacheValue(final LRUCacheEntry cacheEntry) {
 
     @Override
     int compare(final Bytes cacheKey, final Windowed<Bytes> storeKey) {
-        final Bytes storeKeyBytes = WindowKeySchema.toStoreKeyBinary(storeKey.key(), storeKey.window().start(), 0);
+        final Bytes storeKeyBytes = windowKeyToBytes.toBytes(storeKey.key(), storeKey.window().start(), 0);
         return cacheFunction.compareSegmentedKeys(cacheKey, storeKeyBytes);
     }
+
+    @FunctionalInterface
+    interface StoreKeyToWindowKey {
+        Windowed<Bytes> toWindowKey(final byte[] binaryKey, final long windowSize, final Deserializer<Bytes> deserializer, final String topic);
+    }
+
+    @FunctionalInterface
+    interface WindowKeyToBytes {
+        Bytes toBytes(final Bytes key, final long windowStart, final int seqNum);
+    }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredKeyValueStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredKeyValueStore.java
index 3c6da63f05edc..9f8ca1c72a91b 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredKeyValueStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredKeyValueStore.java
@@ -22,7 +22,6 @@
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.streams.KeyValue;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.ProcessorStateException;
 import org.apache.kafka.streams.kstream.internals.Change;
 import org.apache.kafka.streams.kstream.internals.WrappingNullableUtils;
@@ -32,7 +31,6 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.ProcessorContextUtils;
-import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.processor.internals.SerdeGetter;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.query.KeyQuery;
@@ -173,12 +171,9 @@ protected Serde<V> prepareValueSerdeForStore(final Serde<V> valueSerde, final Se
     @Deprecated
     private void initStoreSerde(final ProcessorContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = getPrefix(context.appConfigs(), context.applicationId());
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             prepareKeySerde(keySerde, new SerdeGetter(context)),
             prepareValueSerdeForStore(valueSerde, new SerdeGetter(context))
         );
@@ -186,29 +181,14 @@ private void initStoreSerde(final ProcessorContext context) {
 
     private void initStoreSerde(final StateStoreContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = getPrefix(context.appConfigs(), context.applicationId());
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             prepareKeySerde(keySerde, new SerdeGetter(context)),
             prepareValueSerdeForStore(valueSerde, new SerdeGetter(context))
         );
     }
 
-    private static String getPrefix(final Map<String, Object> configs, final String applicationId) {
-        if (configs == null) {
-            return applicationId;
-        } else {
-            return StreamsConfig.InternalConfig.getString(
-                configs,
-                StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-                applicationId
-            );
-        }
-    }
-
     @SuppressWarnings("unchecked")
     @Override
     public boolean setFlushListener(final CacheFlushListener<K, V> listener,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredSessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredSessionStore.java
index ee4a62561216d..bc4f2169b3364 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredSessionStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredSessionStore.java
@@ -20,7 +20,6 @@
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.Time;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.ProcessorStateException;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.internals.Change;
@@ -31,7 +30,6 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.ProcessorContextUtils;
-import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.processor.internals.SerdeGetter;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.query.FailureReason;
@@ -138,12 +136,9 @@ private void registerMetrics() {
 
     private void initStoreSerde(final ProcessorContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = getPrefix(context.appConfigs(), context.applicationId());
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             WrappingNullableUtils.prepareKeySerde(keySerde, new SerdeGetter(context)),
             WrappingNullableUtils.prepareValueSerde(valueSerde, new SerdeGetter(context))
         );
@@ -151,29 +146,14 @@ private void initStoreSerde(final ProcessorContext context) {
 
     private void initStoreSerde(final StateStoreContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = getPrefix(context.appConfigs(), context.applicationId());
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             WrappingNullableUtils.prepareKeySerde(keySerde, new SerdeGetter(context)),
             WrappingNullableUtils.prepareValueSerde(valueSerde, new SerdeGetter(context))
         );
     }
 
-    private static String getPrefix(final Map<String, Object> configs, final String applicationId) {
-        if (configs == null) {
-            return applicationId;
-        } else {
-            return StreamsConfig.InternalConfig.getString(
-                configs,
-                StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-                applicationId
-            );
-        }
-    }
-
     @SuppressWarnings("unchecked")
     @Override
     public boolean setFlushListener(final CacheFlushListener<Windowed<K>, V> listener,
@@ -366,6 +346,18 @@ public KeyValueIterator<Windowed<K>, V> findSessions(final K keyFrom,
             time);
     }
 
+    @Override
+    public KeyValueIterator<Windowed<K>, V> findSessions(final long earliestSessionEndTime,
+                                                         final long latestSessionEndTime) {
+        return new MeteredWindowedKeyValueIterator<>(
+                wrapped().findSessions(earliestSessionEndTime, latestSessionEndTime),
+                fetchSensor,
+                streamsMetrics,
+                serdes::keyFrom,
+                serdes::valueFrom,
+                time);
+    }
+
     @Override
     public KeyValueIterator<Windowed<K>, V> backwardFindSessions(final K keyFrom,
                                                                  final K keyTo,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredWindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredWindowStore.java
index c3460130f4f9b..2a08adce53351 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredWindowStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/MeteredWindowStore.java
@@ -20,7 +20,6 @@
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.Time;
-import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.ProcessorStateException;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.internals.Change;
@@ -31,7 +30,6 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.ProcessorContextUtils;
-import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.processor.internals.SerdeGetter;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.query.FailureReason;
@@ -157,32 +155,18 @@ private void registerMetrics() {
     @Deprecated
     private void initStoreSerde(final ProcessorContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = StreamsConfig.InternalConfig.getString(
-            context.appConfigs(),
-            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-            context.applicationId()
-        );
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             prepareKeySerde(keySerde, new SerdeGetter(context)),
             prepareValueSerde(valueSerde, new SerdeGetter(context)));
     }
 
     private void initStoreSerde(final StateStoreContext context) {
         final String storeName = name();
-        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName);
-        final String prefix = StreamsConfig.InternalConfig.getString(
-            context.appConfigs(),
-            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
-            context.applicationId()
-        );
+        final String changelogTopic = ProcessorContextUtils.changelogFor(context, storeName, Boolean.FALSE);
         serdes = new StateSerdes<>(
-            changelogTopic != null ?
-                changelogTopic :
-                ProcessorStateManager.storeChangelogTopic(prefix, storeName, taskId.topologyName()),
+            changelogTopic,
             prepareKeySerde(keySerde, new SerdeGetter(context)),
             prepareValueSerde(valueSerde, new SerdeGetter(context)));
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedSessionKeySchemas.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedSessionKeySchemas.java
new file mode 100644
index 0000000000000..3ce00bcb8a9bb
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedSessionKeySchemas.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serializer;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.kstream.Window;
+import org.apache.kafka.streams.kstream.Windowed;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import org.apache.kafka.streams.kstream.internals.SessionWindow;
+import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
+
+import static org.apache.kafka.streams.state.StateSerdes.TIMESTAMP_SIZE;
+
+public class PrefixedSessionKeySchemas {
+
+    private static final int PREFIX_SIZE = 1;
+    private static final byte TIME_FIRST_PREFIX = 0;
+    private static final byte KEY_FIRST_PREFIX = 1;
+
+    private static byte extractPrefix(final byte[] binaryBytes) {
+        return binaryBytes[0];
+    }
+
+    public static class TimeFirstSessionKeySchema implements KeySchema {
+
+        @Override
+        public Bytes upperRange(final Bytes key, final long to) {
+            if (key == null) {
+                // Put next prefix instead of null so that we can start from right prefix
+                // when scanning backwards
+                final byte nextPrefix = TIME_FIRST_PREFIX + 1;
+                return Bytes.wrap(ByteBuffer.allocate(PREFIX_SIZE).put(nextPrefix).array());
+            }
+            return Bytes.wrap(ByteBuffer.allocate(PREFIX_SIZE + 2 * TIMESTAMP_SIZE + key.get().length)
+                .put(TIME_FIRST_PREFIX)
+                // the end timestamp can be as large as possible as long as it's larger than start time
+                .putLong(Long.MAX_VALUE)
+                // this is the start timestamp
+                .putLong(to)
+                .put(key.get())
+                .array());
+        }
+
+        @Override
+        public Bytes lowerRange(final Bytes key, final long from) {
+            if (key == null) {
+                return Bytes.wrap(ByteBuffer.allocate(PREFIX_SIZE + TIMESTAMP_SIZE)
+                    .put(TIME_FIRST_PREFIX)
+                    .putLong(from)
+                    .array());
+            }
+
+            return Bytes.wrap(ByteBuffer.allocate(PREFIX_SIZE + 2 * TIMESTAMP_SIZE + key.get().length)
+                .put(TIME_FIRST_PREFIX)
+                .putLong(from)
+                .putLong(0L)
+                .put(key.get())
+                .array());
+        }
+
+        /**
+         * @param key the key in the range
+         * @param to the latest start time
+         */
+        @Override
+        public Bytes upperRangeFixedSize(final Bytes key, final long to) {
+            return toBinary(key, to, Long.MAX_VALUE);
+        }
+
+        /**
+         * @param key the key in the range
+         * @param from the earliest end timestamp in the range
+         */
+        @Override
+        public Bytes lowerRangeFixedSize(final Bytes key, final long from) {
+            return toBinary(key, 0, Math.max(0, from));
+        }
+
+        @Override
+        public long segmentTimestamp(final Bytes key) {
+            return extractEndTimestamp(key.get());
+        }
+
+        @Override
+        public HasNextCondition hasNextCondition(final Bytes binaryKeyFrom,
+                                                 final Bytes binaryKeyTo,
+                                                 final long earliestWindowEndTime,
+                                                 final long latestWindowStartTime,
+                                                 final boolean forward) {
+            return iterator -> {
+                while (iterator.hasNext()) {
+                    final Bytes bytes = iterator.peekNextKey();
+                    final byte prefix = extractPrefix(bytes.get());
+
+                    if (prefix != TIME_FIRST_PREFIX) {
+                        return false;
+                    }
+
+                    final Windowed<Bytes> windowedKey = from(bytes);
+                    final long endTime = windowedKey.window().end();
+                    final long startTime = windowedKey.window().start();
+
+                    // We can return false directly here since keys are sorted by end time and if
+                    // we get time smaller than `from`, there won't be time within range.
+                    if (!forward && endTime < earliestWindowEndTime) {
+                        return false;
+                    }
+
+                    if ((binaryKeyFrom == null || windowedKey.key().compareTo(binaryKeyFrom) >= 0)
+                        && (binaryKeyTo == null || windowedKey.key().compareTo(binaryKeyTo) <= 0)
+                        && endTime >= earliestWindowEndTime && startTime <= latestWindowStartTime) {
+                        return true;
+                    }
+                    iterator.next();
+                }
+                return false;
+            };
+        }
+
+        @Override
+        public <S extends Segment> List<S> segmentsToSearch(final Segments<S> segments,
+                                                            final long earliestWindowEndTime,
+                                                            final long latestWindowStartTime,
+                                                            final boolean forward) {
+            return segments.segments(earliestWindowEndTime, Long.MAX_VALUE, forward);
+        }
+
+        static long extractStartTimestamp(final byte[] binaryKey) {
+            return ByteBuffer.wrap(binaryKey).getLong(PREFIX_SIZE + TIMESTAMP_SIZE);
+        }
+
+        static long extractEndTimestamp(final byte[] binaryKey) {
+            return ByteBuffer.wrap(binaryKey).getLong(PREFIX_SIZE);
+        }
+
+        private static <K> K extractKey(final byte[] binaryKey,
+                                        final Deserializer<K> deserializer,
+                                        final String topic) {
+            return deserializer.deserialize(topic, extractKeyBytes(binaryKey));
+        }
+
+        static byte[] extractKeyBytes(final byte[] binaryKey) {
+            final byte[] bytes = new byte[binaryKey.length - 2 * TIMESTAMP_SIZE - PREFIX_SIZE];
+            System.arraycopy(binaryKey, PREFIX_SIZE + 2 * TIMESTAMP_SIZE, bytes, 0, bytes.length);
+            return bytes;
+        }
+
+        static Window extractWindow(final byte[] binaryKey) {
+            final ByteBuffer buffer = ByteBuffer.wrap(binaryKey);
+            final long start = buffer.getLong(PREFIX_SIZE + TIMESTAMP_SIZE);
+            final long end = buffer.getLong(PREFIX_SIZE);
+            return new SessionWindow(start, end);
+        }
+
+        public static Windowed<Bytes> from(final Bytes bytesKey) {
+            final byte[] binaryKey = bytesKey.get();
+            final Window window = extractWindow(binaryKey);
+            return new Windowed<>(Bytes.wrap(extractKeyBytes(binaryKey)), window);
+        }
+
+        public static <K> Windowed<K> from(final byte[] binaryKey,
+                                           final Deserializer<K> keyDeserializer,
+                                           final String topic) {
+            final K key = extractKey(binaryKey, keyDeserializer, topic);
+            final Window window = extractWindow(binaryKey);
+            return new Windowed<>(key, window);
+        }
+
+        public static <K> byte[] toBinary(final Windowed<K> sessionKey,
+                                          final Serializer<K> serializer,
+                                          final String topic) {
+            final byte[] bytes = serializer.serialize(topic, sessionKey.key());
+            return toBinary(Bytes.wrap(bytes), sessionKey.window().start(), sessionKey.window().end()).get();
+        }
+
+        public static Bytes toBinary(final Windowed<Bytes> sessionKey) {
+            return toBinary(sessionKey.key(), sessionKey.window().start(), sessionKey.window().end());
+        }
+
+        // for time prefixed schema, like the session key schema we need to put time stamps first, then the key
+        // and hence we need to override the write binary function with the write reordering
+        public static void writeBinary(final ByteBuffer buf,
+                                       final Bytes key,
+                                       final long startTime,
+                                       final long endTime) {
+            buf.putLong(endTime);
+            buf.putLong(startTime);
+            if (key != null) {
+                buf.put(key.get());
+            }
+        }
+
+        public static Bytes toBinary(final Bytes key,
+                                     final long startTime,
+                                     final long endTime) {
+            final ByteBuffer buf = ByteBuffer.allocate(PREFIX_SIZE + SessionKeySchema.keyByteLength(key));
+            buf.put(TIME_FIRST_PREFIX);
+            writeBinary(buf, key, startTime, endTime);
+            return Bytes.wrap(buf.array());
+        }
+
+        public static byte[] extractWindowBytesFromNonPrefixSessionKey(final byte[] binaryKey) {
+            final ByteBuffer buffer = ByteBuffer.allocate(PREFIX_SIZE + binaryKey.length).put(TIME_FIRST_PREFIX);
+            // Put timestamp
+            buffer.put(binaryKey, binaryKey.length - 2 * TIMESTAMP_SIZE, 2 * TIMESTAMP_SIZE);
+            buffer.put(binaryKey, 0, binaryKey.length - 2 * TIMESTAMP_SIZE);
+
+            return buffer.array();
+        }
+    }
+
+    public static class KeyFirstSessionKeySchema implements KeySchema {
+
+        @Override
+        public Bytes upperRange(final Bytes key, final long to) {
+            final Bytes noPrefixBytes = new SessionKeySchema().upperRange(key, to);
+            return wrapPrefix(noPrefixBytes, true);
+        }
+
+        @Override
+        public Bytes lowerRange(final Bytes key, final long from) {
+            final Bytes noPrefixBytes = new SessionKeySchema().lowerRange(key, from);
+            // Wrap at least prefix even key is null
+            return wrapPrefix(noPrefixBytes, false);
+        }
+
+        @Override
+        public Bytes upperRangeFixedSize(final Bytes key, final long to) {
+            final ByteBuffer buffer = ByteBuffer.allocate(PREFIX_SIZE + SessionKeySchema.keyByteLength(key));
+            buffer.put(KEY_FIRST_PREFIX);
+            SessionKeySchema.writeBinary(buffer, SessionKeySchema.upperRangeFixedWindow(key, to));
+            return Bytes.wrap(buffer.array());
+        }
+
+        @Override
+        public Bytes lowerRangeFixedSize(final Bytes key, final long from) {
+            final ByteBuffer buffer = ByteBuffer.allocate(PREFIX_SIZE + SessionKeySchema.keyByteLength(key));
+            buffer.put(KEY_FIRST_PREFIX);
+            SessionKeySchema.writeBinary(buffer, SessionKeySchema.lowerRangeFixedWindow(key, from));
+            return Bytes.wrap(buffer.array());
+        }
+
+        @Override
+        public long segmentTimestamp(final Bytes key) {
+            return extractEndTimestamp(key.get());
+        }
+
+        @Override
+        public HasNextCondition hasNextCondition(final Bytes binaryKeyFrom,
+                                                 final Bytes binaryKeyTo,
+                                                 final long from,
+                                                 final long to,
+                                                 final boolean forward) {
+            return iterator -> {
+                while (iterator.hasNext()) {
+                    final Bytes bytes = iterator.peekNextKey();
+                    final byte prefix = extractPrefix(bytes.get());
+
+                    if (prefix != KEY_FIRST_PREFIX) {
+                        return false;
+                    }
+
+                    final Windowed<Bytes> windowedKey = from(bytes);
+                    final long endTime = windowedKey.window().end();
+                    final long startTime = windowedKey.window().start();
+
+                    if ((binaryKeyFrom == null || windowedKey.key().compareTo(binaryKeyFrom) >= 0)
+                        && (binaryKeyTo == null || windowedKey.key().compareTo(binaryKeyTo) <= 0)
+                        && endTime >= from
+                        && startTime <= to) {
+                        return true;
+                    }
+                    iterator.next();
+                }
+                return false;
+            };
+        }
+
+        @Override
+        public <S extends Segment> List<S> segmentsToSearch(final Segments<S> segments,
+                                                            final long from,
+                                                            final long to,
+                                                            final boolean forward) {
+            return segments.segments(from, Long.MAX_VALUE, forward);
+        }
+
+        static Window extractWindow(final byte[] binaryKey) {
+            final ByteBuffer buffer = ByteBuffer.wrap(binaryKey);
+            final long start = buffer.getLong(binaryKey.length - TIMESTAMP_SIZE);
+            final long end = buffer.getLong(binaryKey.length - 2 * TIMESTAMP_SIZE);
+            return new SessionWindow(start, end);
+        }
+
+        static byte[] extractKeyBytes(final byte[] binaryKey) {
+            final byte[] bytes = new byte[binaryKey.length - 2 * TIMESTAMP_SIZE - PREFIX_SIZE];
+            System.arraycopy(binaryKey, PREFIX_SIZE, bytes, 0, bytes.length);
+            return bytes;
+        }
+
+        public static Windowed<Bytes> from(final Bytes bytesKey) {
+            final byte[] binaryKey = bytesKey.get();
+            final Window window = extractWindow(binaryKey);
+            return new Windowed<>(Bytes.wrap(extractKeyBytes(binaryKey)), window);
+        }
+
+        private static <K> K extractKey(final byte[] binaryKey,
+                                        final Deserializer<K> deserializer,
+                                        final String topic) {
+            return deserializer.deserialize(topic, extractKeyBytes(binaryKey));
+        }
+
+        public static <K> Windowed<K> from(final byte[] binaryKey,
+                                           final Deserializer<K> keyDeserializer,
+                                           final String topic) {
+            final K key = extractKey(binaryKey, keyDeserializer, topic);
+            final Window window = extractWindow(binaryKey);
+            return new Windowed<>(key, window);
+        }
+
+        static long extractStartTimestamp(final byte[] binaryKey) {
+            return ByteBuffer.wrap(binaryKey).getLong(binaryKey.length - TIMESTAMP_SIZE);
+        }
+
+        static long extractEndTimestamp(final byte[] binaryKey) {
+            return ByteBuffer.wrap(binaryKey).getLong(binaryKey.length - 2 * TIMESTAMP_SIZE);
+        }
+
+        public static Bytes toBinary(final Windowed<Bytes> sessionKey) {
+            return toBinary(sessionKey.key(), sessionKey.window().start(), sessionKey.window().end());
+        }
+
+        public static <K> byte[] toBinary(final Windowed<K> sessionKey,
+                                          final Serializer<K> serializer,
+                                          final String topic) {
+            final byte[] bytes = serializer.serialize(topic, sessionKey.key());
+            return toBinary(Bytes.wrap(bytes), sessionKey.window().start(), sessionKey.window().end()).get();
+        }
+
+        public static Bytes toBinary(final Bytes key,
+                                     final long startTime,
+                                     final long endTime) {
+            final ByteBuffer buf = ByteBuffer.allocate(PREFIX_SIZE + SessionKeySchema.keyByteLength(key));
+            buf.put(KEY_FIRST_PREFIX);
+            SessionKeySchema.writeBinary(buf, key, startTime, endTime);
+            return Bytes.wrap(buf.array());
+        }
+
+        private static Bytes wrapPrefix(final Bytes noPrefixKey, final boolean upperRange) {
+            // Need to scan from prefix even key is null
+            if (noPrefixKey == null) {
+                final byte prefix = upperRange ? KEY_FIRST_PREFIX + 1 : KEY_FIRST_PREFIX;
+                final byte[] ret = ByteBuffer.allocate(PREFIX_SIZE)
+                    .put(prefix)
+                    .array();
+                return Bytes.wrap(ret);
+            }
+            final byte[] ret = ByteBuffer.allocate(PREFIX_SIZE + noPrefixKey.get().length)
+                .put(KEY_FIRST_PREFIX)
+                .put(noPrefixKey.get())
+                .array();
+            return Bytes.wrap(ret);
+        }
+
+        public static byte[] prefixNonPrefixSessionKey(final byte[] binaryKey) {
+            assert binaryKey != null;
+
+            return wrapPrefix(Bytes.wrap(binaryKey), false).get();
+        }
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedWindowKeySchemas.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedWindowKeySchemas.java
index 4f94ca9f9f7f8..47cf4b49b59ec 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedWindowKeySchemas.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/PrefixedWindowKeySchemas.java
@@ -41,7 +41,11 @@ private static byte extractPrefix(final byte[] binaryBytes) {
         return binaryBytes[0];
     }
 
-    public static class TimeFirstWindowKeySchema implements RocksDBSegmentedBytesStore.KeySchema {
+    public static boolean isTimeFirstSchemaKey(final byte[] binaryBytes) {
+        return binaryBytes.length > 0 && binaryBytes[0] == TIME_FIRST_PREFIX;
+    }
+
+    public static class TimeFirstWindowKeySchema implements KeySchema {
 
         @Override
         public Bytes upperRange(final Bytes key, final long to) {
@@ -176,6 +180,14 @@ public static <K> Bytes toStoreKeyBinary(final Windowed<K> timeKey,
             return toStoreKeyBinary(serializedKey, timeKey.window().start(), seqnum);
         }
 
+        public static <K> Bytes toStoreKeyBinary(final K key,
+                                                 final long timestamp,
+                                                 final int seqnum,
+                                                 final StateSerdes<K, ?> serdes) {
+            final byte[] serializedKey = serdes.rawKey(key);
+            return toStoreKeyBinary(serializedKey, timestamp, seqnum);
+        }
+
         // for store serdes
         public static Bytes toStoreKeyBinary(final Bytes key,
                                              final long timestamp,
@@ -226,8 +238,6 @@ public static byte[] fromNonPrefixWindowKey(final byte[] binaryKey) {
 
     public static class KeyFirstWindowKeySchema implements KeySchema {
 
-
-
         @Override
         public Bytes upperRange(final Bytes key, final long to) {
             final Bytes noPrefixBytes = new WindowKeySchema().upperRange(key, to);
@@ -255,7 +265,7 @@ public Bytes upperRangeFixedSize(final Bytes key, final long to) {
 
         @Override
         public long segmentTimestamp(final Bytes key) {
-            return KeyFirstWindowKeySchema.extractStoreTimestamp(key.get());
+            return extractStoreTimestamp(key.get());
         }
 
         @Override
@@ -295,6 +305,14 @@ public <S extends Segment> List<S> segmentsToSearch(final Segments<S> segments,
             return segments.segments(from, to, forward);
         }
 
+        public static <K> Bytes toStoreKeyBinary(final K key,
+                                                 final long timestamp,
+                                                 final int seqnum,
+                                                 final StateSerdes<K, ?> serdes) {
+            final byte[] serializedKey = serdes.rawKey(key);
+            return toStoreKeyBinary(serializedKey, timestamp, seqnum);
+        }
+
         public static Bytes toStoreKeyBinary(final Windowed<Bytes> timeKey,
                                              final int seqnum) {
             return toStoreKeyBinary(timeKey.key().get(), timeKey.window().start(), seqnum);
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
index 919c4402801a7..1eb9a70e8c3d3 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBStore.java
@@ -326,8 +326,8 @@ public synchronized byte[] putIfAbsent(final Bytes key,
     public void putAll(final List<KeyValue<Bytes, byte[]>> entries) {
         try (final WriteBatch batch = new WriteBatch()) {
             dbAccessor.prepareBatch(entries, batch);
-            StoreQueryUtils.updatePosition(position, context);
             write(batch);
+            StoreQueryUtils.updatePosition(position, context);
         } catch (final RocksDBException e) {
             throw new ProcessorStateException("Error while batch writing to store " + name, e);
         }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionSegmentedBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionSegmentedBytesStore.java
new file mode 100644
index 0000000000000..59e255443c0b8
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionSegmentedBytesStore.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.errors.ProcessorStateException;
+import org.apache.kafka.streams.kstream.Window;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.processor.internals.ChangelogRecordDeserializationHelper;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.KeyFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.TimeFirstSessionKeySchema;
+import org.rocksdb.RocksDBException;
+import org.rocksdb.WriteBatch;
+
+/**
+ * A RocksDB backed time-ordered segmented bytes store for session key schema.
+ */
+public class RocksDBTimeOrderedSessionSegmentedBytesStore extends AbstractRocksDBTimeOrderedSegmentedBytesStore {
+
+    private class SessionKeySchemaIndexToBaseStoreIterator extends IndexToBaseStoreIterator {
+        SessionKeySchemaIndexToBaseStoreIterator(final KeyValueIterator<Bytes, byte[]> indexIterator) {
+            super(indexIterator);
+        }
+
+        @Override
+        protected Bytes getBaseKey(final Bytes indexKey) {
+            final Window window = KeyFirstSessionKeySchema.extractWindow(indexKey.get());
+            final byte[] key = KeyFirstSessionKeySchema.extractKeyBytes(indexKey.get());
+
+            return TimeFirstSessionKeySchema.toBinary(Bytes.wrap(key), window.start(), window.end());
+        }
+    }
+
+    RocksDBTimeOrderedSessionSegmentedBytesStore(final String name,
+                                                 final String metricsScope,
+                                                 final long retention,
+                                                 final long segmentInterval,
+                                                 final boolean withIndex) {
+        super(name, metricsScope, retention, segmentInterval, new TimeFirstSessionKeySchema(),
+            Optional.ofNullable(withIndex ? new KeyFirstSessionKeySchema() : null));
+    }
+
+    public byte[] fetchSession(final Bytes key,
+                               final long sessionStartTime,
+                               final long sessionEndTime) {
+        return get(TimeFirstSessionKeySchema.toBinary(
+            key,
+            sessionStartTime,
+            sessionEndTime
+        ));
+    }
+
+    public KeyValueIterator<Bytes, byte[]> fetchSessions(final long earliestSessionEndTime,
+                                                         final long latestSessionEndTime) {
+        final List<KeyValueSegment> searchSpace = segments.segments(earliestSessionEndTime, latestSessionEndTime, true);
+
+        // here we want [0, latestSE, FF] as the upper bound to cover any possible keys,
+        // but since we can only get upper bound based on timestamps, we use a slight larger upper bound as [0, latestSE+1]
+        final Bytes binaryFrom = baseKeySchema.lowerRangeFixedSize(null, earliestSessionEndTime);
+        final Bytes binaryTo = baseKeySchema.lowerRangeFixedSize(null, latestSessionEndTime + 1);
+
+        return new SegmentIterator<>(
+                searchSpace.iterator(),
+                iterator -> {
+                    while (iterator.hasNext()) {
+                        final Bytes bytes = iterator.peekNextKey();
+
+                        final Windowed<Bytes> windowedKey = TimeFirstSessionKeySchema.from(bytes);
+                        final long endTime = windowedKey.window().end();
+
+                        if (endTime <= latestSessionEndTime && endTime >= earliestSessionEndTime) {
+                            return true;
+                        }
+                        iterator.next();
+                    }
+                    return false;
+                },
+                binaryFrom,
+                binaryTo,
+                true);
+    }
+
+    public void remove(final Windowed<Bytes> key) {
+        remove(TimeFirstSessionKeySchema.toBinary(key));
+    }
+
+    public void put(final Windowed<Bytes> sessionKey, final byte[] aggregate) {
+        put(TimeFirstSessionKeySchema.toBinary(sessionKey), aggregate);
+    }
+
+    @Override
+    protected KeyValue<Bytes, byte[]> getIndexKeyValue(final Bytes baseKey, final byte[] baseValue) {
+        final Window window = TimeFirstSessionKeySchema.extractWindow(baseKey.get());
+        final byte[] key = TimeFirstSessionKeySchema.extractKeyBytes(baseKey.get());
+        return KeyValue.pair(KeyFirstSessionKeySchema.toBinary(Bytes.wrap(key), window.start(), window.end()), new byte[0]);
+    }
+
+    @Override
+    Map<KeyValueSegment, WriteBatch> getWriteBatches(
+        final Collection<ConsumerRecord<byte[], byte[]>> records) {
+        // advance stream time to the max timestamp in the batch
+        for (final ConsumerRecord<byte[], byte[]> record : records) {
+            final long timestamp = SessionKeySchema.extractEndTimestamp(record.key());
+            observedStreamTime = Math.max(observedStreamTime, timestamp);
+        }
+
+        final Map<KeyValueSegment, WriteBatch> writeBatchMap = new HashMap<>();
+        for (final ConsumerRecord<byte[], byte[]> record : records) {
+            final long timestamp = SessionKeySchema.extractEndTimestamp(record.key());
+            final long segmentId = segments.segmentId(timestamp);
+            final KeyValueSegment segment = segments.getOrCreateSegmentIfLive(segmentId, context, observedStreamTime);
+            if (segment != null) {
+                ChangelogRecordDeserializationHelper.applyChecksAndUpdatePosition(
+                    record,
+                    consistencyEnabled,
+                    position
+                );
+                try {
+                    final WriteBatch batch = writeBatchMap.computeIfAbsent(segment, s -> new WriteBatch());
+
+                    // Assuming changelog record is serialized using SessionKeySchema
+                    // from ChangeLoggingSessionBytesStore. Reconstruct key/value to restore
+                    if (hasIndex()) {
+                        final byte[] indexKey = KeyFirstSessionKeySchema.prefixNonPrefixSessionKey(record.key());
+                        // Take care of tombstone
+                        final byte[] value = record.value() == null ? null : new byte[0];
+                        segment.addToBatch(new KeyValue<>(indexKey, value), batch);
+                    }
+
+                    final byte[] baseKey = TimeFirstSessionKeySchema.extractWindowBytesFromNonPrefixSessionKey(record.key());
+                    segment.addToBatch(new KeyValue<>(baseKey, record.value()), batch);
+                } catch (final RocksDBException e) {
+                    throw new ProcessorStateException("Error restoring batch to store " + name(), e);
+                }
+            }
+        }
+        return writeBatchMap;
+    }
+
+    @Override
+    protected IndexToBaseStoreIterator getIndexToBaseStoreIterator(
+        final SegmentIterator<KeyValueSegment> segmentIterator) {
+        return new SessionKeySchemaIndexToBaseStoreIterator(segmentIterator);
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionStore.java
new file mode 100644
index 0000000000000..62a874f06c69d
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSessionStore.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.Objects;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.query.PositionBound;
+import org.apache.kafka.streams.query.Query;
+import org.apache.kafka.streams.query.QueryConfig;
+import org.apache.kafka.streams.query.QueryResult;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.SessionStore;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.TimeFirstSessionKeySchema;
+
+public class RocksDBTimeOrderedSessionStore
+    extends WrappedStateStore<RocksDBTimeOrderedSessionSegmentedBytesStore, Object, Object>
+    implements SessionStore<Bytes, byte[]> {
+
+    private StateStoreContext stateStoreContext;
+
+    RocksDBTimeOrderedSessionStore(final RocksDBTimeOrderedSessionSegmentedBytesStore store) {
+        super(store);
+        Objects.requireNonNull(store, "store is null");
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        wrapped().init(context, root);
+        this.stateStoreContext = context;
+    }
+
+    @Override
+    public <R> QueryResult<R> query(final Query<R> query,
+                                    final PositionBound positionBound,
+                                    final QueryConfig config) {
+
+        return StoreQueryUtils.handleBasicQueries(
+            query,
+            positionBound,
+            config,
+            this,
+            getPosition(),
+            stateStoreContext
+        );
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final long earliestSessionEndTime,
+                                                                  final long latestSessionEndTime) {
+        final KeyValueIterator<Bytes, byte[]> bytesIterator = wrapped().fetchSessions(earliestSessionEndTime, latestSessionEndTime);
+        return new WrappedSessionStoreIterator(bytesIterator, TimeFirstSessionKeySchema::from);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final Bytes key,
+                                                                  final long earliestSessionEndTime,
+                                                                  final long latestSessionStartTime) {
+        final KeyValueIterator<Bytes, byte[]> bytesIterator = wrapped().fetch(
+            key,
+            earliestSessionEndTime,
+            latestSessionStartTime
+        );
+        return new WrappedSessionStoreIterator(bytesIterator, TimeFirstSessionKeySchema::from);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFindSessions(final Bytes key,
+                                                                          final long earliestSessionEndTime,
+                                                                          final long latestSessionStartTime) {
+        final KeyValueIterator<Bytes, byte[]> bytesIterator = wrapped().backwardFetch(
+            key,
+            earliestSessionEndTime,
+            latestSessionStartTime
+        );
+        return new WrappedSessionStoreIterator(bytesIterator, TimeFirstSessionKeySchema::from);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> findSessions(final Bytes keyFrom,
+                                                                  final Bytes keyTo,
+                                                                  final long earliestSessionEndTime,
+                                                                  final long latestSessionStartTime) {
+        final KeyValueIterator<Bytes, byte[]> bytesIterator = wrapped().fetch(
+            keyFrom,
+            keyTo,
+            earliestSessionEndTime,
+            latestSessionStartTime
+        );
+        return new WrappedSessionStoreIterator(bytesIterator, TimeFirstSessionKeySchema::from);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFindSessions(final Bytes keyFrom,
+                                                                          final Bytes keyTo,
+                                                                          final long earliestSessionEndTime,
+                                                                          final long latestSessionStartTime) {
+        final KeyValueIterator<Bytes, byte[]> bytesIterator = wrapped().backwardFetch(
+            keyFrom,
+            keyTo,
+            earliestSessionEndTime,
+            latestSessionStartTime
+        );
+        return new WrappedSessionStoreIterator(bytesIterator, TimeFirstSessionKeySchema::from);
+    }
+
+    @Override
+    public byte[] fetchSession(final Bytes key,
+                               final long sessionStartTime,
+                               final long sessiontEndTime) {
+        return wrapped().fetchSession(
+            key,
+            sessionStartTime,
+            sessiontEndTime
+        );
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> fetch(final Bytes key) {
+        return findSessions(key, 0, Long.MAX_VALUE);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetch(final Bytes key) {
+        return backwardFindSessions(key, 0, Long.MAX_VALUE);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> fetch(final Bytes keyFrom, final Bytes keyTo) {
+        return findSessions(keyFrom, keyTo, 0, Long.MAX_VALUE);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetch(final Bytes keyFrom, final Bytes keyTo) {
+        return backwardFindSessions(keyFrom, keyTo, 0, Long.MAX_VALUE);
+    }
+
+    @Override
+    public void remove(final Windowed<Bytes> sessionKey) {
+        wrapped().remove(sessionKey);
+    }
+
+    @Override
+    public void put(final Windowed<Bytes> sessionKey, final byte[] aggregate) {
+        wrapped().put(sessionKey, aggregate);
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStore.java
new file mode 100644
index 0000000000000..b44588da2b845
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStore.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.errors.ProcessorStateException;
+import org.apache.kafka.streams.processor.internals.ChangelogRecordDeserializationHelper;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
+import org.rocksdb.RocksDBException;
+import org.rocksdb.WriteBatch;
+
+/**
+ * A RocksDB backed time-ordered segmented bytes store for window key schema.
+ */
+public class RocksDBTimeOrderedWindowSegmentedBytesStore extends AbstractRocksDBTimeOrderedSegmentedBytesStore {
+
+    private class WindowKeySchemaIndexToBaseStoreIterator  extends IndexToBaseStoreIterator {
+        WindowKeySchemaIndexToBaseStoreIterator(final KeyValueIterator<Bytes, byte[]> indexIterator) {
+            super(indexIterator);
+        }
+
+        @Override
+        protected Bytes getBaseKey(final Bytes indexKey) {
+            final byte[] keyBytes = KeyFirstWindowKeySchema.extractStoreKeyBytes(indexKey.get());
+            final long timestamp = KeyFirstWindowKeySchema.extractStoreTimestamp(indexKey.get());
+            final int seqnum = KeyFirstWindowKeySchema.extractStoreSequence(indexKey.get());
+            return TimeFirstWindowKeySchema.toStoreKeyBinary(keyBytes, timestamp, seqnum);
+        }
+    }
+
+    RocksDBTimeOrderedWindowSegmentedBytesStore(final String name,
+                                                final String metricsScope,
+                                                final long retention,
+                                                final long segmentInterval,
+                                                final boolean withIndex) {
+        super(name, metricsScope, retention, segmentInterval, new TimeFirstWindowKeySchema(),
+            Optional.ofNullable(withIndex ? new KeyFirstWindowKeySchema() : null));
+    }
+
+    public void put(final Bytes key, final long timestamp, final int seqnum, final byte[] value) {
+        final Bytes baseKey = TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum);
+        put(baseKey, value);
+    }
+
+    byte[] fetch(final Bytes key, final long timestamp, final int seqnum) {
+        return get(TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum));
+    }
+
+    @Override
+    protected KeyValue<Bytes, byte[]> getIndexKeyValue(final Bytes baseKey, final byte[] baseValue) {
+        final byte[] key = TimeFirstWindowKeySchema.extractStoreKeyBytes(baseKey.get());
+        final long timestamp = TimeFirstWindowKeySchema.extractStoreTimestamp(baseKey.get());
+        final int seqnum = TimeFirstWindowKeySchema.extractStoreSequence(baseKey.get());
+
+        return KeyValue.pair(KeyFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum), new byte[0]);
+    }
+
+    @Override
+    Map<KeyValueSegment, WriteBatch> getWriteBatches(
+        final Collection<ConsumerRecord<byte[], byte[]>> records) {
+        // advance stream time to the max timestamp in the batch
+        for (final ConsumerRecord<byte[], byte[]> record : records) {
+            final long timestamp = WindowKeySchema.extractStoreTimestamp(record.key());
+            observedStreamTime = Math.max(observedStreamTime, timestamp);
+        }
+
+        final Map<KeyValueSegment, WriteBatch> writeBatchMap = new HashMap<>();
+        for (final ConsumerRecord<byte[], byte[]> record : records) {
+            final long timestamp = WindowKeySchema.extractStoreTimestamp(record.key());
+            final long segmentId = segments.segmentId(timestamp);
+            final KeyValueSegment segment = segments.getOrCreateSegmentIfLive(segmentId, context, observedStreamTime);
+            if (segment != null) {
+                ChangelogRecordDeserializationHelper.applyChecksAndUpdatePosition(
+                    record,
+                    consistencyEnabled,
+                    position
+                );
+                try {
+                    final WriteBatch batch = writeBatchMap.computeIfAbsent(segment, s -> new WriteBatch());
+
+                    // Assuming changelog record is serialized using WindowKeySchema
+                    // from ChangeLoggingTimestampedWindowBytesStore. Reconstruct key/value to restore
+                    if (hasIndex()) {
+                        final byte[] indexKey = KeyFirstWindowKeySchema.fromNonPrefixWindowKey(record.key());
+                        // Take care of tombstone
+                        final byte[] value = record.value() == null ? null : new byte[0];
+                        segment.addToBatch(new KeyValue<>(indexKey, value), batch);
+                    }
+
+                    final byte[] baseKey = TimeFirstWindowKeySchema.fromNonPrefixWindowKey(record.key());
+                    segment.addToBatch(new KeyValue<>(baseKey, record.value()), batch);
+                } catch (final RocksDBException e) {
+                    throw new ProcessorStateException("Error restoring batch to store " + name(), e);
+                }
+            }
+        }
+        return writeBatchMap;
+    }
+
+    @Override
+    protected IndexToBaseStoreIterator getIndexToBaseStoreIterator(
+        final SegmentIterator<KeyValueSegment> segmentIterator) {
+        return new WindowKeySchemaIndexToBaseStoreIterator(segmentIterator);
+    }
+}
\ No newline at end of file
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowStore.java
index a174e51ab21b8..bcc560eb0ca26 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowStore.java
@@ -21,22 +21,29 @@
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.query.PositionBound;
+import org.apache.kafka.streams.query.Query;
+import org.apache.kafka.streams.query.QueryConfig;
+import org.apache.kafka.streams.query.QueryResult;
 import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.TimestampedBytesStore;
 import org.apache.kafka.streams.state.WindowStore;
 import org.apache.kafka.streams.state.WindowStoreIterator;
 import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
 
 
 public class RocksDBTimeOrderedWindowStore
-    extends WrappedStateStore<RocksDBTimeOrderedSegmentedBytesStore, Object, Object>
-    implements WindowStore<Bytes, byte[]> {
+    extends WrappedStateStore<RocksDBTimeOrderedWindowSegmentedBytesStore, Object, Object>
+    implements WindowStore<Bytes, byte[]>, TimestampedBytesStore {
 
     private final boolean retainDuplicates;
     private final long windowSize;
+
     private int seqnum = 0;
+    private StateStoreContext stateStoreContext;
 
     RocksDBTimeOrderedWindowStore(
-        final RocksDBTimeOrderedSegmentedBytesStore store,
+        final RocksDBTimeOrderedWindowSegmentedBytesStore store,
         final boolean retainDuplicates,
         final long windowSize
     ) {
@@ -48,6 +55,7 @@ public class RocksDBTimeOrderedWindowStore
 
     @Override
     public void init(final StateStoreContext context, final StateStore root) {
+        stateStoreContext = context;
         wrapped().init(context, root);
     }
 
@@ -163,6 +171,25 @@ public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetchAll(final long tim
             TimeFirstWindowKeySchema::fromStoreBytesKey).keyValueIterator();
     }
 
+    public boolean hasIndex() {
+        return wrapped().hasIndex();
+    }
+
+    @Override
+    public <R> QueryResult<R> query(final Query<R> query,
+                                    final PositionBound positionBound,
+                                    final QueryConfig config) {
+
+        return StoreQueryUtils.handleBasicQueries(
+            query,
+            positionBound,
+            config,
+            this,
+            getPosition(),
+            stateStoreContext
+        );
+    }
+
     private void maybeUpdateSeqnumForDups() {
         if (retainDuplicates) {
             seqnum = (seqnum + 1) & 0x7FFFFFFF;
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.java
index 84d8a80f6c485..ac0b82f99f426 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.java
@@ -16,6 +16,11 @@
  */
 package org.apache.kafka.streams.state.internals;
 
+import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
+import static org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration;
+
+import java.time.Duration;
+import java.util.Objects;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
 import org.apache.kafka.streams.state.WindowStore;
@@ -33,6 +38,38 @@ public enum WindowStoreTypes {
     private final boolean retainDuplicates;
     private final WindowStoreTypes windowStoreType;
 
+    public static RocksDbIndexedTimeOrderedWindowBytesStoreSupplier create(final String name,
+                                                                           final Duration retentionPeriod,
+                                                                           final Duration windowSize,
+                                                                           final boolean retainDuplicates,
+                                                                           final boolean hasIndex) {
+        Objects.requireNonNull(name, "name cannot be null");
+        final String rpMsgPrefix = prepareMillisCheckFailMsgPrefix(retentionPeriod, "retentionPeriod");
+        final long retentionMs = validateMillisecondDuration(retentionPeriod, rpMsgPrefix);
+        final String wsMsgPrefix = prepareMillisCheckFailMsgPrefix(windowSize, "windowSize");
+        final long windowSizeMs = validateMillisecondDuration(windowSize, wsMsgPrefix);
+
+        final long defaultSegmentInterval = Math.max(retentionMs / 2, 60_000L);
+
+        if (retentionMs < 0L) {
+            throw new IllegalArgumentException("retentionPeriod cannot be negative");
+        }
+        if (windowSizeMs < 0L) {
+            throw new IllegalArgumentException("windowSize cannot be negative");
+        }
+        if (defaultSegmentInterval < 1L) {
+            throw new IllegalArgumentException("segmentInterval cannot be zero or negative");
+        }
+        if (windowSizeMs > retentionMs) {
+            throw new IllegalArgumentException("The retention period of the window store "
+                + name + " must be no smaller than its window size. Got size=["
+                + windowSizeMs + "], retention=[" + retentionMs + "]");
+        }
+
+        return new RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(name, retentionMs,
+            defaultSegmentInterval, windowSizeMs, retainDuplicates, hasIndex);
+    }
+
     public RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(final String name,
                                            final long retentionPeriod,
                                            final long segmentInterval,
@@ -69,7 +106,7 @@ public WindowStore<Bytes, byte[]> get() {
         switch (windowStoreType) {
             case DEFAULT_WINDOW_STORE:
                 return new RocksDBTimeOrderedWindowStore(
-                    new RocksDBTimeOrderedSegmentedBytesStore(
+                    new RocksDBTimeOrderedWindowSegmentedBytesStore(
                         name,
                         metricsScope(),
                         retentionPeriod,
@@ -79,7 +116,7 @@ public WindowStore<Bytes, byte[]> get() {
                     windowSize);
             case INDEXED_WINDOW_STORE:
                 return new RocksDBTimeOrderedWindowStore(
-                    new RocksDBTimeOrderedSegmentedBytesStore(
+                    new RocksDBTimeOrderedWindowSegmentedBytesStore(
                         name,
                         metricsScope(),
                         retentionPeriod,
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbTimeOrderedSessionBytesStoreSupplier.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbTimeOrderedSessionBytesStoreSupplier.java
new file mode 100644
index 0000000000000..60cd710e6a3d6
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbTimeOrderedSessionBytesStoreSupplier.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
+import org.apache.kafka.streams.state.SessionStore;
+
+public class RocksDbTimeOrderedSessionBytesStoreSupplier implements SessionBytesStoreSupplier {
+    private final String name;
+    private final long retentionPeriod;
+    private final boolean withIndex;
+
+    public RocksDbTimeOrderedSessionBytesStoreSupplier(final String name,
+                                                       final long retentionPeriod,
+                                                       final boolean withIndex) {
+        this.name = name;
+        this.retentionPeriod = retentionPeriod;
+        this.withIndex = withIndex;
+    }
+
+    @Override
+    public String name() {
+        return name;
+    }
+
+    @Override
+    public SessionStore<Bytes, byte[]> get() {
+        return new RocksDBTimeOrderedSessionStore(
+            new RocksDBTimeOrderedSessionSegmentedBytesStore(
+                name,
+                metricsScope(),
+                retentionPeriod,
+                segmentIntervalMs(),
+                withIndex
+            )
+        );
+    }
+
+    @Override
+    public String metricsScope() {
+        return "rocksdb-session";
+    }
+
+    @Override
+    public long segmentIntervalMs() {
+        // Selected somewhat arbitrarily. Profiling may reveal a different value is preferable.
+        return Math.max(retentionPeriod / 2, 60_000L);
+    }
+
+    @Override
+    public long retentionPeriod() {
+        return retentionPeriod;
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentIterator.java
index 6191c4988877e..9aabc787c89d9 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentIterator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentIterator.java
@@ -91,7 +91,7 @@ private boolean hasNextConditionHasNext() {
         try {
             hasNext = hasNextCondition.hasNext(currentIterator);
         } catch (final InvalidStateStoreException e) {
-            //already closed so ignore
+            // already closed so ignore
         }
         return hasNext;
     }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentedBytesStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentedBytesStore.java
index 80b5a91ffa53a..1ef6a932f9b51 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentedBytesStore.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/SegmentedBytesStore.java
@@ -91,8 +91,8 @@ public interface SegmentedBytesStore extends StateStore {
     /**
      * Gets all the key-value pairs that belong to the windows within in the given time range.
      *
-     * @param from the beginning of the time slot from which to search
-     * @param to   the end of the time slot from which to search
+     * @param from the beginning of the time slot from which to search (inclusive)
+     * @param to   the end of the time slot from which to search (inclusive)
      * @return an iterator over windowed key-value pairs {@code <Windowed<K>, value>}
      * @throws InvalidStateStoreException if the store is not initialized
      * @throws NullPointerException if null is used for any key
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/SessionKeySchema.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/SessionKeySchema.java
index d4196a9ede6ac..f21e47fd87fe5 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/SessionKeySchema.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/SessionKeySchema.java
@@ -34,18 +34,30 @@ public class SessionKeySchema implements SegmentedBytesStore.KeySchema {
     private static final int SUFFIX_SIZE = 2 * TIMESTAMP_SIZE;
     private static final byte[] MIN_SUFFIX = new byte[SUFFIX_SIZE];
 
+    public static int keyByteLength(final Bytes key) {
+        return (key == null ? 0 : key.get().length) + 2 * TIMESTAMP_SIZE;
+    }
+
     @Override
     public Bytes upperRangeFixedSize(final Bytes key, final long to) {
-        final Windowed<Bytes> sessionKey = new Windowed<>(key, new SessionWindow(to, Long.MAX_VALUE));
+        final Windowed<Bytes> sessionKey = upperRangeFixedWindow(key, to);
         return SessionKeySchema.toBinary(sessionKey);
     }
 
+    public static <K> Windowed<K> upperRangeFixedWindow(final K key, final long to) {
+        return new Windowed<K>(key, new SessionWindow(to, Long.MAX_VALUE));
+    }
+
     @Override
     public Bytes lowerRangeFixedSize(final Bytes key, final long from) {
-        final Windowed<Bytes> sessionKey = new Windowed<>(key, new SessionWindow(0, Math.max(0, from)));
+        final Windowed<Bytes> sessionKey = lowerRangeFixedWindow(key, from);
         return SessionKeySchema.toBinary(sessionKey);
     }
 
+    public static <K> Windowed<K> lowerRangeFixedWindow(final K key, final long from) {
+        return new Windowed<K>(key, new SessionWindow(0, Math.max(0, from)));
+    }
+
     @Override
     public Bytes upperRange(final Bytes key, final long to) {
         if (key == null) {
@@ -161,11 +173,27 @@ public static Bytes toBinary(final Windowed<Bytes> sessionKey) {
     public static Bytes toBinary(final Bytes key,
                                  final long startTime,
                                  final long endTime) {
-        final byte[] bytes = key.get();
-        final ByteBuffer buf = ByteBuffer.allocate(bytes.length + 2 * TIMESTAMP_SIZE);
-        buf.put(bytes);
+        final ByteBuffer buf = ByteBuffer.allocate(keyByteLength(key));
+        writeBinary(buf, key, startTime, endTime);
+        return Bytes.wrap(buf.array());
+    }
+
+    public static void writeBinary(final ByteBuffer buf, final Windowed<Bytes> sessionKey) {
+        writeBinary(buf, sessionKey.key(), sessionKey.window().start(), sessionKey.window().end());
+    }
+
+    public static void writeBinary(final ByteBuffer buf,
+                                   final Bytes key,
+                                   final long startTime,
+                                   final long endTime) {
+        // we search for the session window that can overlap with the [ESET, LSST] range
+        // since the session window length can vary, we define the search boundary as:
+        // lower: [0, ESET]
+        // upper: [LSST, INF]
+        // and by puting the end time first and then the start time, the serialized search boundary
+        // is: [(ESET-0), (INF-LSST)]
+        buf.put(key.get());
         buf.putLong(endTime);
         buf.putLong(startTime);
-        return Bytes.wrap(buf.array());
     }
 }
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/StoreQueryUtils.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/StoreQueryUtils.java
index 06b37139f07d8..4630195c8ae75 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/StoreQueryUtils.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/StoreQueryUtils.java
@@ -142,7 +142,9 @@ public static void updatePosition(
 
         if (stateStoreContext != null && stateStoreContext.recordMetadata().isPresent()) {
             final RecordMetadata meta = stateStoreContext.recordMetadata().get();
-            position.withComponent(meta.topic(), meta.partition(), meta.offset());
+            if (meta.topic() != null) {
+                position.withComponent(meta.topic(), meta.partition(), meta.offset());
+            }
         }
     }
 
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingWindowStore.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingWindowStore.java
new file mode 100644
index 0000000000000..62cca5e459cc2
--- /dev/null
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingWindowStore.java
@@ -0,0 +1,726 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.function.Function;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
+import org.apache.kafka.streams.processor.internals.RecordQueue;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.StateSerdes;
+import org.apache.kafka.streams.state.WindowStore;
+import org.apache.kafka.streams.state.WindowStoreIterator;
+import org.apache.kafka.streams.state.internals.MergedSortedCacheWindowStoreKeyValueIterator.StoreKeyToWindowKey;
+import org.apache.kafka.streams.state.internals.MergedSortedCacheWindowStoreKeyValueIterator.WindowKeyToBytes;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
+import org.apache.kafka.streams.state.internals.ThreadCache.DirtyEntry;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.LinkedList;
+import java.util.NoSuchElementException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.apache.kafka.streams.processor.internals.ProcessorContextUtils.asInternalProcessorContext;
+import static org.apache.kafka.streams.state.internals.ExceptionUtils.executeAll;
+import static org.apache.kafka.streams.state.internals.ExceptionUtils.throwSuppressed;
+
+class TimeOrderedCachingWindowStore
+    extends WrappedStateStore<WindowStore<Bytes, byte[]>, byte[], byte[]>
+    implements WindowStore<Bytes, byte[]>, CachedStateStore<byte[], byte[]> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(TimeOrderedCachingWindowStore.class);
+
+    private final long windowSize;
+    private final SegmentedCacheFunction baseKeyCacheFunction;
+    private final SegmentedCacheFunction indexKeyCacheFunction;
+    private final TimeFirstWindowKeySchema baseKeySchema = new TimeFirstWindowKeySchema();
+    private final KeyFirstWindowKeySchema indexKeySchema = new KeyFirstWindowKeySchema();
+
+    private String cacheName;
+    private boolean hasIndex;
+    private boolean sendOldValues;
+    private InternalProcessorContext<?, ?> context;
+    private StateSerdes<Bytes, byte[]> bytesSerdes;
+    private CacheFlushListener<byte[], byte[]> flushListener;
+
+    private final AtomicLong maxObservedTimestamp;
+
+    TimeOrderedCachingWindowStore(final WindowStore<Bytes, byte[]> underlying,
+                                  final long windowSize,
+                                  final long segmentInterval) {
+        super(underlying);
+        this.windowSize = windowSize;
+        this.baseKeyCacheFunction = new SegmentedCacheFunction(baseKeySchema, segmentInterval);
+        this.indexKeyCacheFunction = new SegmentedCacheFunction(indexKeySchema, segmentInterval);
+        this.maxObservedTimestamp = new AtomicLong(RecordQueue.UNKNOWN);
+        enforceWrappedStore(underlying);
+    }
+
+    private void enforceWrappedStore(final WindowStore<Bytes, byte[]> underlying) {
+        final RocksDBTimeOrderedWindowStore timeOrderedWindowStore = getWrappedStore(underlying);
+        if (timeOrderedWindowStore == null) {
+            throw new IllegalArgumentException("TimeOrderedCachingWindowStore only supports RocksDBTimeOrderedWindowStore backed store");
+        }
+
+        hasIndex = timeOrderedWindowStore.hasIndex();
+    }
+
+    private RocksDBTimeOrderedWindowStore getWrappedStore(final StateStore wrapped) {
+        if (wrapped instanceof RocksDBTimeOrderedWindowStore) {
+            return (RocksDBTimeOrderedWindowStore) wrapped;
+        }
+        if (wrapped instanceof WrappedStateStore) {
+            return getWrappedStore(((WrappedStateStore) wrapped).wrapped());
+        }
+        return null;
+    }
+
+    @Deprecated
+    @Override
+    public void init(final ProcessorContext context, final StateStore root) {
+        initInternal(asInternalProcessorContext(context));
+        super.init(context, root);
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        initInternal(asInternalProcessorContext(context));
+        super.init(context, root);
+    }
+
+    private void initInternal(final InternalProcessorContext<?, ?> context) {
+        final String prefix = StreamsConfig.InternalConfig.getString(
+            context.appConfigs(),
+            StreamsConfig.InternalConfig.TOPIC_PREFIX_ALTERNATIVE,
+            context.applicationId()
+        );
+        this.context = context;
+        final String topic = ProcessorStateManager.storeChangelogTopic(prefix, name(),  context.taskId().topologyName());
+
+        bytesSerdes = new StateSerdes<>(
+            topic,
+            Serdes.Bytes(),
+            Serdes.ByteArray());
+        cacheName = context.taskId() + "-" + name();
+
+        context.registerCacheFlushListener(cacheName, entries -> {
+            putAndMaybeForward(entries, context);
+        });
+    }
+
+    private void putAndMaybeForward(final List<DirtyEntry> entries,
+                                    final InternalProcessorContext<?, ?> context) {
+
+        // Track what base key or index key we already processed so don't reprocess
+        final Set<Bytes> processedBasedKey = new HashSet<>();
+
+        for (final ThreadCache.DirtyEntry entry : entries) {
+            final byte[] binaryWindowKey = baseKeyCacheFunction.key(entry.key()).get();
+            final boolean isBaseKey = PrefixedWindowKeySchemas.isTimeFirstSchemaKey(
+                binaryWindowKey);
+
+            final DirtyEntry finalEntry;
+            if (!isBaseKey) {
+                final Bytes baseKey = indexKeyToBaseKey(Bytes.wrap(binaryWindowKey));
+                if (hasIndex && processedBasedKey.contains(baseKey)) {
+                    // Processed in base
+                    continue;
+                }
+
+                final Bytes cachedBaseKey = baseKeyCacheFunction.cacheKey(baseKey);
+                final LRUCacheEntry value = context.cache().get(cacheName, cachedBaseKey);
+                // Base key value is already evicted, which should be handled already
+                if (value == null) {
+                    continue;
+                }
+
+                finalEntry = new DirtyEntry(entry.key(), value.value(), value);
+
+                if (hasIndex) {
+                    processedBasedKey.add(baseKey);
+                }
+            } else {
+                final Bytes baseKey = Bytes.wrap(binaryWindowKey);
+                if (hasIndex && processedBasedKey.contains(baseKey)) {
+                    // Processed in index
+                    continue;
+                }
+                finalEntry = entry;
+                if (hasIndex) {
+                    processedBasedKey.add(Bytes.wrap(binaryWindowKey));
+                }
+            }
+
+            final Windowed<Bytes> windowedKeyBytes;
+            if (isBaseKey) {
+                windowedKeyBytes = TimeFirstWindowKeySchema.fromStoreBytesKey(binaryWindowKey,
+                    windowSize);
+            } else {
+                windowedKeyBytes = KeyFirstWindowKeySchema.fromStoreBytesKey(binaryWindowKey,
+                    windowSize);
+            }
+
+            final long windowStartTimestamp = windowedKeyBytes.window().start();
+            final Bytes binaryKey = windowedKeyBytes.key();
+
+            putAndMaybeForward(context, finalEntry, binaryKey, windowStartTimestamp);
+        }
+    }
+
+    private void putAndMaybeForward(final InternalProcessorContext<?, ?> context,
+                                    final DirtyEntry finalEntry,
+                                    final Bytes binaryKey,
+                                    final long windowStartTimestamp) {
+        if (flushListener != null) {
+            final byte[] rawNewValue = finalEntry.newValue();
+            final byte[] rawOldValue = rawNewValue == null || sendOldValues ?
+                wrapped().fetch(binaryKey, windowStartTimestamp) : null;
+
+            // this is an optimization: if this key did not exist in underlying store and also not in the cache,
+            // we can skip flushing to downstream as well as writing to underlying store
+            if (rawNewValue != null || rawOldValue != null) {
+                // we need to get the old values if needed, and then put to store, and then flush
+                final ProcessorRecordContext current = context.recordContext();
+                try {
+                    context.setRecordContext(finalEntry.entry().context());
+                    wrapped().put(binaryKey, finalEntry.newValue(), windowStartTimestamp);
+
+                    flushListener.apply(
+                        new Record<>(
+                            WindowKeySchema.toStoreKeyBinary(binaryKey,
+                                    windowStartTimestamp, 0)
+                                .get(),
+                            new Change<>(rawNewValue, sendOldValues ? rawOldValue : null),
+                            finalEntry.entry().context().timestamp(),
+                            finalEntry.entry().context().headers()));
+                } finally {
+                    context.setRecordContext(current);
+                }
+            }
+        } else {
+            final ProcessorRecordContext current = context.recordContext();
+            try {
+                context.setRecordContext(finalEntry.entry().context());
+                wrapped().put(binaryKey, finalEntry.newValue(), windowStartTimestamp);
+            } finally {
+                context.setRecordContext(current);
+            }
+        }
+    }
+
+    @Override
+    public boolean setFlushListener(final CacheFlushListener<byte[], byte[]> flushListener,
+                                    final boolean sendOldValues) {
+        this.flushListener = flushListener;
+        this.sendOldValues = sendOldValues;
+
+        return true;
+    }
+
+    private Bytes indexKeyToBaseKey(final Bytes indexKey) {
+        final byte[] key = KeyFirstWindowKeySchema.extractStoreKeyBytes(indexKey.get());
+        final long timestamp = KeyFirstWindowKeySchema.extractStoreTimestamp(indexKey.get());
+        final int seqnum = KeyFirstWindowKeySchema.extractStoreSequence(indexKey.get());
+        return TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, seqnum);
+    }
+
+    @Override
+    public synchronized void put(final Bytes key,
+                                 final byte[] value,
+                                 final long windowStartTimestamp) {
+        // since this function may not access the underlying inner store, we need to validate
+        // if store is open outside as well.
+        validateStoreOpen();
+
+        final Bytes baseKeyBytes = TimeFirstWindowKeySchema.toStoreKeyBinary(key, windowStartTimestamp, 0);
+        final LRUCacheEntry entry =
+            new LRUCacheEntry(
+                value,
+                context.headers(),
+                true,
+                context.offset(),
+                context.timestamp(),
+                context.partition(),
+                context.topic());
+
+        // Put to index first so that base can be evicted later
+        if (hasIndex) {
+            // Important: put base key first to avoid the situation that if we put index first,
+            // it could be evicted when we are putting base key. In that case, base key is not yet
+            // in cache so we can't store key/value to store when index is evicted. Then if we fetch
+            // using index, we can't find it in either store or cache
+            context.cache().put(cacheName, baseKeyCacheFunction.cacheKey(baseKeyBytes), entry);
+            final LRUCacheEntry emptyEntry =
+                new LRUCacheEntry(
+                    new byte[0],
+                    new RecordHeaders(),
+                    true,
+                    context.offset(),
+                    context.timestamp(),
+                    context.partition(),
+                    "");
+            final Bytes indexKey = KeyFirstWindowKeySchema.toStoreKeyBinary(key, windowStartTimestamp, 0);
+            context.cache().put(cacheName, indexKeyCacheFunction.cacheKey(indexKey), emptyEntry);
+        } else {
+            context.cache().put(cacheName, baseKeyCacheFunction.cacheKey(baseKeyBytes), entry);
+        }
+        maxObservedTimestamp.set(Math.max(windowStartTimestamp, maxObservedTimestamp.get()));
+    }
+
+    @Override
+    public byte[] fetch(final Bytes key,
+                        final long timestamp) {
+        validateStoreOpen();
+        if (context.cache() == null) {
+            return wrapped().fetch(key, timestamp);
+        }
+
+        final Bytes baseBytesKey = TimeFirstWindowKeySchema.toStoreKeyBinary(key, timestamp, 0);
+        final Bytes cacheKey = baseKeyCacheFunction.cacheKey(baseBytesKey);
+
+        final LRUCacheEntry entry = context.cache().get(cacheName, cacheKey);
+        if (entry == null) {
+            return wrapped().fetch(key, timestamp);
+        } else {
+            return entry.value();
+        }
+    }
+
+    @Override
+    public synchronized WindowStoreIterator<byte[]> fetch(final Bytes key,
+                                                          final long timeFrom,
+                                                          final long timeTo) {
+        // since this function may not access the underlying inner store, we need to validate
+        // if store is open outside as well.
+        validateStoreOpen();
+
+        final WindowStoreIterator<byte[]> underlyingIterator = wrapped().fetch(key, timeFrom, timeTo);
+        if (context.cache() == null) {
+            return underlyingIterator;
+        }
+
+        return fetchInternal(underlyingIterator, key, timeFrom, timeTo, true);
+    }
+
+    @Override
+    public synchronized WindowStoreIterator<byte[]> backwardFetch(final Bytes key,
+                                                                  final long timeFrom,
+                                                                  final long timeTo) {
+        // since this function may not access the underlying inner store, we need to validate
+        // if store is open outside as well.
+        validateStoreOpen();
+
+        final WindowStoreIterator<byte[]> underlyingIterator = wrapped().backwardFetch(key, timeFrom, timeTo);
+        if (context.cache() == null) {
+            return underlyingIterator;
+        }
+
+        return fetchInternal(underlyingIterator, key, timeFrom, timeTo, false);
+    }
+
+    private WindowStoreIterator<byte[]> fetchInternal(final WindowStoreIterator<byte[]> underlyingIterator,
+                                                      final Bytes key,
+                                                      final long timeFrom,
+                                                      final long timeTo,
+                                                      final boolean forward) {
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> cacheIterator = new CacheIteratorWrapper(
+            key, timeFrom, timeTo, forward, hasIndex);
+        final KeySchema keySchema = hasIndex ? indexKeySchema : baseKeySchema;
+        final SegmentedCacheFunction cacheFunction = hasIndex ? indexKeyCacheFunction : baseKeyCacheFunction;
+        final HasNextCondition hasNextCondition = keySchema.hasNextCondition(key, key, timeFrom, timeTo, forward);
+
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> filteredCacheIterator =
+            new FilteredCacheIterator(cacheIterator, hasNextCondition, cacheFunction);
+
+        final Function<byte[], Long> tsExtractor = hasIndex ? KeyFirstWindowKeySchema::extractStoreTimestamp
+            : TimeFirstWindowKeySchema::extractStoreTimestamp;
+        return new MergedSortedCacheWindowStoreIterator(filteredCacheIterator, underlyingIterator, forward, tsExtractor);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> fetch(final Bytes keyFrom,
+                                                           final Bytes keyTo,
+                                                           final long timeFrom,
+                                                           final long timeTo) {
+        if (keyFrom != null && keyTo != null && keyFrom.compareTo(keyTo) > 0) {
+            LOG.warn("Returning empty iterator for fetch with invalid key range: from > to. " +
+                "This may be due to range arguments set in the wrong order, " +
+                "or serdes that don't preserve ordering when lexicographically comparing the serialized bytes. " +
+                "Note that the built-in numerical serdes do not follow this for negative numbers");
+            return KeyValueIterators.emptyIterator();
+        }
+
+        // since this function may not access the underlying inner store, we need to validate
+        // if store is open outside as well.
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator =
+            wrapped().fetch(keyFrom, keyTo, timeFrom, timeTo);
+        if (context.cache() == null) {
+            return underlyingIterator;
+        }
+
+        return fetchKeyRange(underlyingIterator, keyFrom, keyTo, timeFrom, timeTo, true);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetch(final Bytes keyFrom,
+                                                                   final Bytes keyTo,
+                                                                   final long timeFrom,
+                                                                   final long timeTo) {
+        if (keyFrom != null && keyTo != null && keyFrom.compareTo(keyTo) > 0) {
+            LOG.warn("Returning empty iterator for fetch with invalid key range: from > to. "
+                + "This may be due to serdes that don't preserve ordering when lexicographically comparing the serialized bytes. " +
+                "Note that the built-in numerical serdes do not follow this for negative numbers");
+            return KeyValueIterators.emptyIterator();
+        }
+
+        // since this function may not access the underlying inner store, we need to validate
+        // if store is open outside as well.
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator =
+            wrapped().backwardFetch(keyFrom, keyTo, timeFrom, timeTo);
+        if (context.cache() == null) {
+            return underlyingIterator;
+        }
+
+        return fetchKeyRange(underlyingIterator, keyFrom, keyTo, timeFrom, timeTo, false);
+    }
+
+    private KeyValueIterator<Windowed<Bytes>, byte[]> fetchKeyRange(final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator,
+                                                                    final Bytes keyFrom,
+                                                                    final Bytes keyTo,
+                                                                    final long timeFrom,
+                                                                    final long timeTo,
+                                                                    final boolean forward) {
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> cacheIterator = new CacheIteratorWrapper(
+            keyFrom, keyTo, timeFrom, timeTo, forward, hasIndex);
+
+        final KeySchema keySchema = hasIndex ? indexKeySchema : baseKeySchema;
+        final HasNextCondition hasNextCondition = keySchema.hasNextCondition(keyFrom, keyTo, timeFrom, timeTo, forward);
+        final SegmentedCacheFunction cacheFunction = hasIndex ? indexKeyCacheFunction : baseKeyCacheFunction;
+
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> filteredCacheIterator =
+            new FilteredCacheIterator(cacheIterator, hasNextCondition, cacheFunction);
+        final StoreKeyToWindowKey storeKeyToWindowKey = hasIndex ? KeyFirstWindowKeySchema::fromStoreKey : TimeFirstWindowKeySchema::fromStoreKey;
+        final WindowKeyToBytes windowKeyToBytes = hasIndex ? KeyFirstWindowKeySchema::toStoreKeyBinary : TimeFirstWindowKeySchema::toStoreKeyBinary;
+
+        return new MergedSortedCacheWindowStoreKeyValueIterator(
+            filteredCacheIterator,
+            underlyingIterator,
+            bytesSerdes,
+            windowSize,
+            cacheFunction,
+            forward,
+            storeKeyToWindowKey,
+            windowKeyToBytes
+        );
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> fetchAll(final long timeFrom,
+                                                              final long timeTo) {
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator = wrapped().fetchAll(timeFrom, timeTo);
+        return fetchAllInternal(underlyingIterator, timeFrom, timeTo, true);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardFetchAll(final long timeFrom,
+                                                                      final long timeTo) {
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator = wrapped().backwardFetchAll(timeFrom, timeTo);
+        return fetchAllInternal(underlyingIterator, timeFrom, timeTo, false);
+    }
+
+    private KeyValueIterator<Windowed<Bytes>, byte[]> fetchAllInternal(final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator,
+                                                                       final long timeFrom,
+                                                                       final long timeTo,
+                                                                       final boolean forward) {
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> cacheIterator = new CacheIteratorWrapper(
+            null, null, timeFrom, timeTo, forward, false);
+        final HasNextCondition hasNextCondition = baseKeySchema.hasNextCondition(null, null, timeFrom, timeTo, forward);
+        final PeekingKeyValueIterator<Bytes, LRUCacheEntry> filteredCacheIterator =
+            new FilteredCacheIterator(cacheIterator, hasNextCondition, baseKeyCacheFunction);
+
+        final StoreKeyToWindowKey storeKeyToWindowKey = TimeFirstWindowKeySchema::fromStoreKey;
+        final WindowKeyToBytes windowKeyToBytes = TimeFirstWindowKeySchema::toStoreKeyBinary;
+
+        return new MergedSortedCacheWindowStoreKeyValueIterator(
+            filteredCacheIterator,
+            underlyingIterator,
+            bytesSerdes,
+            windowSize,
+            baseKeyCacheFunction,
+            forward,
+            storeKeyToWindowKey,
+            windowKeyToBytes
+        );
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> all() {
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator = wrapped().all();
+        return fetchAllInternal(underlyingIterator, 0, Long.MAX_VALUE, true);
+    }
+
+    @Override
+    public KeyValueIterator<Windowed<Bytes>, byte[]> backwardAll() {
+        validateStoreOpen();
+
+        final KeyValueIterator<Windowed<Bytes>, byte[]> underlyingIterator = wrapped().backwardAll();
+        return fetchAllInternal(underlyingIterator, 0, Long.MAX_VALUE, false);
+    }
+
+    @Override
+    public synchronized void flush() {
+        context.cache().flush(cacheName);
+        wrapped().flush();
+    }
+
+    @Override
+    public void flushCache() {
+        context.cache().flush(cacheName);
+    }
+
+    @Override
+    public synchronized void close() {
+        final LinkedList<RuntimeException> suppressed = executeAll(
+            () -> context.cache().flush(cacheName),
+            () -> context.cache().close(cacheName),
+            wrapped()::close
+        );
+        if (!suppressed.isEmpty()) {
+            throwSuppressed("Caught an exception while closing caching window store for store " + name(),
+                suppressed);
+        }
+    }
+
+
+    private class CacheIteratorWrapper implements PeekingKeyValueIterator<Bytes, LRUCacheEntry> {
+
+        private final long segmentInterval;
+        private final Bytes keyFrom;
+        private final Bytes keyTo;
+        private final long timeTo;
+        private final boolean forward;
+        private final boolean useIndex; // If we are iterating from index
+
+        private long lastSegmentId;
+        private long currentSegmentId;
+        private Bytes cacheKeyFrom;
+        private Bytes cacheKeyTo;
+        private LRUCacheEntry cachedBaseValue;
+        private final SegmentedCacheFunction cacheFunction;
+
+        private ThreadCache.MemoryLRUCacheBytesIterator current;
+
+        private CacheIteratorWrapper(final Bytes key,
+                                     final long timeFrom,
+                                     final long timeTo,
+                                     final boolean forward,
+                                     final boolean index) {
+            this(key, key, timeFrom, timeTo, forward, index);
+        }
+
+        private CacheIteratorWrapper(final Bytes keyFrom,
+                                     final Bytes keyTo,
+                                     final long timeFrom,
+                                     final long timeTo,
+                                     final boolean forward,
+                                     final boolean index) {
+            this.keyFrom = keyFrom;
+            this.keyTo = keyTo;
+            this.timeTo = timeTo;
+            this.forward = forward;
+            this.useIndex = index;
+
+            cacheFunction = index ? indexKeyCacheFunction : baseKeyCacheFunction;
+
+            this.segmentInterval = cacheFunction.getSegmentInterval();
+
+            if (forward) {
+                this.lastSegmentId = cacheFunction.segmentId(Math.min(timeTo, maxObservedTimestamp.get()));
+                this.currentSegmentId = cacheFunction.segmentId(timeFrom);
+
+                setCacheKeyRange(timeFrom, currentSegmentLastTime());
+                this.current = context.cache().range(cacheName, cacheKeyFrom, cacheKeyTo);
+            } else {
+                this.currentSegmentId = cacheFunction.segmentId(Math.min(timeTo, maxObservedTimestamp.get()));
+                this.lastSegmentId = cacheFunction.segmentId(timeFrom);
+
+                setCacheKeyRange(currentSegmentBeginTime(), Math.min(timeTo, maxObservedTimestamp.get()));
+                this.current = context.cache().reverseRange(cacheName, cacheKeyFrom, cacheKeyTo);
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            if (current == null) {
+                return false;
+            }
+
+            if (useIndex) {
+                do {
+                    // If iterating from index, need to make sure base key/value exist in cache
+                    while (current.hasNext()) {
+                        final Bytes cacheIndexKey = current.peekNextKey();
+                        final Bytes indexKey = indexKeyCacheFunction.key(cacheIndexKey);
+                        final Bytes baseKey = indexKeyToBaseKey(indexKey);
+                        final Bytes cachedBaseKey = baseKeyCacheFunction.cacheKey(baseKey);
+                        cachedBaseValue = context.cache().get(cacheName, cachedBaseKey);
+                        if (cachedBaseValue != null) {
+                            return true;
+                        }
+                        current.next();
+                    }
+                    getNextSegmentIterator();
+                } while (current != null);
+                return false;
+            }
+
+            if (current.hasNext()) {
+                return true;
+            }
+
+            while (!current.hasNext()) {
+                getNextSegmentIterator();
+                if (current == null) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        @Override
+        public Bytes peekNextKey() {
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            return current.peekNextKey();
+        }
+
+        @Override
+        public KeyValue<Bytes, LRUCacheEntry> peekNext() {
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            if (useIndex) {
+                final KeyValue<Bytes, LRUCacheEntry> kv = current.peekNext();
+                return KeyValue.pair(kv.key, cachedBaseValue);
+            }
+            return current.peekNext();
+        }
+
+        @Override
+        public KeyValue<Bytes, LRUCacheEntry> next() {
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            if (useIndex) {
+                final KeyValue<Bytes, LRUCacheEntry> kv = current.next();
+                return KeyValue.pair(kv.key, cachedBaseValue);
+            }
+            return current.next();
+        }
+
+        @Override
+        public void close() {
+            current.close();
+        }
+
+        private long currentSegmentBeginTime() {
+            return currentSegmentId * segmentInterval;
+        }
+
+        private long currentSegmentLastTime() {
+            return Math.min(timeTo, currentSegmentBeginTime() + segmentInterval - 1);
+        }
+
+        private void getNextSegmentIterator() {
+            if (forward) {
+                ++currentSegmentId;
+                // updating as maxObservedTimestamp can change while iterating
+                lastSegmentId = cacheFunction.segmentId(Math.min(timeTo, maxObservedTimestamp.get()));
+
+                if (currentSegmentId > lastSegmentId) {
+                    current = null;
+                    return;
+                }
+
+                setCacheKeyRange(currentSegmentBeginTime(), currentSegmentLastTime());
+
+                current.close();
+
+                current = context.cache().range(cacheName, cacheKeyFrom, cacheKeyTo);
+            } else {
+                --currentSegmentId;
+
+                // last segment id is stable when iterating backward, therefore no need to update
+                if (currentSegmentId < lastSegmentId) {
+                    current = null;
+                    return;
+                }
+
+                setCacheKeyRange(currentSegmentBeginTime(), currentSegmentLastTime());
+
+                current.close();
+
+                current = context.cache().reverseRange(cacheName, cacheKeyFrom, cacheKeyTo);
+            }
+        }
+
+        private void setCacheKeyRange(final long lowerRangeEndTime, final long upperRangeEndTime) {
+            if (cacheFunction.segmentId(lowerRangeEndTime) != cacheFunction.segmentId(upperRangeEndTime)) {
+                throw new IllegalStateException("Error iterating over segments: segment interval has changed");
+            }
+
+            final KeySchema schema = useIndex ? indexKeySchema : baseKeySchema;
+
+            if (keyFrom != null && keyFrom.equals(keyTo)) {
+                cacheKeyFrom = cacheFunction.cacheKey(schema.lowerRangeFixedSize(keyFrom, lowerRangeEndTime), currentSegmentId);
+                cacheKeyTo = cacheFunction.cacheKey(schema.upperRangeFixedSize(keyTo, upperRangeEndTime), currentSegmentId);
+            } else {
+                cacheKeyFrom = cacheFunction.cacheKey(schema.lowerRange(keyFrom, lowerRangeEndTime), currentSegmentId);
+                cacheKeyTo = cacheFunction.cacheKey(schema.upperRange(keyTo, timeTo), currentSegmentId);
+            }
+        }
+    }
+}
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilder.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilder.java
index 9d3363033d121..4eab5bcb251ae 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilder.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilder.java
@@ -85,12 +85,31 @@ private WindowStore<Bytes, byte[]> maybeWrapCaching(final WindowStore<Bytes, byt
         if (!enableCaching) {
             return inner;
         }
+
+        final boolean isTimeOrdered = isTimeOrderedStore(inner);
+        if (isTimeOrdered) {
+            return new TimeOrderedCachingWindowStore(
+                inner,
+                storeSupplier.windowSize(),
+                storeSupplier.segmentIntervalMs());
+        }
+
         return new CachingWindowStore(
             inner,
             storeSupplier.windowSize(),
             storeSupplier.segmentIntervalMs());
     }
 
+    private boolean isTimeOrderedStore(final StateStore stateStore) {
+        if (stateStore instanceof RocksDBTimeOrderedWindowStore) {
+            return true;
+        }
+        if (stateStore instanceof WrappedStateStore) {
+            return isTimeOrderedStore(((WrappedStateStore) stateStore).wrapped());
+        }
+        return false;
+    }
+
     private WindowStore<Bytes, byte[]> maybeWrapLogging(final WindowStore<Bytes, byte[]> inner) {
         if (!enableLogging) {
             return inner;
diff --git a/streams/src/main/java/org/apache/kafka/streams/state/internals/WrappedSessionStoreIterator.java b/streams/src/main/java/org/apache/kafka/streams/state/internals/WrappedSessionStoreIterator.java
index ce26029af4fbd..3a39a959664c8 100644
--- a/streams/src/main/java/org/apache/kafka/streams/state/internals/WrappedSessionStoreIterator.java
+++ b/streams/src/main/java/org/apache/kafka/streams/state/internals/WrappedSessionStoreIterator.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.streams.state.internals;
 
+import java.util.function.Function;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.kstream.Windowed;
@@ -24,9 +25,16 @@
 class WrappedSessionStoreIterator implements KeyValueIterator<Windowed<Bytes>, byte[]> {
 
     private final KeyValueIterator<Bytes, byte[]> bytesIterator;
+    private final Function<Bytes, Windowed<Bytes>> windowConstructor;
 
     WrappedSessionStoreIterator(final KeyValueIterator<Bytes, byte[]> bytesIterator) {
+        this(bytesIterator, SessionKeySchema::from);
+    }
+
+    WrappedSessionStoreIterator(final KeyValueIterator<Bytes, byte[]> bytesIterator,
+                                final Function<Bytes, Windowed<Bytes>> windowConstructor) {
         this.bytesIterator = bytesIterator;
+        this.windowConstructor = windowConstructor;
     }
 
     @Override
@@ -36,7 +44,7 @@ public void close() {
 
     @Override
     public Windowed<Bytes> peekNextKey() {
-        return SessionKeySchema.from(bytesIterator.peekNextKey());
+        return windowConstructor.apply(bytesIterator.peekNextKey());
     }
 
     @Override
@@ -47,6 +55,6 @@ public boolean hasNext() {
     @Override
     public KeyValue<Windowed<Bytes>, byte[]> next() {
         final KeyValue<Bytes, byte[]> next = bytesIterator.next();
-        return KeyValue.pair(SessionKeySchema.from(next.key), next.value);
+        return KeyValue.pair(windowConstructor.apply(next.key), next.value);
     }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java
index 9b9a671b471b4..b788e54923a12 100644
--- a/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/KafkaStreamsTest.java
@@ -20,9 +20,13 @@
 import org.apache.kafka.clients.admin.ListOffsetsResult;
 import org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo;
 import org.apache.kafka.clients.admin.MockAdminClient;
+import org.apache.kafka.clients.admin.RemoveMembersFromConsumerGroupResult;
 import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerGroupMetadata;
+import org.apache.kafka.clients.consumer.MockConsumer;
 import org.apache.kafka.clients.producer.MockProducer;
 import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.internals.KafkaFutureImpl;
 import org.apache.kafka.common.metrics.MetricConfig;
@@ -51,8 +55,8 @@
 import org.apache.kafka.streams.processor.internals.StateDirectory;
 import org.apache.kafka.streams.processor.internals.StreamThread;
 import org.apache.kafka.streams.processor.internals.StreamsMetadataState;
-import org.apache.kafka.streams.processor.internals.TopologyMetadata;
 import org.apache.kafka.streams.processor.internals.ThreadMetadataImpl;
+import org.apache.kafka.streams.processor.internals.TopologyMetadata;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
 import org.apache.kafka.streams.state.KeyValueStore;
@@ -70,6 +74,7 @@
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.powermock.api.easymock.PowerMock;
 import org.powermock.api.easymock.annotation.Mock;
@@ -86,6 +91,7 @@
 import java.util.Properties;
 import java.util.Set;
 import java.util.UUID;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ThreadFactory;
@@ -94,10 +100,9 @@
 
 import static java.util.Collections.emptyList;
 import static java.util.Collections.singletonList;
-import static org.apache.kafka.streams.state.QueryableStoreTypes.keyValueStore;
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForApplicationState;
-
+import static org.apache.kafka.streams.state.QueryableStoreTypes.keyValueStore;
 import static org.apache.kafka.test.TestUtils.waitForCondition;
 import static org.easymock.EasyMock.anyInt;
 import static org.easymock.EasyMock.anyLong;
@@ -119,6 +124,8 @@
 @RunWith(PowerMockRunner.class)
 @PrepareForTest({KafkaStreams.class, StreamThread.class, ClientMetrics.class, StreamsConfigUtils.class})
 public class KafkaStreamsTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private static final int NUM_THREADS = 2;
     private final static String APPLICATION_ID = "appId";
@@ -584,6 +591,45 @@ public void testCloseIsIdempotent() {
         }
     }
 
+    @Test
+    public void testPauseResume() {
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
+            streams.start();
+            streams.pause();
+            Assert.assertTrue(streams.isPaused());
+            streams.resume();
+            Assert.assertFalse(streams.isPaused());
+        }
+    }
+
+    @Test
+    public void testStartingPaused() {
+        // This test shows that a KafkaStreams instance can be started "paused"
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
+            streams.pause();
+            streams.start();
+            Assert.assertTrue(streams.isPaused());
+            streams.resume();
+            Assert.assertFalse(streams.isPaused());
+        }
+    }
+
+    @Test
+    public void testShowPauseResumeAreIdempotent() {
+        // This test shows that a KafkaStreams instance can be started "paused"
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
+            streams.start();
+            streams.pause();
+            Assert.assertTrue(streams.isPaused());
+            streams.pause();
+            Assert.assertTrue(streams.isPaused());
+            streams.resume();
+            Assert.assertFalse(streams.isPaused());
+            streams.resume();
+            Assert.assertFalse(streams.isPaused());
+        }
+    }
+
     @Test
     public void shouldAddThreadWhenRunning() throws InterruptedException {
         props.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1);
@@ -751,6 +797,24 @@ public void shouldThrowOnCleanupWhileRunning() throws InterruptedException {
         }
     }
 
+    @Test
+    public void shouldThrowOnCleanupWhilePaused() throws InterruptedException {
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
+            streams.start();
+            waitForCondition(
+                () -> streams.state() == KafkaStreams.State.RUNNING,
+                "Streams never started.");
+
+            streams.pause();
+            waitForCondition(
+                streams::isPaused,
+                "Streams did not pause.");
+
+            assertThrows("Cannot clean up while running.", IllegalStateException.class,
+                streams::cleanUp);
+        }
+    }
+
     @Test
     public void shouldThrowOnCleanupWhileShuttingDown() throws InterruptedException {
         final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time);
@@ -765,6 +829,72 @@ public void shouldThrowOnCleanupWhileShuttingDown() throws InterruptedException
         assertThat(streams.state() == State.PENDING_SHUTDOWN, equalTo(true));
     }
 
+    @Test
+    public void shouldThrowOnCleanupWhileShuttingDownStreamClosedWithCloseOptionLeaveGroupFalse() throws InterruptedException, ExecutionException {
+
+        final RemoveMembersFromConsumerGroupResult result = EasyMock.mock(RemoveMembersFromConsumerGroupResult.class);
+
+        final KafkaFuture<Void> memberResultFuture = EasyMock.mock(KafkaFuture.class);
+
+        final MockAdminClient mockAdminClient = EasyMock.partialMockBuilder(MockAdminClient.class)
+                .addMockedMethod("removeMembersFromConsumerGroup").createMock();
+
+        final MockConsumer<byte[], byte[]> mockConsumer = EasyMock.partialMockBuilder(MockConsumer.class)
+                .addMockedMethod("groupMetadata").createMock();
+
+        final ConsumerGroupMetadata consumerGroupMetadata = EasyMock.mock(ConsumerGroupMetadata.class);
+
+        final Optional<String> groupInstanceId = Optional.of("test-instance-id");
+
+        EasyMock.expect(memberResultFuture.get());
+        EasyMock.expect(result.memberResult(anyObject())).andStubReturn(memberResultFuture);
+        EasyMock.expect(consumerGroupMetadata.groupInstanceId()).andReturn(groupInstanceId);
+        EasyMock.expect(mockAdminClient.removeMembersFromConsumerGroup(anyObject(), anyObject())).andStubReturn(result);
+        EasyMock.expect(mockConsumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
+
+        final MockClientSupplier mockClientSupplier = EasyMock.partialMockBuilder(MockClientSupplier.class)
+                .addMockedMethod("getAdmin")
+                .addMockedMethod("getConsumer")
+                .createMock();
+
+        EasyMock.expect(mockClientSupplier.getAdmin(anyObject())).andReturn(mockAdminClient);
+        EasyMock.expect(mockClientSupplier.getConsumer(anyObject())).andReturn(mockConsumer);
+
+        EasyMock.replay(result, consumerGroupMetadata, mockConsumer, mockAdminClient, mockClientSupplier);
+
+        final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, mockClientSupplier, time);
+        streams.start();
+        waitForCondition(
+                () -> streams.state() == KafkaStreams.State.RUNNING,
+                "Streams never started.");
+
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ZERO);
+        closeOptions.leaveGroup(true);
+
+        streams.close(closeOptions);
+        assertThat(streams.state() == State.PENDING_SHUTDOWN, equalTo(true));
+        assertThrows(IllegalStateException.class, streams::cleanUp);
+        assertThat(streams.state() == State.PENDING_SHUTDOWN, equalTo(true));
+    }
+
+    @Test
+    public void shouldThrowOnCleanupWhileShuttingDownStreamClosedWithCloseOptionLeaveGroupTrue() throws InterruptedException {
+        final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time);
+        streams.start();
+        waitForCondition(
+                () -> streams.state() == KafkaStreams.State.RUNNING,
+                "Streams never started.");
+
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ZERO);
+
+        streams.close(closeOptions);
+        assertThat(streams.state() == State.PENDING_SHUTDOWN, equalTo(true));
+        assertThrows(IllegalStateException.class, streams::cleanUp);
+        assertThat(streams.state() == State.PENDING_SHUTDOWN, equalTo(true));
+    }
+
     @Test
     public void shouldNotGetAllTasksWhenNotRunning() throws InterruptedException {
         try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
@@ -887,6 +1017,138 @@ public void shouldNotBlockInCloseForZeroDuration() {
         }
     }
 
+    @Test
+    public void shouldReturnFalseOnCloseWithCloseOptionWithLeaveGroupFalseWhenThreadsHaventTerminated() {
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ofMillis(10L));
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier)) {
+            assertFalse(streams.close(closeOptions));
+        }
+    }
+
+    @Test
+    public void shouldThrowOnNegativeTimeoutForCloseWithCloseOptionLeaveGroupFalse() {
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ofMillis(-1L));
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier, time)) {
+            assertThrows(IllegalArgumentException.class, () -> streams.close(closeOptions));
+        }
+    }
+
+    @Test
+    public void shouldNotBlockInCloseWithCloseOptionLeaveGroupFalseForZeroDuration() {
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ZERO);
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, supplier)) {
+            assertFalse(streams.close(closeOptions));
+        }
+    }
+
+    @Test
+    public void shouldReturnFalseOnCloseWithCloseOptionWithLeaveGroupTrueWhenThreadsHaventTerminated() throws ExecutionException, InterruptedException {
+
+        final RemoveMembersFromConsumerGroupResult result = EasyMock.mock(RemoveMembersFromConsumerGroupResult.class);
+
+        final KafkaFuture<Void> memberResultFuture = EasyMock.mock(KafkaFuture.class);
+
+        final MockAdminClient mockAdminClient = EasyMock.partialMockBuilder(MockAdminClient.class)
+                .addMockedMethod("removeMembersFromConsumerGroup").createMock();
+
+        final MockConsumer<byte[], byte[]> mockConsumer = EasyMock.partialMockBuilder(MockConsumer.class)
+                .addMockedMethod("groupMetadata").createMock();
+
+        final ConsumerGroupMetadata consumerGroupMetadata = EasyMock.mock(ConsumerGroupMetadata.class);
+
+        final Optional<String> groupInstanceId = Optional.of("test-instance-id");
+
+        EasyMock.expect(memberResultFuture.get());
+        EasyMock.expect(result.memberResult(anyObject())).andStubReturn(memberResultFuture);
+        EasyMock.expect(consumerGroupMetadata.groupInstanceId()).andReturn(groupInstanceId);
+        EasyMock.expect(mockAdminClient.removeMembersFromConsumerGroup(anyObject(), anyObject())).andStubReturn(result);
+        EasyMock.expect(mockConsumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
+
+        final MockClientSupplier mockClientSupplier = EasyMock.partialMockBuilder(MockClientSupplier.class)
+                .addMockedMethod("getAdmin")
+                .addMockedMethod("getConsumer")
+                .createMock();
+
+        EasyMock.expect(mockClientSupplier.getAdmin(anyObject())).andReturn(mockAdminClient);
+        EasyMock.expect(mockClientSupplier.getConsumer(anyObject())).andReturn(mockConsumer);
+
+        EasyMock.replay(result, consumerGroupMetadata, mockConsumer, mockAdminClient, mockClientSupplier);
+
+
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ofMillis(10L));
+        closeOptions.leaveGroup(true);
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, mockClientSupplier)) {
+            assertFalse(streams.close(closeOptions));
+        }
+    }
+
+    @Test
+    public void shouldThrowOnNegativeTimeoutForCloseWithCloseOptionLeaveGroupTrue() {
+        final RemoveMembersFromConsumerGroupResult result = EasyMock.mock(RemoveMembersFromConsumerGroupResult.class);
+
+        final MockAdminClient mockAdminClient = EasyMock.partialMockBuilder(MockAdminClient.class)
+                .addMockedMethod("removeMembersFromConsumerGroup").createMock();
+
+        EasyMock.expect(mockAdminClient.removeMembersFromConsumerGroup(anyObject(), anyObject())).andStubReturn(result);
+
+        final MockClientSupplier mockClientSupplier = EasyMock.partialMockBuilder(MockClientSupplier.class)
+                .addMockedMethod("getAdmin").createMock();
+        EasyMock.expect(mockClientSupplier.getAdmin(anyObject())).andReturn(mockAdminClient);
+
+        EasyMock.replay(result, mockAdminClient, mockClientSupplier);
+
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ofMillis(-1L));
+        closeOptions.leaveGroup(true);
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, mockClientSupplier, time)) {
+            assertThrows(IllegalArgumentException.class, () -> streams.close(closeOptions));
+        }
+    }
+
+    @Test
+    public void shouldNotBlockInCloseWithCloseOptionLeaveGroupTrueForZeroDuration() throws ExecutionException, InterruptedException {
+        final RemoveMembersFromConsumerGroupResult result = EasyMock.mock(RemoveMembersFromConsumerGroupResult.class);
+
+        final KafkaFuture<Void> memberResultFuture = EasyMock.mock(KafkaFuture.class);
+
+        final MockAdminClient mockAdminClient = EasyMock.partialMockBuilder(MockAdminClient.class)
+                .addMockedMethod("removeMembersFromConsumerGroup").createMock();
+
+        final MockConsumer<byte[], byte[]> mockConsumer = EasyMock.partialMockBuilder(MockConsumer.class)
+                .addMockedMethod("groupMetadata").createMock();
+
+        final ConsumerGroupMetadata consumerGroupMetadata = EasyMock.mock(ConsumerGroupMetadata.class);
+
+        final Optional<String> groupInstanceId = Optional.of("test-instance-id");
+
+        EasyMock.expect(memberResultFuture.get());
+        EasyMock.expect(result.memberResult(anyObject())).andStubReturn(memberResultFuture);
+        EasyMock.expect(consumerGroupMetadata.groupInstanceId()).andReturn(groupInstanceId);
+        EasyMock.expect(mockAdminClient.removeMembersFromConsumerGroup(anyObject(), anyObject())).andStubReturn(result);
+        EasyMock.expect(mockConsumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
+
+        final MockClientSupplier mockClientSupplier = EasyMock.partialMockBuilder(MockClientSupplier.class)
+                .addMockedMethod("getAdmin")
+                .addMockedMethod("getConsumer")
+                .createMock();
+
+        EasyMock.expect(mockClientSupplier.getAdmin(anyObject())).andReturn(mockAdminClient);
+        EasyMock.expect(mockClientSupplier.getConsumer(anyObject())).andReturn(mockConsumer);
+
+        EasyMock.replay(result, consumerGroupMetadata, mockConsumer, mockAdminClient, mockClientSupplier);
+
+        final KafkaStreams.CloseOptions closeOptions = new KafkaStreams.CloseOptions();
+        closeOptions.timeout(Duration.ZERO);
+        closeOptions.leaveGroup(true);
+        try (final KafkaStreams streams = new KafkaStreams(getBuilderWithSource().build(), props, mockClientSupplier)) {
+            assertFalse(streams.close(closeOptions));
+        }
+    }
+
     @Test
     public void shouldTriggerRecordingOfRocksDBMetricsIfRecordingLevelIsDebug() {
         PowerMock.mockStatic(Executors.class);
diff --git a/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java b/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java
index 24f7d5d61714e..01e09746b0d90 100644
--- a/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/KeyValueTest.java
@@ -16,12 +16,16 @@
  */
 package org.apache.kafka.streams;
 
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 public class KeyValueTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     @Test
     public void shouldHaveSameEqualsAndHashCode() {
diff --git a/streams/src/test/java/org/apache/kafka/streams/StreamsBuilderTest.java b/streams/src/test/java/org/apache/kafka/streams/StreamsBuilderTest.java
index 06854db06e041..d7553c85b6944 100644
--- a/streams/src/test/java/org/apache/kafka/streams/StreamsBuilderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/StreamsBuilderTest.java
@@ -52,7 +52,9 @@
 import org.apache.kafka.test.NoopValueTransformer;
 import org.apache.kafka.test.NoopValueTransformerWithKey;
 import org.apache.kafka.test.StreamsTestUtils;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.time.Duration;
 import java.time.Instant;
@@ -77,6 +79,8 @@
 import static org.junit.Assert.fail;
 
 public class StreamsBuilderTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private static final String STREAM_TOPIC = "stream-topic";
 
@@ -622,6 +626,7 @@ public void shouldUseSpecifiedNameForForEachOperation() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldUseSpecifiedNameForTransform() {
         builder.stream(STREAM_TOPIC).transform(() -> null, Named.as(STREAM_OPERATION_NAME));
         builder.build();
@@ -630,6 +635,7 @@ public void shouldUseSpecifiedNameForTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldUseSpecifiedNameForTransformValues() {
         builder.stream(STREAM_TOPIC).transformValues(() -> new NoopValueTransformer<>(), Named.as(STREAM_OPERATION_NAME));
         builder.build();
@@ -638,6 +644,7 @@ public void shouldUseSpecifiedNameForTransformValues() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldUseSpecifiedNameForTransformValuesWithKey() {
         builder.stream(STREAM_TOPIC).transformValues(() -> new NoopValueTransformerWithKey<>(), Named.as(STREAM_OPERATION_NAME));
         builder.build();
@@ -944,6 +951,7 @@ public void shouldUseSpecifiedNameForPrintOperation() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldUseSpecifiedNameForFlatTransformValueOperation() {
         builder.stream(STREAM_TOPIC).flatTransformValues(() -> new NoopValueTransformer<>(), Named.as(STREAM_OPERATION_NAME));
         builder.build();
@@ -952,7 +960,7 @@ public void shouldUseSpecifiedNameForFlatTransformValueOperation() {
     }
 
     @Test
-    @SuppressWarnings({"unchecked", "rawtypes"})
+    @SuppressWarnings({"unchecked", "rawtypes", "deprecation"})
     public void shouldUseSpecifiedNameForFlatTransformValueWithKeyOperation() {
         builder.stream(STREAM_TOPIC).flatTransformValues(() -> new NoopValueTransformerWithKey(), Named.as(STREAM_OPERATION_NAME));
         builder.build();
diff --git a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java
index 2e1b0d842e146..435dd249f2f6f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/StreamsConfigTest.java
@@ -34,23 +34,32 @@
 import org.apache.kafka.streams.processor.internals.StreamsPartitionAssignor;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.io.File;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 
+import static java.util.Collections.nCopies;
 import static org.apache.kafka.common.IsolationLevel.READ_COMMITTED;
 import static org.apache.kafka.common.IsolationLevel.READ_UNCOMMITTED;
+import static org.apache.kafka.common.utils.Utils.mkSet;
 import static org.apache.kafka.streams.StreamsConfig.AT_LEAST_ONCE;
+import static org.apache.kafka.streams.StreamsConfig.DEFAULT_DSL_STORE_CONFIG;
 import static org.apache.kafka.streams.StreamsConfig.EXACTLY_ONCE;
 import static org.apache.kafka.streams.StreamsConfig.EXACTLY_ONCE_BETA;
 import static org.apache.kafka.streams.StreamsConfig.EXACTLY_ONCE_V2;
+import static org.apache.kafka.streams.StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH;
+import static org.apache.kafka.streams.StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH;
 import static org.apache.kafka.streams.StreamsConfig.STATE_DIR_CONFIG;
 import static org.apache.kafka.streams.StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG;
 import static org.apache.kafka.streams.StreamsConfig.adminClientPrefix;
@@ -70,7 +79,8 @@
 import static org.junit.Assert.fail;
 
 public class StreamsConfigTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private final Properties props = new Properties();
     private StreamsConfig streamsConfig;
 
@@ -1007,7 +1017,7 @@ public void shouldSpecifyNoOptimizationWhenNotExplicitlyAddedToConfigs() {
     }
 
     @Test
-    public void shouldSpecifyOptimizationWhenNotExplicitlyAddedToConfigs() {
+    public void shouldSpecifyOptimizationWhenExplicitlyAddedToConfigs() {
         final String expectedOptimizeConfig = "all";
         props.put(TOPOLOGY_OPTIMIZATION_CONFIG, "all");
         final StreamsConfig config = new StreamsConfig(props);
@@ -1021,6 +1031,28 @@ public void shouldThrowConfigExceptionWhenOptimizationConfigNotValueInRange() {
         assertThrows(ConfigException.class, () -> new StreamsConfig(props));
     }
 
+    @Test
+    public void shouldSpecifyRocksdbWhenNotExplicitlyAddedToConfigs() {
+        final String expectedDefaultStoreType = StreamsConfig.ROCKS_DB;
+        final String actualDefaultStoreType = streamsConfig.getString(DEFAULT_DSL_STORE_CONFIG);
+        assertEquals("default.dsl.store should be \"rocksDB\"", expectedDefaultStoreType, actualDefaultStoreType);
+    }
+
+    @Test
+    public void shouldSpecifyInMemoryWhenExplicitlyAddedToConfigs() {
+        final String expectedDefaultStoreType = StreamsConfig.IN_MEMORY;
+        props.put(DEFAULT_DSL_STORE_CONFIG, expectedDefaultStoreType);
+        final StreamsConfig config = new StreamsConfig(props);
+        final String actualDefaultStoreType = config.getString(DEFAULT_DSL_STORE_CONFIG);
+        assertEquals("default.dsl.store should be \"in_memory\"", expectedDefaultStoreType, actualDefaultStoreType);
+    }
+
+    @Test
+    public void shouldThrowConfigExceptionWhenStoreTypeConfigNotValueInRange() {
+        props.put(DEFAULT_DSL_STORE_CONFIG, "bad_config");
+        assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+    }
+
     @SuppressWarnings("deprecation")
     @Test
     public void shouldLogWarningWhenEosAlphaIsUsed() {
@@ -1124,6 +1156,113 @@ public void shouldThrowConfigExceptionIfProbingRebalanceIntervalIsOutsideBounds(
         assertThrows(ConfigException.class, () -> new StreamsConfig(props));
     }
 
+    @Test
+    public void shouldDefaultToEmptyListIfRackAwareAssignmentTagsIsNotSet() {
+        final StreamsConfig config = new StreamsConfig(props);
+        assertTrue(config.getList(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG).isEmpty());
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenClientTagsExceedTheLimit() {
+        final int limit = StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE + 1;
+        for (int i = 0; i < limit; i++) {
+            props.put(StreamsConfig.clientTagPrefix("k" + i), "v" + i);
+        }
+        final ConfigException exception = assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+        assertEquals(
+            String.format("At most %s client tags can be specified using %s prefix.",
+                          StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE,
+                          StreamsConfig.CLIENT_TAG_PREFIX
+            ), exception.getMessage()
+        );
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenRackAwareAssignmentTagsExceedsMaxListSize() {
+        final int limit = StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE + 1;
+        final List<String> rackAwareAssignmentTags = new ArrayList<>();
+        for (int i = 0; i < limit; i++) {
+            final String clientTagKey = "k" + i;
+            rackAwareAssignmentTags.add(clientTagKey);
+            props.put(StreamsConfig.clientTagPrefix(clientTagKey), "v" + i);
+        }
+
+        props.put(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, String.join(",", rackAwareAssignmentTags));
+        final ConfigException exception = assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+        assertEquals(
+            String.format("Invalid value %s for configuration %s: exceeds maximum list size of [%s].",
+                          rackAwareAssignmentTags,
+                          StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG,
+                          StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE),
+            exception.getMessage()
+        );
+    }
+
+    @Test
+    public void shouldSetRackAwareAssignmentTags() {
+        props.put(StreamsConfig.clientTagPrefix("cluster"), "cluster-1");
+        props.put(StreamsConfig.clientTagPrefix("zone"), "eu-central-1a");
+        props.put(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, "cluster,zone");
+        final StreamsConfig config = new StreamsConfig(props);
+        assertEquals(new HashSet<>(config.getList(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG)),
+                     mkSet("cluster", "zone"));
+    }
+
+    @Test
+    public void shouldGetEmptyMapIfClientTagsAreNotSet() {
+        final StreamsConfig config = new StreamsConfig(props);
+        assertTrue(config.getClientTags().isEmpty());
+    }
+
+    @Test
+    public void shouldGetClientTagsMapWhenSet() {
+        props.put(StreamsConfig.clientTagPrefix("zone"), "eu-central-1a");
+        props.put(StreamsConfig.clientTagPrefix("cluster"), "cluster-1");
+        final StreamsConfig config = new StreamsConfig(props);
+        final Map<String, String> clientTags = config.getClientTags();
+        assertEquals(clientTags.size(), 2);
+        assertEquals(clientTags.get("zone"), "eu-central-1a");
+        assertEquals(clientTags.get("cluster"), "cluster-1");
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenClientTagRackAwarenessIsConfiguredWithUnknownTags() {
+        props.put(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, "cluster");
+        assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenClientTagKeyExceedMaxLimit() {
+        final String key = String.join("", nCopies(MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH + 1, "k"));
+        props.put(StreamsConfig.clientTagPrefix(key), "eu-central-1a");
+        final ConfigException exception = assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+        assertEquals(
+            String.format("Invalid value %s for configuration %s: Tag key exceeds maximum length of %s.",
+                          key, StreamsConfig.CLIENT_TAG_PREFIX, StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_KEY_LENGTH),
+            exception.getMessage()
+        );
+    }
+
+    @Test
+    public void shouldThrowExceptionWhenClientTagValueExceedMaxLimit() {
+        final String value = String.join("", nCopies(MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH + 1, "v"));
+        props.put(StreamsConfig.clientTagPrefix("x"), value);
+        final ConfigException exception = assertThrows(ConfigException.class, () -> new StreamsConfig(props));
+        assertEquals(
+            String.format("Invalid value %s for configuration %s: Tag value exceeds maximum length of %s.",
+                          value, StreamsConfig.CLIENT_TAG_PREFIX, StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_VALUE_LENGTH),
+            exception.getMessage()
+        );
+    }
+
+    @Test
+    public void testInvalidSecurityProtocol() {
+        props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "abc");
+        final ConfigException ce = assertThrows(ConfigException.class,
+                () -> new StreamsConfig(props));
+        assertTrue(ce.getMessage().contains(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG));
+    }
+
     static class MisconfiguredSerde implements Serde<Object> {
         @Override
         public void configure(final Map<String, ?>  configs, final boolean isKey) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/TopologyTest.java b/streams/src/test/java/org/apache/kafka/streams/TopologyTest.java
index b332f6c3530b0..8d09cb3ee7a78 100644
--- a/streams/src/test/java/org/apache/kafka/streams/TopologyTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/TopologyTest.java
@@ -25,6 +25,7 @@
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.kstream.SessionWindows;
+import org.apache.kafka.streams.kstream.SlidingWindows;
 import org.apache.kafka.streams.kstream.StreamJoined;
 import org.apache.kafka.streams.kstream.TimeWindows;
 import org.apache.kafka.streams.processor.RecordContext;
@@ -35,18 +36,24 @@
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.SubtopologyDescription;
+import org.apache.kafka.streams.processor.internals.ProcessorTopology;
 import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
+import org.apache.kafka.streams.state.WindowStore;
 import org.apache.kafka.streams.state.internals.KeyValueStoreBuilder;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockKeyValueStore;
 import org.apache.kafka.test.MockProcessorSupplier;
 import org.apache.kafka.test.MockValueJoiner;
+import org.apache.kafka.test.StreamsTestUtils;
 import org.easymock.EasyMock;
 import org.junit.Assert;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.time.Duration;
 import java.util.Arrays;
@@ -58,6 +65,7 @@
 
 import static java.time.Duration.ofMillis;
 import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThrows;
@@ -65,6 +73,8 @@
 
 @SuppressWarnings("deprecation")
 public class TopologyTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private final StoreBuilder<MockKeyValueStore> storeBuilder = EasyMock.createNiceMock(StoreBuilder.class);
     private final KeyValueStoreBuilder<?, ?> globalStoreBuilder = EasyMock.createNiceMock(KeyValueStoreBuilder.class);
@@ -82,7 +92,7 @@ public void shouldNotAllowNullNameWhenAddingSourceWithPattern() {
     }
 
     @Test
-    public void shouldNotAllowNullTopicsWhenAddingSoureWithTopic() {
+    public void shouldNotAllowNullTopicsWhenAddingSourceWithTopic() {
         assertThrows(NullPointerException.class, () -> topology.addSource("source", (String[]) null));
     }
 
@@ -1153,7 +1163,9 @@ public void kGroupedStreamZeroArgCountShouldPreserveTopologyStructure() {
         builder.stream("input-topic")
             .groupByKey()
             .count();
-        final TopologyDescription describe = builder.build().describe();
+        final Topology topology = builder.build();
+
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1164,6 +1176,8 @@ public void kGroupedStreamZeroArgCountShouldPreserveTopologyStructure() {
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
     }
 
     @Test
@@ -1171,8 +1185,10 @@ public void kGroupedStreamNamedMaterializedCountShouldPreserveTopologyStructure(
         final StreamsBuilder builder = new StreamsBuilder();
         builder.stream("input-topic")
             .groupByKey()
-            .count(Materialized.as("count-store"));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, KeyValueStore<Bytes, byte[]>>as("count-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1183,6 +1199,8 @@ public void kGroupedStreamNamedMaterializedCountShouldPreserveTopologyStructure(
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1190,8 +1208,33 @@ public void kGroupedStreamAnonymousMaterializedCountShouldPreserveTopologyStruct
         final StreamsBuilder builder = new StreamsBuilder();
         builder.stream("input-topic")
             .groupByKey()
-            .count(Materialized.with(null, Serdes.Long()));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, KeyValueStore<Bytes, byte[]>>with(null, Serdes.Long())
+                // set store type explicitly with default rocksDB
+                .withStoreType(Materialized.StoreType.ROCKS_DB));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000003\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000003 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000002])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void kGroupedStreamAnonymousStoreTypedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .count(Materialized.as(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1202,6 +1245,32 @@ public void kGroupedStreamAnonymousMaterializedCountShouldPreserveTopologyStruct
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void kGroupedStreamZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .count();
+        final Topology topology = builder.build();
+
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000002 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1211,7 +1280,8 @@ public void timeWindowZeroArgCountShouldPreserveTopologyStructure() {
             .groupByKey()
             .windowedBy(TimeWindows.of(ofMillis(1)))
             .count();
-        final TopologyDescription describe = builder.build().describe();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1222,6 +1292,8 @@ public void timeWindowZeroArgCountShouldPreserveTopologyStructure() {
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
     }
 
     @Test
@@ -1230,8 +1302,9 @@ public void timeWindowNamedMaterializedCountShouldPreserveTopologyStructure() {
         builder.stream("input-topic")
             .groupByKey()
             .windowedBy(TimeWindows.of(ofMillis(1)))
-            .count(Materialized.as("count-store"));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, WindowStore<Bytes, byte[]>>as("count-store").withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1242,6 +1315,8 @@ public void timeWindowNamedMaterializedCountShouldPreserveTopologyStructure() {
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1250,8 +1325,10 @@ public void timeWindowAnonymousMaterializedCountShouldPreserveTopologyStructure(
         builder.stream("input-topic")
             .groupByKey()
             .windowedBy(TimeWindows.of(ofMillis(1)))
-            .count(Materialized.with(null, Serdes.Long()));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, WindowStore<Bytes, byte[]>>with(null, Serdes.Long())
+                .withStoreType(Materialized.StoreType.ROCKS_DB));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1262,6 +1339,381 @@ public void timeWindowAnonymousMaterializedCountShouldPreserveTopologyStructure(
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void timeWindowAnonymousStoreTypeMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(TimeWindows.of(ofMillis(1)))
+            .count(Materialized.as(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000003\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000003 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000002])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void timeWindowZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(TimeWindows.of(ofMillis(1)))
+            .count();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000002 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void slidingWindowZeroArgCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(TimeWindows.of(ofMillis(1)))
+            .count();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000002 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void slidingWindowNamedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(TimeWindows.of(ofMillis(1)))
+            .count(Materialized.<Object, Long, WindowStore<Bytes, byte[]>>as("count-store").withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000001\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000001 (stores: [count-store])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void slidingWindowZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(1)))
+            .count();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000002 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void timeWindowedCogroupedZeroArgCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void timeWindowedCogroupedNamedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "", Materialized.<Object, Object, WindowStore<Bytes, byte[]>>as("aggregate-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000001\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000001 (stores: [aggregate-store])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000002\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000002 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000001\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void timeWindowedCogroupedZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void slidingWindowedCogroupedZeroArgCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void slidingWindowedCogroupedNamedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "", Materialized.<Object, Object, WindowStore<Bytes, byte[]>>as("aggregate-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000001\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000001 (stores: [aggregate-store])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000002\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000002 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000001\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void slidingWindowedCogroupedZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void sessionWindowedCogroupedZeroArgCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "", (aggKey, aggOne, aggTwo) -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void sessionWindowedCogroupedNamedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "", (aggKey, aggOne, aggTwo) -> "", Materialized.<Object, Object, SessionStore<Bytes, byte[]>>as("aggregate-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000001\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000001 (stores: [aggregate-store])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000002\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000002 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000001\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void sessionWindowedCogroupedZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .cogroup((key, value, aggregate) -> value)
+            .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(1)))
+            .aggregate(() -> "", (aggKey, aggOne, aggTwo) -> "");
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> COGROUPKSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: COGROUPKSTREAM-AGGREGATE-0000000002 (stores: [COGROUPKSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> COGROUPKSTREAM-MERGE-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Processor: COGROUPKSTREAM-MERGE-0000000003 (stores: [])\n" +
+                "      --> none\n" +
+                "      <-- COGROUPKSTREAM-AGGREGATE-0000000002\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1271,7 +1723,8 @@ public void sessionWindowZeroArgCountShouldPreserveTopologyStructure() {
             .groupByKey()
             .windowedBy(SessionWindows.with(ofMillis(1)))
             .count();
-        final TopologyDescription describe = builder.build().describe();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1282,6 +1735,8 @@ public void sessionWindowZeroArgCountShouldPreserveTopologyStructure() {
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
     }
 
     @Test
@@ -1290,8 +1745,10 @@ public void sessionWindowNamedMaterializedCountShouldPreserveTopologyStructure()
         builder.stream("input-topic")
             .groupByKey()
             .windowedBy(SessionWindows.with(ofMillis(1)))
-            .count(Materialized.as("count-store"));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, SessionStore<Bytes, byte[]>>as("count-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1302,6 +1759,8 @@ public void sessionWindowNamedMaterializedCountShouldPreserveTopologyStructure()
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1310,8 +1769,33 @@ public void sessionWindowAnonymousMaterializedCountShouldPreserveTopologyStructu
         builder.stream("input-topic")
             .groupByKey()
             .windowedBy(SessionWindows.with(ofMillis(1)))
-            .count(Materialized.with(null, Serdes.Long()));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, SessionStore<Bytes, byte[]>>with(null, Serdes.Long())
+                .withStoreType(Materialized.StoreType.ROCKS_DB));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000003\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000003 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000002])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(true));
+    }
+
+    @Test
+    public void sessionWindowAnonymousStoreTypedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(SessionWindows.with(ofMillis(1)))
+            .count(Materialized.as(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1322,6 +1806,32 @@ public void sessionWindowAnonymousMaterializedCountShouldPreserveTopologyStructu
                 "      <-- KSTREAM-SOURCE-0000000000\n\n",
             describe.toString()
         );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
+    }
+
+    @Test
+    public void sessionWindowZeroArgCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.stream("input-topic")
+            .groupByKey()
+            .windowedBy(SessionWindows.with(ofMillis(1)))
+            .count();
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input-topic])\n" +
+                "      --> KSTREAM-AGGREGATE-0000000002\n" +
+                "    Processor: KSTREAM-AGGREGATE-0000000002 (stores: [KSTREAM-AGGREGATE-STATE-STORE-0000000001])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n\n",
+            describe.toString()
+        );
+
+        assertThat(topology.internalTopologyBuilder.setApplicationId("test").buildTopology().hasPersistentLocalStore(), is(false));
     }
 
     @Test
@@ -1330,8 +1840,8 @@ public void tableZeroArgCountShouldPreserveTopologyStructure() {
         builder.table("input-topic")
             .groupBy((key, value) -> null)
             .count();
-        final TopologyDescription describe = builder.build().describe();
-
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1355,6 +1865,14 @@ public void tableZeroArgCountShouldPreserveTopologyStructure() {
                 "\n",
             describe.toString()
         );
+
+        final ProcessorTopology processorTopology = topology.internalTopologyBuilder.setApplicationId("test").buildTopology();
+        // one for ktable, and one for count operation
+        assertThat(processorTopology.stateStores().size(), is(2));
+        // ktable store is rocksDB (default)
+        assertThat(processorTopology.stateStores().get(0).persistent(), is(true));
+        // count store is rocksDB (default)
+        assertThat(processorTopology.stateStores().get(1).persistent(), is(true));
     }
 
     @Test
@@ -1362,8 +1880,10 @@ public void tableNamedMaterializedCountShouldPreserveTopologyStructure() {
         final StreamsBuilder builder = new StreamsBuilder();
         builder.table("input-topic")
             .groupBy((key, value) -> null)
-            .count(Materialized.as("count-store"));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, KeyValueStore<Bytes, byte[]>>as("count-store")
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1387,6 +1907,57 @@ public void tableNamedMaterializedCountShouldPreserveTopologyStructure() {
                 "\n",
             describe.toString()
         );
+
+        final ProcessorTopology processorTopology = topology.internalTopologyBuilder.setApplicationId("test").buildTopology();
+        // one for ktable, and one for count operation
+        assertThat(processorTopology.stateStores().size(), is(2));
+        // ktable store is rocksDB (default)
+        assertThat(processorTopology.stateStores().get(0).persistent(), is(true));
+        // count store is in-memory
+        assertThat(processorTopology.stateStores().get(1).persistent(), is(false));
+    }
+
+    @Test
+    public void tableNamedMaterializedCountWithTopologyConfigShouldPreserveTopologyStructure() {
+        // override the default store into in-memory
+        final StreamsBuilder builder = new StreamsBuilder(overrideDefaultStore(StreamsConfig.IN_MEMORY));
+        builder.table("input-topic")
+            .groupBy((key, value) -> null)
+            // can still override the default store dynamically
+            .count(Materialized.as(Materialized.StoreType.ROCKS_DB));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topology: my-topology:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000001 (topics: [input-topic])\n" +
+                "      --> KTABLE-SOURCE-0000000002\n" +
+                "    Processor: KTABLE-SOURCE-0000000002 (stores: [input-topic-STATE-STORE-0000000000])\n" +
+                "      --> KTABLE-SELECT-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000001\n" +
+                "    Processor: KTABLE-SELECT-0000000003 (stores: [])\n" +
+                "      --> KSTREAM-SINK-0000000005\n" +
+                "      <-- KTABLE-SOURCE-0000000002\n" +
+                "    Sink: KSTREAM-SINK-0000000005 (topic: KTABLE-AGGREGATE-STATE-STORE-0000000004-repartition)\n" +
+                "      <-- KTABLE-SELECT-0000000003\n" +
+                "\n" +
+                "  Sub-topology: 1\n" +
+                "    Source: KSTREAM-SOURCE-0000000006 (topics: [KTABLE-AGGREGATE-STATE-STORE-0000000004-repartition])\n" +
+                "      --> KTABLE-AGGREGATE-0000000007\n" +
+                "    Processor: KTABLE-AGGREGATE-0000000007 (stores: [KTABLE-AGGREGATE-STATE-STORE-0000000004])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000006\n" +
+                "\n",
+            describe.toString()
+        );
+
+        final ProcessorTopology processorTopology = topology.internalTopologyBuilder.setApplicationId("test").buildTopology();
+        // one for ktable, and one for count operation
+        assertThat(processorTopology.stateStores().size(), is(2));
+        // ktable store is in-memory (default is in-memory)
+        assertThat(processorTopology.stateStores().get(0).persistent(), is(false));
+        // count store is rocksDB
+        assertThat(processorTopology.stateStores().get(1).persistent(), is(true));
     }
 
     @Test
@@ -1394,8 +1965,10 @@ public void tableAnonymousMaterializedCountShouldPreserveTopologyStructure() {
         final StreamsBuilder builder = new StreamsBuilder();
         builder.table("input-topic")
             .groupBy((key, value) -> null)
-            .count(Materialized.with(null, Serdes.Long()));
-        final TopologyDescription describe = builder.build().describe();
+            .count(Materialized.<Object, Long, KeyValueStore<Bytes, byte[]>>with(null, Serdes.Long())
+                .withStoreType(Materialized.StoreType.ROCKS_DB));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
         assertEquals(
             "Topologies:\n" +
                 "   Sub-topology: 0\n" +
@@ -1419,6 +1992,55 @@ public void tableAnonymousMaterializedCountShouldPreserveTopologyStructure() {
                 "\n",
             describe.toString()
         );
+
+        final ProcessorTopology processorTopology = topology.internalTopologyBuilder.setApplicationId("test").buildTopology();
+        // one for ktable, and one for count operation
+        assertThat(processorTopology.stateStores().size(), is(2));
+        // ktable store is rocksDB (default)
+        assertThat(processorTopology.stateStores().get(0).persistent(), is(true));
+        // count store is rocksDB
+        assertThat(processorTopology.stateStores().get(1).persistent(), is(true));
+    }
+
+    @Test
+    public void tableAnonymousStoreTypedMaterializedCountShouldPreserveTopologyStructure() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.table("input-topic")
+            .groupBy((key, value) -> null)
+            .count(Materialized.as(Materialized.StoreType.IN_MEMORY));
+        final Topology topology = builder.build();
+        final TopologyDescription describe = topology.describe();
+        assertEquals(
+            "Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000001 (topics: [input-topic])\n" +
+                "      --> KTABLE-SOURCE-0000000002\n" +
+                "    Processor: KTABLE-SOURCE-0000000002 (stores: [input-topic-STATE-STORE-0000000000])\n" +
+                "      --> KTABLE-SELECT-0000000003\n" +
+                "      <-- KSTREAM-SOURCE-0000000001\n" +
+                "    Processor: KTABLE-SELECT-0000000003 (stores: [])\n" +
+                "      --> KSTREAM-SINK-0000000005\n" +
+                "      <-- KTABLE-SOURCE-0000000002\n" +
+                "    Sink: KSTREAM-SINK-0000000005 (topic: KTABLE-AGGREGATE-STATE-STORE-0000000004-repartition)\n" +
+                "      <-- KTABLE-SELECT-0000000003\n" +
+                "\n" +
+                "  Sub-topology: 1\n" +
+                "    Source: KSTREAM-SOURCE-0000000006 (topics: [KTABLE-AGGREGATE-STATE-STORE-0000000004-repartition])\n" +
+                "      --> KTABLE-AGGREGATE-0000000007\n" +
+                "    Processor: KTABLE-AGGREGATE-0000000007 (stores: [KTABLE-AGGREGATE-STATE-STORE-0000000004])\n" +
+                "      --> none\n" +
+                "      <-- KSTREAM-SOURCE-0000000006\n" +
+                "\n",
+            describe.toString()
+        );
+
+        final ProcessorTopology processorTopology = topology.internalTopologyBuilder.setApplicationId("test").buildTopology();
+        // one for ktable, and one for count operation
+        assertThat(processorTopology.stateStores().size(), is(2));
+        // ktable store is rocksDB (default)
+        assertThat(processorTopology.stateStores().get(0).persistent(), is(true));
+        // count store is in-memory
+        assertThat(processorTopology.stateStores().get(1).persistent(), is(false));
     }
 
     @Test
@@ -1447,7 +2069,8 @@ public void kTableAnonymousMaterializedMapValuesShouldPreserveTopologyStructure(
         final KTable<Object, Object> table = builder.table("input-topic");
         table.mapValues(
             (readOnlyKey, value) -> null,
-            Materialized.with(null, null));
+            Materialized.<Object, Object, KeyValueStore<Bytes, byte[]>>with(null, null)
+                .withStoreType(Materialized.StoreType.IN_MEMORY));
         final TopologyDescription describe = builder.build().describe();
         Assert.assertEquals(
             "Topologies:\n" +
@@ -1682,4 +2305,16 @@ private void addGlobalStoreToTopologyAndExpectedDescription(final String globalS
 
         expectedDescription.addGlobalStore(expectedGlobalStore);
     }
+
+    private TopologyConfig overrideDefaultStore(final String defaultStore) {
+        final Properties topologyOverrides = new Properties();
+        // change default store as in-memory
+        topologyOverrides.put(StreamsConfig.DEFAULT_DSL_STORE_CONFIG, defaultStore);
+        final StreamsConfig config = new StreamsConfig(StreamsTestUtils.getStreamsConfig());
+
+        return new TopologyConfig(
+            "my-topology",
+            config,
+            topologyOverrides);
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/AbstractResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/AbstractResetIntegrationTest.java
index fd5da124bb976..2cead2f692960 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/AbstractResetIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/AbstractResetIntegrationTest.java
@@ -49,6 +49,7 @@
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TemporaryFolder;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.BufferedWriter;
 import java.io.File;
@@ -69,6 +70,9 @@
 
 @Category({IntegrationTest.class})
 public abstract class AbstractResetIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     static EmbeddedKafkaCluster cluster;
 
     private static MockTime mockTime;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/AdjustStreamThreadCountTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/AdjustStreamThreadCountTest.java
index 26edd69a1c18c..59683e446003c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/AdjustStreamThreadCountTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/AdjustStreamThreadCountTest.java
@@ -32,18 +32,18 @@
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.PunctuationType;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.util.concurrent.atomic.AtomicBoolean;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+
 import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayList;
@@ -67,31 +67,27 @@
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.CoreMatchers.not;
 import static org.hamcrest.MatcherAssert.assertThat;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertThrows;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-@Category(IntegrationTest.class)
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+@Timeout(600)
+@Tag("integration")
 public class AdjustStreamThreadCountTest {
-
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
 
-
-    @Rule
-    public TestName testName = new TestName();
-
     private final List<KafkaStreams.State> stateTransitionHistory = new ArrayList<>();
     private static String inputTopic;
     private static StreamsBuilder builder;
@@ -99,9 +95,9 @@ public static void closeCluster() {
     private static String appId = "";
     public static final Duration DEFAULT_DURATION = Duration.ofSeconds(30);
 
-    @Before
-    public void setup() {
-        final String testId = safeUniqueTestName(getClass(), testName);
+    @BeforeEach
+    public void setup(final TestInfo testInfo) {
+        final String testId = safeUniqueTestName(getClass(), testInfo);
         appId = "appId_" + testId;
         inputTopic = "input" + testId;
         IntegrationTestUtils.cleanStateBeforeTest(CLUSTER, inputTopic);
@@ -127,7 +123,7 @@ private void startStreamsAndWaitForRunning(final KafkaStreams kafkaStreams) thro
         waitForRunning();
     }
 
-    @After
+    @AfterEach
     public void teardown() throws IOException {
         purgeLocalStreamsState(properties);
     }
@@ -396,6 +392,7 @@ public void shouldResizeCacheAfterThreadRemovalTimesOut() throws InterruptedExce
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldResizeCacheAfterThreadReplacement() throws InterruptedException {
         final long totalCacheBytes = 10L;
         final Properties props = new Properties();
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ConsistencyVectorIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ConsistencyVectorIntegrationTest.java
index 0b4178b7adf3d..78629cc652ae3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/ConsistencyVectorIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/ConsistencyVectorIntegrationTest.java
@@ -49,6 +49,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -71,7 +72,8 @@
 
 @Category({IntegrationTest.class})
 public class ConsistencyVectorIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
     private static int port = 0;
     private static final String INPUT_TOPIC_NAME = "input-topic";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/EOSUncleanShutdownIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/EOSUncleanShutdownIntegrationTest.java
index d3e991d51c006..718f162a18c09 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/EOSUncleanShutdownIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/EOSUncleanShutdownIntegrationTest.java
@@ -36,9 +36,11 @@
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -66,6 +68,8 @@
 @RunWith(Parameterized.class)
 @Category(IntegrationTest.class)
 public class EOSUncleanShutdownIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     @SuppressWarnings("deprecation")
     @Parameterized.Parameters(name = "{0}")
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/EmitOnChangeIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/EmitOnChangeIntegrationTest.java
new file mode 100644
index 0000000000000..c7aae1e007511
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/EmitOnChangeIntegrationTest.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.integration;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.IntegerDeserializer;
+import org.apache.kafka.common.serialization.IntegerSerializer;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.test.StreamsTestUtils;
+import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Properties;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.common.utils.Utils.mkObjectProperties;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
+
+@Timeout(600)
+@Tag("integration")
+public class EmitOnChangeIntegrationTest {
+    private static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
+
+    @BeforeAll
+    public static void startCluster() throws IOException {
+        CLUSTER.start();
+    }
+
+    @AfterAll
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+    private static String inputTopic;
+    private static String inputTopic2;
+    private static String outputTopic;
+    private static String outputTopic2;
+    private static String appId = "";
+
+    @BeforeEach
+    public void setup(final TestInfo testInfo) {
+        final String testId = safeUniqueTestName(getClass(), testInfo);
+        appId = "appId_" + testId;
+        inputTopic = "input" + testId;
+        inputTopic2 = "input2" + testId;
+        outputTopic = "output" + testId;
+        outputTopic2 = "output2" + testId;
+        IntegrationTestUtils.cleanStateBeforeTest(CLUSTER, inputTopic, outputTopic, inputTopic2, outputTopic2);
+    }
+
+    @Test
+    public void shouldEmitSameRecordAfterFailover() throws Exception {
+        final Properties properties  = mkObjectProperties(
+            mkMap(
+                mkEntry(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers()),
+                mkEntry(StreamsConfig.APPLICATION_ID_CONFIG, appId),
+                mkEntry(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath()),
+                mkEntry(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 1),
+                mkEntry(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0),
+                mkEntry(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 300000L),
+                mkEntry(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.IntegerSerde.class),
+                mkEntry(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.StringSerde.class),
+                mkEntry(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 10000)
+            )
+        );
+
+        final AtomicBoolean shouldThrow = new AtomicBoolean(true);
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.table(inputTopic, Materialized.as("test-store"))
+            .toStream()
+            .map((key, value) -> {
+                if (shouldThrow.compareAndSet(true, false)) {
+                    throw new IllegalStateException("Kaboom");
+                } else {
+                    return new KeyValue<>(key, value);
+                }
+            })
+            .to(outputTopic);
+        builder.stream(inputTopic2).to(outputTopic2);
+
+        try (final KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties)) {
+            kafkaStreams.setUncaughtExceptionHandler(exception -> StreamThreadExceptionResponse.REPLACE_THREAD);
+            StreamsTestUtils.startKafkaStreamsAndWaitForRunningState(kafkaStreams);
+
+            IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(
+                inputTopic,
+                Arrays.asList(
+                    new KeyValue<>(1, "A"),
+                    new KeyValue<>(1, "B")
+                ),
+                TestUtils.producerConfig(
+                    CLUSTER.bootstrapServers(),
+                    IntegerSerializer.class,
+                    StringSerializer.class,
+                    new Properties()),
+                0L);
+
+            IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(
+                inputTopic2,
+                Arrays.asList(
+                    new KeyValue<>(1, "A"),
+                    new KeyValue<>(1, "B")
+                ),
+                TestUtils.producerConfig(
+                    CLUSTER.bootstrapServers(),
+                    IntegerSerializer.class,
+                    StringSerializer.class,
+                    new Properties()),
+                0L);
+
+            IntegrationTestUtils.waitUntilFinalKeyValueRecordsReceived(
+                TestUtils.consumerConfig(
+                    CLUSTER.bootstrapServers(),
+                    IntegerDeserializer.class,
+                    StringDeserializer.class
+                ),
+                outputTopic,
+                Arrays.asList(
+                    new KeyValue<>(1, "A"),
+                    new KeyValue<>(1, "B")
+                )
+            );
+            IntegrationTestUtils.waitUntilFinalKeyValueRecordsReceived(
+                TestUtils.consumerConfig(
+                    CLUSTER.bootstrapServers(),
+                    IntegerDeserializer.class,
+                    StringDeserializer.class
+                ),
+                outputTopic2,
+                Arrays.asList(
+                    new KeyValue<>(1, "A"),
+                    new KeyValue<>(1, "B")
+                )
+            );
+        }
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/EosIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/EosIntegrationTest.java
index f3dcc646418ed..12cb0bf9563e1 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/EosIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/EosIntegrationTest.java
@@ -40,28 +40,26 @@
 import org.apache.kafka.streams.kstream.Transformer;
 import org.apache.kafka.streams.kstream.TransformerSupplier;
 import org.apache.kafka.streams.processor.ProcessorContext;
-import org.apache.kafka.streams.processor.StateStoreContext;
-import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.StreamThread;
+import org.apache.kafka.streams.query.QueryResult;
+import org.apache.kafka.streams.query.RangeQuery;
+import org.apache.kafka.streams.query.StateQueryRequest;
+import org.apache.kafka.streams.query.StateQueryResult;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.KeyValueStore;
-import org.apache.kafka.streams.state.QueryableStoreTypes;
-import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.internals.OffsetCheckpoint;
-import org.apache.kafka.streams.state.internals.RocksDBStore;
-import org.apache.kafka.streams.state.internals.RocksDbKeyValueBytesStoreSupplier;
 import org.apache.kafka.test.IntegrationTest;
-import org.apache.kafka.test.MockInternalProcessorContext;
-import org.apache.kafka.test.MockKeyValueStore;
 import org.apache.kafka.test.StreamsTestUtils;
 import org.apache.kafka.test.TestUtils;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameter;
@@ -71,7 +69,6 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.math.BigInteger;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -91,6 +88,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForEmptyConsumerGroup;
+import static org.apache.kafka.streams.query.StateQueryRequest.inStore;
 import static org.apache.kafka.test.StreamsTestUtils.startKafkaStreamsAndWaitForRunningState;
 import static org.apache.kafka.test.TestUtils.consumerConfig;
 import static org.apache.kafka.test.TestUtils.waitForCondition;
@@ -98,12 +96,13 @@
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class EosIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final Logger LOG = LoggerFactory.getLogger(EosIntegrationTest.class);
     private static final int NUM_BROKERS = 3;
     private static final int MAX_POLL_INTERVAL_MS = 5 * 1000;
@@ -284,20 +283,7 @@ private void runSimpleCopyTest(final int numberOfRestarts,
             try (final KafkaStreams streams = new KafkaStreams(builder.build(), config)) {
                 startKafkaStreamsAndWaitForRunningState(streams, MAX_WAIT_TIME_MS);
 
-                final List<KeyValue<Long, Long>> committedRecords = IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
-                    TestUtils.consumerConfig(
-                        CLUSTER.bootstrapServers(),
-                        CONSUMER_GROUP_ID,
-                        LongDeserializer.class,
-                        LongDeserializer.class,
-                        Utils.mkProperties(Collections.singletonMap(
-                            ConsumerConfig.ISOLATION_LEVEL_CONFIG,
-                            IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)))
-                        ),
-                    outputTopic,
-                    inputData.size()
-                );
-
+                final List<KeyValue<Long, Long>> committedRecords = readResult(outputTopic, inputData.size(), CONSUMER_GROUP_ID);
                 checkResultPerKey(committedRecords, inputData, "The committed records do not match what expected");
             }
         }
@@ -365,21 +351,7 @@ public void shouldBeAbleToPerformMultipleTransactions() throws Exception {
                 CLUSTER.time
             );
 
-            final List<KeyValue<Long, Long>> firstCommittedRecords =
-                IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
-                    TestUtils.consumerConfig(
-                        CLUSTER.bootstrapServers(),
-                        CONSUMER_GROUP_ID,
-                        LongDeserializer.class,
-                        LongDeserializer.class,
-                        Utils.mkProperties(Collections.singletonMap(
-                            ConsumerConfig.ISOLATION_LEVEL_CONFIG,
-                            IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)))
-                        ),
-                    SINGLE_PARTITION_OUTPUT_TOPIC,
-                    firstBurstOfData.size()
-                );
-
+            final List<KeyValue<Long, Long>> firstCommittedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, firstBurstOfData.size(), CONSUMER_GROUP_ID);
             assertThat(firstCommittedRecords, equalTo(firstBurstOfData));
 
             IntegrationTestUtils.produceKeyValuesSynchronously(
@@ -389,21 +361,7 @@ public void shouldBeAbleToPerformMultipleTransactions() throws Exception {
                 CLUSTER.time
             );
 
-            final List<KeyValue<Long, Long>> secondCommittedRecords =
-                IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
-                    TestUtils.consumerConfig(
-                        CLUSTER.bootstrapServers(),
-                        CONSUMER_GROUP_ID,
-                        LongDeserializer.class,
-                        LongDeserializer.class,
-                        Utils.mkProperties(Collections.singletonMap(
-                            ConsumerConfig.ISOLATION_LEVEL_CONFIG,
-                            IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)))
-                        ),
-                    SINGLE_PARTITION_OUTPUT_TOPIC,
-                    secondBurstOfData.size()
-                );
-
+            final List<KeyValue<Long, Long>> secondCommittedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, secondBurstOfData.size(), CONSUMER_GROUP_ID);
             assertThat(secondCommittedRecords, equalTo(secondBurstOfData));
         }
     }
@@ -445,7 +403,7 @@ public void shouldNotViolateEosIfOneTaskFails() throws Exception {
             // p-0: ---> 10 rec + C
             // p-1: ---> 10 rec + C
 
-            final List<KeyValue<Long, Long>> committedRecords = readResult(committedDataBeforeFailure.size(), CONSUMER_GROUP_ID);
+            final List<KeyValue<Long, Long>> committedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, committedDataBeforeFailure.size(), CONSUMER_GROUP_ID);
             checkResultPerKey(
                 committedRecords,
                 committedDataBeforeFailure,
@@ -458,7 +416,7 @@ public void shouldNotViolateEosIfOneTaskFails() throws Exception {
             // p-0: ---> 10 rec + C  + 5 rec (pending)
             // p-1: ---> 10 rec + C  + 5 rec (pending)
 
-            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(dataBeforeFailure.size(), null);
+            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, dataBeforeFailure.size(), null);
             checkResultPerKey(
                 uncommittedRecords,
                 dataBeforeFailure,
@@ -477,10 +435,12 @@ public void shouldNotViolateEosIfOneTaskFails() throws Exception {
             // p-1: ---> 10 rec + C  + 5 rec + C    + 5 rec + C
 
             final List<KeyValue<Long, Long>> allCommittedRecords = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 committedDataBeforeFailure.size() + uncommittedDataBeforeFailure.size() + dataAfterFailure.size(),
                 CONSUMER_GROUP_ID + "_ALL");
 
             final List<KeyValue<Long, Long>> committedRecordsAfterFailure = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 uncommittedDataBeforeFailure.size() + dataAfterFailure.size(),
                 CONSUMER_GROUP_ID);
 
@@ -551,7 +511,7 @@ public void shouldNotViolateEosIfOneTaskFailsWithState() throws Exception {
             // p-0: ---> 10 rec + C
             // p-1: ---> 10 rec + C
 
-            final List<KeyValue<Long, Long>> committedRecords = readResult(committedDataBeforeFailure.size(), CONSUMER_GROUP_ID);
+            final List<KeyValue<Long, Long>> committedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, committedDataBeforeFailure.size(), CONSUMER_GROUP_ID);
             checkResultPerKey(
                 committedRecords,
                 computeExpectedResult(committedDataBeforeFailure),
@@ -564,7 +524,7 @@ public void shouldNotViolateEosIfOneTaskFailsWithState() throws Exception {
             // p-0: ---> 10 rec + C  + 5 rec (pending)
             // p-1: ---> 10 rec + C  + 5 rec (pending)
 
-            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(dataBeforeFailure.size(), null);
+            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, dataBeforeFailure.size(), null);
             final List<KeyValue<Long, Long>> expectedResultBeforeFailure = computeExpectedResult(dataBeforeFailure);
 
 
@@ -590,10 +550,12 @@ public void shouldNotViolateEosIfOneTaskFailsWithState() throws Exception {
             // p-1: ---> 10 rec + C  + 5 rec + C    + 5 rec + C
 
             final List<KeyValue<Long, Long>> allCommittedRecords = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 committedDataBeforeFailure.size() + uncommittedDataBeforeFailure.size() + dataAfterFailure.size(),
                 CONSUMER_GROUP_ID + "_ALL");
 
             final List<KeyValue<Long, Long>> committedRecordsAfterFailure = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 uncommittedDataBeforeFailure.size() + dataAfterFailure.size(),
                 CONSUMER_GROUP_ID);
 
@@ -671,7 +633,7 @@ public void shouldNotViolateEosIfOneTaskGetsFencedUsingIsolatedAppInstances() th
             // p-0: ---> 10 rec + C
             // p-1: ---> 10 rec + C
 
-            final List<KeyValue<Long, Long>> committedRecords = readResult(committedDataBeforeStall.size(), CONSUMER_GROUP_ID);
+            final List<KeyValue<Long, Long>> committedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, committedDataBeforeStall.size(), CONSUMER_GROUP_ID);
             checkResultPerKey(
                 committedRecords,
                 committedDataBeforeStall,
@@ -684,7 +646,7 @@ public void shouldNotViolateEosIfOneTaskGetsFencedUsingIsolatedAppInstances() th
             // p-0: ---> 10 rec + C  + 5 rec (pending)
             // p-1: ---> 10 rec + C  + 5 rec (pending)
 
-            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(dataBeforeStall.size(), null);
+            final List<KeyValue<Long, Long>> uncommittedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, dataBeforeStall.size(), null);
             checkResultPerKey(
                 uncommittedRecords,
                 dataBeforeStall,
@@ -730,6 +692,7 @@ public void shouldNotViolateEosIfOneTaskGetsFencedUsingIsolatedAppInstances() th
             // p-1: ---> 10 rec + C  + 5 rec + C    + 5 rec + C
 
             final List<KeyValue<Long, Long>> committedRecordsAfterRebalance = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 uncommittedDataBeforeStall.size() + dataToTriggerFirstRebalance.size(),
                 CONSUMER_GROUP_ID);
 
@@ -766,6 +729,7 @@ public void shouldNotViolateEosIfOneTaskGetsFencedUsingIsolatedAppInstances() th
             // p-1: ---> 10 rec + C  + 5 rec + C    + 5 rec + C   + 10 rec + C
 
             final List<KeyValue<Long, Long>> allCommittedRecords = readResult(
+                SINGLE_PARTITION_OUTPUT_TOPIC,
                 committedDataBeforeStall.size() + uncommittedDataBeforeStall.size()
                 + dataToTriggerFirstRebalance.size() + dataAfterSecondRebalance.size(),
                 CONSUMER_GROUP_ID + "_ALL");
@@ -791,49 +755,36 @@ public void shouldWriteLatestOffsetsToCheckpointOnShutdown() throws Exception {
         final List<KeyValue<Long, Long>> expectedResult = computeExpectedResult(writtenData);
 
         try (final KafkaStreams streams = getKafkaStreams("streams", true, "appDir", 1, eosConfig, MAX_POLL_INTERVAL_MS)) {
+            writeInputData(writtenData);
 
             startKafkaStreamsAndWaitForRunningState(streams, MAX_WAIT_TIME_MS);
 
-            writeInputData(writtenData);
-
             waitForCondition(
                     () -> commitRequested.get() == 2, MAX_WAIT_TIME_MS,
                     "SteamsTasks did not request commit.");
 
-            final List<KeyValue<Long, Long>> committedRecords = readResult(writtenData.size(), CONSUMER_GROUP_ID);
+            final List<KeyValue<Long, Long>> committedRecords = readResult(SINGLE_PARTITION_OUTPUT_TOPIC, writtenData.size(), CONSUMER_GROUP_ID);
 
-            checkResultPerKey(
-                    committedRecords,
-                    expectedResult,
-                    "The committed records do not match what expected");
+            if (!eosConfig.equals(StreamsConfig.AT_LEAST_ONCE)) {
+                checkResultPerKey(
+                        committedRecords,
+                        expectedResult,
+                        "The committed records do not match what expected");
 
-            verifyStateStore(
-                    streams,
-                    getMaxPerKey(expectedResult),
-                    "The state store content do not match what expected");
+                verifyStateStore(
+                        streams,
+                        getMaxPerKey(expectedResult),
+                        "The state store content do not match what expected");
+            }
         }
 
-        final Set<KeyValue<Long, Long>> expectedState = getMaxPerKey(expectedResult);
-        verifyStateIsInStoreAndOffsetsAreInCheckpoint(0, expectedState);
-        verifyStateIsInStoreAndOffsetsAreInCheckpoint(1, expectedState);
-
-        assertThat("Not all expected state values were found in the state stores", expectedState.isEmpty());
+        verifyOffsetsAreInCheckpoint(0);
+        verifyOffsetsAreInCheckpoint(1);
     }
 
-    private void verifyStateIsInStoreAndOffsetsAreInCheckpoint(final int partition, final Set<KeyValue<Long, Long>> expectedState) throws IOException {
+    private void verifyOffsetsAreInCheckpoint(final int partition) throws IOException {
         final String stateStoreDir = stateTmpDir + File.separator + "appDir" + File.separator + applicationId + File.separator + "0_" + partition + File.separator;
 
-        // Verify that the data in the state store on disk is fully up-to-date
-        final StateStoreContext context = new MockInternalProcessorContext(new Properties(), new TaskId(0, 0), new File(stateStoreDir));
-        final MockKeyValueStore stateStore = new MockKeyValueStore("store", false);
-        final RocksDBStore store = (RocksDBStore) new RocksDbKeyValueBytesStoreSupplier(storeName, false).get();
-        store.init(context, stateStore);
-
-        store.all().forEachRemaining(kv -> {
-            final KeyValue<Long, Long> kv2 = new KeyValue<>(new BigInteger(kv.key.get()).longValue(), new BigInteger(kv.value).longValue());
-            expectedState.remove(kv2);
-        });
-
         // Verify that the checkpointed offsets match exactly with max offset of the records in the changelog
         final OffsetCheckpoint checkpoint = new OffsetCheckpoint(new File(stateStoreDir + ".checkpoint"));
         final Map<TopicPartition, Long> checkpointedOffsets = checkpoint.read();
@@ -866,8 +817,8 @@ private void verifyChangelogMaxRecordOffsetMatchesCheckpointedOffset(final Topic
     private List<KeyValue<Long, Long>> prepareData(final long fromInclusive,
                                                    final long toExclusive,
                                                    final Long... keys) {
-        final Long dataSize = keys.length * (toExclusive - fromInclusive);
-        final List<KeyValue<Long, Long>> data = new ArrayList<>(dataSize.intValue());
+        final long dataSize = keys.length * (toExclusive - fromInclusive);
+        final List<KeyValue<Long, Long>> data = new ArrayList<>((int) dataSize);
 
         for (final Long k : keys) {
             for (long v = fromInclusive; v < toExclusive; ++v) {
@@ -1022,7 +973,8 @@ private void writeInputData(final List<KeyValue<Long, Long>> records) {
         );
     }
 
-    private List<KeyValue<Long, Long>> readResult(final int numberOfRecords,
+    private List<KeyValue<Long, Long>> readResult(final String topic,
+                                                  final int numberOfRecords,
                                                   final String groupId) throws Exception {
         if (groupId != null) {
             return IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
@@ -1034,7 +986,7 @@ private List<KeyValue<Long, Long>> readResult(final int numberOfRecords,
                     Utils.mkProperties(Collections.singletonMap(
                         ConsumerConfig.ISOLATION_LEVEL_CONFIG,
                         IsolationLevel.READ_COMMITTED.name().toLowerCase(Locale.ROOT)))),
-                SINGLE_PARTITION_OUTPUT_TOPIC,
+                topic,
                 numberOfRecords
             );
         }
@@ -1042,7 +994,7 @@ private List<KeyValue<Long, Long>> readResult(final int numberOfRecords,
         // read uncommitted
         return IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(
             TestUtils.consumerConfig(CLUSTER.bootstrapServers(), LongDeserializer.class, LongDeserializer.class),
-            SINGLE_PARTITION_OUTPUT_TOPIC,
+            topic,
             numberOfRecords
         );
     }
@@ -1088,17 +1040,21 @@ private Set<KeyValue<Long, Long>> getMaxPerKey(final List<KeyValue<Long, Long>>
 
     private void verifyStateStore(final KafkaStreams streams,
                                   final Set<KeyValue<Long, Long>> expectedStoreContent,
-                                  final String reason) throws Exception {
-        final ReadOnlyKeyValueStore<Long, Long> store = IntegrationTestUtils
-            .getStore(300_000L, storeName, streams, QueryableStoreTypes.keyValueStore());
-        assertNotNull(store);
-
-        try (final KeyValueIterator<Long, Long> it = store.all()) {
-            while (it.hasNext()) {
-                assertTrue(reason, expectedStoreContent.remove(it.next()));
+                                  final String reason) {
+        final StateQueryRequest<KeyValueIterator<Long, Long>> request =
+                inStore(storeName).withQuery(RangeQuery.withNoBounds());
+
+        final StateQueryResult<KeyValueIterator<Long, Long>> result =
+                IntegrationTestUtils.iqv2WaitForResult(streams, request);
+
+        for (final QueryResult<KeyValueIterator<Long, Long>> partitionResult: result.getPartitionResults().values()) {
+            try (final KeyValueIterator<Long, Long> it = partitionResult.getResult()) {
+                while (it.hasNext()) {
+                    assertTrue(reason, expectedStoreContent.remove(it.next()));
+                }
             }
-
-            assertTrue(reason, expectedStoreContent.isEmpty());
         }
+
+        assertTrue(reason, expectedStoreContent.isEmpty());
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/EosV2UpgradeIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/EosV2UpgradeIntegrationTest.java
index b6aab860eac85..7f652f0c7f574 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/EosV2UpgradeIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/EosV2UpgradeIntegrationTest.java
@@ -56,8 +56,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -90,6 +92,8 @@
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class EosV2UpgradeIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     @Parameterized.Parameters(name = "{0}")
     public static Collection<Boolean[]> data() {
@@ -862,6 +866,7 @@ public void shouldUpgradeFromEosAlphaToEosV2() throws Exception {
         }
     }
 
+    @SuppressWarnings("deprecation")
     private KafkaStreams getKafkaStreams(final String appDir,
                                          final String processingGuarantee) {
         final StreamsBuilder builder = new StreamsBuilder();
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/FineGrainedAutoResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/FineGrainedAutoResetIntegrationTest.java
index baaf06c573a11..e5ceb2fad4c5f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/FineGrainedAutoResetIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/FineGrainedAutoResetIntegrationTest.java
@@ -38,14 +38,14 @@
 import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.Produced;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.StreamsTestUtils;
 import org.apache.kafka.test.TestUtils;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -60,11 +60,11 @@
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
-import static org.junit.Assert.fail;
+import static org.junit.jupiter.api.Assertions.fail;
 
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class FineGrainedAutoResetIntegrationTest {
-
     private static final int NUM_BROKERS = 1;
     private static final String DEFAULT_OUTPUT_TOPIC = "outputTopic";
     private static final String OUTPUT_TOPIC_0 = "outputTopic_0";
@@ -73,7 +73,7 @@ public class FineGrainedAutoResetIntegrationTest {
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException, InterruptedException {
         CLUSTER.start();
         CLUSTER.createTopics(
@@ -102,7 +102,7 @@ public static void startCluster() throws IOException, InterruptedException {
                 OUTPUT_TOPIC_2);
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -140,7 +140,7 @@ public static void closeCluster() {
     private final String topicYTestMessage = "topic-Y test";
     private final String topicZTestMessage = "topic-Z test";
 
-    @Before
+    @BeforeEach
     public void setUp() throws IOException {
 
         final Properties props = new Properties();
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableEOSIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableEOSIntegrationTest.java
index 097a79ff48a25..3ac94ad96834f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableEOSIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableEOSIntegrationTest.java
@@ -55,6 +55,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -73,6 +74,9 @@
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class GlobalKTableEOSIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     private static final int NUM_BROKERS = 1;
     private static final Properties BROKER_CONFIG;
     static {
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableIntegrationTest.java
index 90dc9e73e2e5f..4668eab7ca32f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalKTableIntegrationTest.java
@@ -41,17 +41,16 @@
 import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -65,20 +64,21 @@
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForApplicationState;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class GlobalKTableIntegrationTest {
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -97,15 +97,12 @@ public static void closeCluster() {
     private KStream<String, Long> stream;
     private MockApiProcessorSupplier<String, String, Void, Void> supplier;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @Before
-    public void before() throws Exception {
+    @BeforeEach
+    public void before(final TestInfo testInfo) throws Exception {
         builder = new StreamsBuilder();
-        createTopics();
+        createTopics(testInfo);
         streamsConfiguration = new Properties();
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
@@ -121,7 +118,7 @@ public void before() throws Exception {
         supplier = new MockApiProcessorSupplier<>();
     }
 
-    @After
+    @AfterEach
     public void whenShuttingDown() throws Exception {
         if (kafkaStreams != null) {
             kafkaStreams.close();
@@ -344,8 +341,8 @@ public void shouldGetToRunningWithOnlyGlobalTopology() throws Exception {
         kafkaStreams.close();
     }
 
-    private void createTopics() throws Exception {
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+    private void createTopics(final TestInfo testInfo) throws Exception {
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamTopic = "stream-" + safeTestName;
         globalTableTopic = "globalTable-" + safeTestName;
         CLUSTER.createTopics(streamTopic);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalThreadShutDownOrderTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalThreadShutDownOrderTest.java
index 98dec8706c9c5..4012224d30199 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/GlobalThreadShutDownOrderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/GlobalThreadShutDownOrderTest.java
@@ -36,17 +36,16 @@
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.internals.KeyValueStoreBuilder;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -57,7 +56,7 @@
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
 /**
@@ -69,9 +68,9 @@
  * Otherwise if the GlobalStreamThread were to close underneath the StreamThread
  * an exception would be thrown as the GlobalStreamThread closes all global stores on closing.
  */
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class GlobalThreadShutDownOrderTest {
-
     private static final int NUM_BROKERS = 1;
     private static final Properties BROKER_CONFIG;
 
@@ -85,12 +84,12 @@ public class GlobalThreadShutDownOrderTest {
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS, BROKER_CONFIG);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -106,15 +105,12 @@ public static void closeCluster() {
     private final List<Long> retrievedValuesList = new ArrayList<>();
     private boolean firstRecordProcessed;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @Before
-    public void before() throws Exception {
+    @BeforeEach
+    public void before(final TestInfo testInfo) throws Exception {
         builder = new StreamsBuilder();
         createTopics();
         streamsConfiguration = new Properties();
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
@@ -143,7 +139,7 @@ public void before() throws Exception {
 
     }
 
-    @After
+    @AfterEach
     public void after() throws Exception {
         if (kafkaStreams != null) {
             kafkaStreams.close();
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/HighAvailabilityTaskAssignorIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/HighAvailabilityTaskAssignorIntegrationTest.java
index 2b67a0f0fd878..a2ff7657a3789 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/HighAvailabilityTaskAssignorIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/HighAvailabilityTaskAssignorIntegrationTest.java
@@ -40,15 +40,14 @@
 import org.apache.kafka.streams.processor.internals.assignment.HighAvailabilityTaskAssignor;
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.Stores;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.NoRetryException;
 import org.apache.kafka.test.TestUtils;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -72,40 +71,38 @@
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.is;
 
-@Category(IntegrationTest.class)
+@Timeout(600)
+@Tag("integration")
 public class HighAvailabilityTaskAssignorIntegrationTest {
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
 
-
-    @Rule
-    public TestName testName = new TestName();
-
     @Test
-    public void shouldScaleOutWithWarmupTasksAndInMemoryStores() throws InterruptedException {
+    public void shouldScaleOutWithWarmupTasksAndInMemoryStores(final TestInfo testInfo) throws InterruptedException {
         // NB: this test takes at least a minute to run, because it needs a probing rebalance, and the minimum
         // value is one minute
-        shouldScaleOutWithWarmupTasks(storeName -> Materialized.as(Stores.inMemoryKeyValueStore(storeName)));
+        shouldScaleOutWithWarmupTasks(storeName -> Materialized.as(Stores.inMemoryKeyValueStore(storeName)), testInfo);
     }
 
     @Test
-    public void shouldScaleOutWithWarmupTasksAndPersistentStores() throws InterruptedException {
+    public void shouldScaleOutWithWarmupTasksAndPersistentStores(final TestInfo testInfo) throws InterruptedException {
         // NB: this test takes at least a minute to run, because it needs a probing rebalance, and the minimum
         // value is one minute
-        shouldScaleOutWithWarmupTasks(storeName -> Materialized.as(Stores.persistentKeyValueStore(storeName)));
+        shouldScaleOutWithWarmupTasks(storeName -> Materialized.as(Stores.persistentKeyValueStore(storeName)), testInfo);
     }
 
-    private void shouldScaleOutWithWarmupTasks(final Function<String, Materialized<Object, Object, KeyValueStore<Bytes, byte[]>>> materializedFunction) throws InterruptedException {
-        final String testId = safeUniqueTestName(getClass(), testName);
+    private void shouldScaleOutWithWarmupTasks(final Function<String, Materialized<Object, Object, KeyValueStore<Bytes, byte[]>>> materializedFunction,
+                                               final TestInfo testInfo) throws InterruptedException {
+        final String testId = safeUniqueTestName(getClass(), testInfo);
         final String appId = "appId_" + System.currentTimeMillis() + "_" + testId;
         final String inputTopic = "input" + testId;
         final Set<TopicPartition> inputTopicPartitions = mkSet(
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/IQv2IntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/IQv2IntegrationTest.java
index 5ba218c65ebd5..4889a47ad0fc3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/IQv2IntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/IQv2IntegrationTest.java
@@ -53,16 +53,15 @@
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
 import org.apache.kafka.streams.state.internals.StoreQueryUtils;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
@@ -87,9 +86,9 @@
 import static org.hamcrest.Matchers.matchesPattern;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class IQv2IntegrationTest {
-
     private static final int NUM_BROKERS = 1;
     public static final Duration WINDOW_SIZE = Duration.ofMinutes(5);
     private static int port = 0;
@@ -101,10 +100,7 @@ public class IQv2IntegrationTest {
 
     private KafkaStreams kafkaStreams;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @BeforeClass
+    @BeforeAll
     public static void before()
         throws InterruptedException, IOException, ExecutionException, TimeoutException {
         CLUSTER.start();
@@ -153,8 +149,8 @@ public static void before()
         ));
     }
 
-    @Before
-    public void beforeTest() {
+    @BeforeEach
+    public void beforeTest(final TestInfo testInfo) {
         final StreamsBuilder builder = new StreamsBuilder();
 
         builder.table(
@@ -163,17 +159,17 @@ public void beforeTest() {
             Materialized.as(STORE_NAME)
         );
 
-        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration());
+        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration(testInfo));
         kafkaStreams.cleanUp();
     }
 
-    @After
+    @AfterEach
     public void afterTest() {
         kafkaStreams.close();
         kafkaStreams.cleanUp();
     }
 
-    @AfterClass
+    @AfterAll
     public static void after() {
         CLUSTER.stop();
     }
@@ -293,7 +289,7 @@ public void shouldFetchExplicitlyFromAllPartitions() {
     }
 
     @Test
-    public void shouldNotRequireQueryHandler() {
+    public void shouldNotRequireQueryHandler(final TestInfo testInfo) {
         final KeyQuery<Integer, ValueAndTimestamp<Integer>> query = KeyQuery.withKey(1);
         final int partition = 1;
         final Set<Integer> partitions = singleton(partition);
@@ -422,7 +418,7 @@ public String metricsScope() {
             })
         );
 
-        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration());
+        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration(testInfo));
         kafkaStreams.cleanUp();
 
         kafkaStreams.start();
@@ -440,8 +436,8 @@ public String metricsScope() {
     }
 
 
-    private Properties streamsConfiguration() {
-        final String safeTestName = IntegrationTestUtils.safeUniqueTestName(getClass(), testName);
+    private Properties streamsConfiguration(final TestInfo testInfo) {
+        final String safeTestName = IntegrationTestUtils.safeUniqueTestName(getClass(), testInfo);
 
         final Properties config = new Properties();
         config.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/IQv2StoreIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/IQv2StoreIntegrationTest.java
index f534d6d171e14..813626d9eccc0 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/IQv2StoreIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/IQv2StoreIntegrationTest.java
@@ -71,8 +71,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.slf4j.Logger;
@@ -117,6 +119,8 @@
 @Category({IntegrationTest.class})
 @RunWith(value = Parameterized.class)
 public class IQv2StoreIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private static final Logger LOG = LoggerFactory.getLogger(IQv2StoreIntegrationTest.class);
 
@@ -760,45 +764,50 @@ public static void after() {
 
     @Test
     public void verifyStore() {
-        if (storeToTest.global()) {
-            // See KAFKA-13523
-            globalShouldRejectAllQueries();
-        } else {
-            shouldRejectUnknownQuery();
-            shouldCollectExecutionInfo();
-            shouldCollectExecutionInfoUnderFailure();
-
-            if (storeToTest.keyValue()) {
-                if (storeToTest.timestamped()) {
-                    final Function<ValueAndTimestamp<Integer>, Integer> valueExtractor =
-                        ValueAndTimestamp::value;
-                    shouldHandleKeyQuery(2, valueExtractor, 2);
-                    shouldHandleRangeQueries(valueExtractor);
-                } else {
-                    final Function<Integer, Integer> valueExtractor = Function.identity();
-                    shouldHandleKeyQuery(2, valueExtractor, 2);
-                    shouldHandleRangeQueries(valueExtractor);
+        try {
+            if (storeToTest.global()) {
+                // See KAFKA-13523
+                globalShouldRejectAllQueries();
+            } else {
+                shouldRejectUnknownQuery();
+                shouldCollectExecutionInfo();
+                shouldCollectExecutionInfoUnderFailure();
+
+                if (storeToTest.keyValue()) {
+                    if (storeToTest.timestamped()) {
+                        final Function<ValueAndTimestamp<Integer>, Integer> valueExtractor =
+                            ValueAndTimestamp::value;
+                        shouldHandleKeyQuery(2, valueExtractor, 2);
+                        shouldHandleRangeQueries(valueExtractor);
+                    } else {
+                        final Function<Integer, Integer> valueExtractor = Function.identity();
+                        shouldHandleKeyQuery(2, valueExtractor, 2);
+                        shouldHandleRangeQueries(valueExtractor);
+                    }
                 }
-            }
 
-            if (storeToTest.isWindowed()) {
-                if (storeToTest.timestamped()) {
-                    final Function<ValueAndTimestamp<Integer>, Integer> valueExtractor =
+                if (storeToTest.isWindowed()) {
+                    if (storeToTest.timestamped()) {
+                        final Function<ValueAndTimestamp<Integer>, Integer> valueExtractor =
                             ValueAndTimestamp::value;
-                    shouldHandleWindowKeyQueries(valueExtractor);
-                    shouldHandleWindowRangeQueries(valueExtractor);
-                } else {
-                    final Function<Integer, Integer> valueExtractor = Function.identity();
-                    shouldHandleWindowKeyQueries(valueExtractor);
-                    shouldHandleWindowRangeQueries(valueExtractor);
+                        shouldHandleWindowKeyQueries(valueExtractor);
+                        shouldHandleWindowRangeQueries(valueExtractor);
+                    } else {
+                        final Function<Integer, Integer> valueExtractor = Function.identity();
+                        shouldHandleWindowKeyQueries(valueExtractor);
+                        shouldHandleWindowRangeQueries(valueExtractor);
+                    }
                 }
-            }
 
-            if (storeToTest.isSession()) {
-                // Note there's no "timestamped" differentiation here.
-                // Idiosyncratically, SessionStores are _never_ timestamped.
-                shouldHandleSessionKeyQueries();
+                if (storeToTest.isSession()) {
+                    // Note there's no "timestamped" differentiation here.
+                    // Idiosyncratically, SessionStores are _never_ timestamped.
+                    shouldHandleSessionKeyQueries();
+                }
             }
+        } catch (final AssertionError e) {
+            LOG.error("Failed assertion", e);
+            throw e;
         }
     }
 
@@ -1350,7 +1359,7 @@ private static Properties streamsConfiguration(final boolean cache, final boolea
                                                    final String supplier, final String kind) {
         final String safeTestName =
             IQv2StoreIntegrationTest.class.getName() + "-" + cache + "-" + log + "-" + supplier
-                + "-" + kind;
+                + "-" + kind + "-" + RANDOM.nextInt();
         final Properties config = new Properties();
         config.put(StreamsConfig.TOPOLOGY_OPTIMIZATION_CONFIG, StreamsConfig.OPTIMIZE);
         config.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java
index 29c61ec764779..2e19c0caabc36 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/InternalTopicIntegrationTest.java
@@ -41,15 +41,15 @@
 import org.apache.kafka.streams.kstream.TimeWindows;
 import org.apache.kafka.streams.processor.internals.ProcessorStateManager;
 import org.apache.kafka.streams.state.WindowStore;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.MockMapper;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -65,24 +65,25 @@
 import static java.util.Collections.singletonList;
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.startApplicationAndWaitUntilRunning;
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForCompletion;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  * Tests related to internal topics in streams
  */
 @SuppressWarnings("deprecation")
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class InternalTopicIntegrationTest {
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException, InterruptedException {
         CLUSTER.start();
         CLUSTER.createTopics(DEFAULT_INPUT_TOPIC, DEFAULT_INPUT_TABLE_TOPIC);
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -96,7 +97,7 @@ public static void closeCluster() {
 
     private Properties streamsProp;
 
-    @Before
+    @BeforeEach
     public void before() {
         streamsProp = new Properties();
         streamsProp.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
@@ -108,7 +109,7 @@ public void before() {
         streamsProp.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
     }
 
-    @After
+    @AfterEach
     public void after() throws IOException {
         // Remove any state from previous test runs
         IntegrationTestUtils.purgeLocalStreamsState(streamsProp);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinStoreIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinStoreIntegrationTest.java
index 04d3f7dd8b566..cbe32e4a255c3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/JoinStoreIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinStoreIntegrationTest.java
@@ -45,6 +45,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Properties;
@@ -60,6 +61,8 @@
 @SuppressWarnings("deprecation")
 @Category({IntegrationTest.class})
 public class JoinStoreIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/JoinWithIncompleteMetadataIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/JoinWithIncompleteMetadataIntegrationTest.java
index 1f6152da55cf4..17cbf5f25046f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/JoinWithIncompleteMetadataIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/JoinWithIncompleteMetadataIntegrationTest.java
@@ -38,11 +38,15 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 
 import static org.junit.Assert.assertTrue;
 
 @Category({IntegrationTest.class})
 public class JoinWithIncompleteMetadataIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
     @BeforeClass
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationDedupIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationDedupIntegrationTest.java
index 4fe35a67daf81..459d5f871ca6a 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationDedupIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationDedupIntegrationTest.java
@@ -40,17 +40,16 @@
 import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.kstream.Reducer;
 import org.apache.kafka.streams.kstream.TimeWindows;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.MockMapper;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -64,7 +63,8 @@
  * Similar to KStreamAggregationIntegrationTest but with dedupping enabled
  * by virtue of having a large commit interval
  */
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 @SuppressWarnings("deprecation")
 public class KStreamAggregationDedupIntegrationTest {
     private static final int NUM_BROKERS = 1;
@@ -72,12 +72,12 @@ public class KStreamAggregationDedupIntegrationTest {
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -93,15 +93,12 @@ public static void closeCluster() {
     private Reducer<String> reducer;
     private KStream<Integer, String> stream;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @Before
-    public void before() throws InterruptedException {
+    @BeforeEach
+    public void before(final TestInfo testInfo) throws InterruptedException {
         builder = new StreamsBuilder();
-        createTopics();
+        createTopics(testInfo);
         streamsConfiguration = new Properties();
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
@@ -116,7 +113,7 @@ public void before() throws InterruptedException {
         reducer = (value1, value2) -> value1 + ":" + value2;
     }
 
-    @After
+    @AfterEach
     public void whenShuttingDown() throws IOException {
         if (kafkaStreams != null) {
             kafkaStreams.close();
@@ -126,7 +123,7 @@ public void whenShuttingDown() throws IOException {
 
 
     @Test
-    public void shouldReduce() throws Exception {
+    public void shouldReduce(final TestInfo testInfo) throws Exception {
         produceMessages(System.currentTimeMillis());
         groupedStream
                 .reduce(reducer, Materialized.as("reduce-by-key"))
@@ -146,11 +143,12 @@ public void shouldReduce() throws Exception {
                     new KeyValueTimestamp<>("B", "B:B", timestamp),
                     new KeyValueTimestamp<>("C", "C:C", timestamp),
                     new KeyValueTimestamp<>("D", "D:D", timestamp),
-                    new KeyValueTimestamp<>("E", "E:E", timestamp)));
+                    new KeyValueTimestamp<>("E", "E:E", timestamp)),
+                testInfo);
     }
 
     @Test
-    public void shouldReduceWindowed() throws Exception {
+    public void shouldReduceWindowed(final TestInfo testInfo) throws Exception {
         final long firstBatchTimestamp = System.currentTimeMillis() - 1000;
         produceMessages(firstBatchTimestamp);
         final long secondBatchTimestamp = System.currentTimeMillis();
@@ -182,12 +180,13 @@ public void shouldReduceWindowed() throws Exception {
                     new KeyValueTimestamp<>("D@" + secondBatchWindow, "D:D", secondBatchTimestamp),
                     new KeyValueTimestamp<>("E@" + firstBatchWindow, "E", firstBatchTimestamp),
                     new KeyValueTimestamp<>("E@" + secondBatchWindow, "E:E", secondBatchTimestamp)
-                )
+                ),
+                testInfo
         );
     }
 
     @Test
-    public void shouldGroupByKey() throws Exception {
+    public void shouldGroupByKey(final TestInfo testInfo) throws Exception {
         final long timestamp = mockTime.milliseconds();
         produceMessages(timestamp);
         produceMessages(timestamp);
@@ -211,7 +210,8 @@ public void shouldGroupByKey() throws Exception {
                     new KeyValueTimestamp<>("3@" + window, 2L, timestamp),
                     new KeyValueTimestamp<>("4@" + window, 2L, timestamp),
                     new KeyValueTimestamp<>("5@" + window, 2L, timestamp)
-                )
+                ),
+                testInfo
         );
     }
 
@@ -234,8 +234,8 @@ private void produceMessages(final long timestamp) throws Exception {
     }
 
 
-    private void createTopics() throws InterruptedException {
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+    private void createTopics(final TestInfo testInfo) throws InterruptedException {
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamOneInput = "stream-one-" + safeTestName;
         outputTopic = "output-" + safeTestName;
         CLUSTER.createTopic(streamOneInput, 3, 1);
@@ -250,10 +250,11 @@ private void startStreams() {
 
     private <K, V> void validateReceivedMessages(final Deserializer<K> keyDeserializer,
                                                  final Deserializer<V> valueDeserializer,
-                                                 final List<KeyValueTimestamp<K, V>> expectedRecords)
+                                                 final List<KeyValueTimestamp<K, V>> expectedRecords,
+                                                 final TestInfo testInfo)
             throws Exception {
 
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         final Properties consumerProperties = new Properties();
         consumerProperties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         consumerProperties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group-" + safeTestName);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationIntegrationTest.java
index e581903544465..6954f86e8b5b0 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamAggregationIntegrationTest.java
@@ -60,17 +60,16 @@
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.QueryableStoreTypes;
 import org.apache.kafka.streams.state.ReadOnlySessionStore;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.MockMapper;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -95,22 +94,23 @@
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.core.Is.is;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 @SuppressWarnings({"unchecked", "deprecation"})
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class KStreamAggregationIntegrationTest {
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -129,15 +129,12 @@ public static void closeCluster() {
     private Aggregator<String, String, Integer> aggregator;
     private KStream<Integer, String> stream;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @Before
-    public void before() throws InterruptedException {
+    @BeforeEach
+    public void before(final TestInfo testInfo) throws InterruptedException {
         builder = new StreamsBuilder();
-        createTopics();
+        createTopics(testInfo);
         streamsConfiguration = new Properties();
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
@@ -156,7 +153,7 @@ public void before() throws InterruptedException {
         aggregator = (aggKey, value, aggregate) -> aggregate + value.length();
     }
 
-    @After
+    @AfterEach
     public void whenShuttingDown() throws IOException {
         if (kafkaStreams != null) {
             kafkaStreams.close();
@@ -165,7 +162,7 @@ public void whenShuttingDown() throws IOException {
     }
 
     @Test
-    public void shouldReduce() throws Exception {
+    public void shouldReduce(final TestInfo testInfo) throws Exception {
         produceMessages(mockTime.milliseconds());
         groupedStream
             .reduce(reducer, Materialized.as("reduce-by-key"))
@@ -179,7 +176,8 @@ public void shouldReduce() throws Exception {
         final List<KeyValueTimestamp<String, String>> results = receiveMessages(
             new StringDeserializer(),
             new StringDeserializer(),
-            10);
+            10,
+            testInfo);
 
         results.sort(KStreamAggregationIntegrationTest::compare);
 
@@ -211,7 +209,7 @@ private static <K extends Comparable, V extends Comparable> int compare(final Ke
 
     @SuppressWarnings("deprecation")
     @Test
-    public void shouldReduceWindowed() throws Exception {
+    public void shouldReduceWindowed(final TestInfo testInfo) throws Exception {
         final long firstBatchTimestamp = mockTime.milliseconds();
         mockTime.sleep(1000);
         produceMessages(firstBatchTimestamp);
@@ -233,7 +231,8 @@ public void shouldReduceWindowed() throws Exception {
             new TimeWindowedDeserializer<>(),
             new StringDeserializer(),
             String.class,
-            15);
+            15,
+            testInfo);
 
         // read from ConsoleConsumer
         final String resultFromConsoleConsumer = readWindowedKeyedMessagesViaConsoleConsumer(
@@ -286,7 +285,7 @@ public void shouldReduceWindowed() throws Exception {
     }
 
     @Test
-    public void shouldAggregate() throws Exception {
+    public void shouldAggregate(final TestInfo testInfo) throws Exception {
         produceMessages(mockTime.milliseconds());
         groupedStream.aggregate(
             initializer,
@@ -302,7 +301,8 @@ public void shouldAggregate() throws Exception {
         final List<KeyValueTimestamp<String, Integer>> results = receiveMessages(
             new StringDeserializer(),
             new IntegerDeserializer(),
-            10);
+            10,
+            testInfo);
 
         results.sort(KStreamAggregationIntegrationTest::compare);
 
@@ -322,7 +322,7 @@ public void shouldAggregate() throws Exception {
 
     @SuppressWarnings("deprecation")
     @Test
-    public void shouldAggregateWindowed() throws Exception {
+    public void shouldAggregateWindowed(final TestInfo testInfo) throws Exception {
         final long firstTimestamp = mockTime.milliseconds();
         mockTime.sleep(1000);
         produceMessages(firstTimestamp);
@@ -347,7 +347,8 @@ public void shouldAggregateWindowed() throws Exception {
             new TimeWindowedDeserializer<>(new StringDeserializer(), 500L),
             new IntegerDeserializer(),
             String.class,
-            15);
+            15,
+            testInfo);
 
         // read from ConsoleConsumer
         final String resultFromConsoleConsumer = readWindowedKeyedMessagesViaConsoleConsumer(
@@ -399,7 +400,7 @@ public void shouldAggregateWindowed() throws Exception {
 
     }
 
-    private void shouldCountHelper() throws Exception {
+    private void shouldCountHelper(final TestInfo testInfo) throws Exception {
         startStreams();
 
         produceMessages(mockTime.milliseconds());
@@ -407,7 +408,8 @@ private void shouldCountHelper() throws Exception {
         final List<KeyValueTimestamp<String, Long>> results = receiveMessages(
             new StringDeserializer(),
             new LongDeserializer(),
-            10);
+            10,
+            testInfo);
         results.sort(KStreamAggregationIntegrationTest::compare);
 
         assertThat(results, is(Arrays.asList(
@@ -425,30 +427,30 @@ private void shouldCountHelper() throws Exception {
     }
 
     @Test
-    public void shouldCount() throws Exception {
+    public void shouldCount(final TestInfo testInfo) throws Exception {
         produceMessages(mockTime.milliseconds());
 
         groupedStream.count(Materialized.as("count-by-key"))
                 .toStream()
                 .to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));
 
-        shouldCountHelper();
+        shouldCountHelper(testInfo);
     }
 
     @Test
-    public void shouldCountWithInternalStore() throws Exception {
+    public void shouldCountWithInternalStore(final TestInfo testInfo) throws Exception {
         produceMessages(mockTime.milliseconds());
 
         groupedStream.count()
                 .toStream()
                 .to(outputTopic, Produced.with(Serdes.String(), Serdes.Long()));
 
-        shouldCountHelper();
+        shouldCountHelper(testInfo);
     }
 
     @SuppressWarnings("deprecation")
     @Test
-    public void shouldGroupByKey() throws Exception {
+    public void shouldGroupByKey(final TestInfo testInfo) throws Exception {
         final long timestamp = mockTime.milliseconds();
         produceMessages(timestamp);
         produceMessages(timestamp);
@@ -464,7 +466,8 @@ public void shouldGroupByKey() throws Exception {
         final List<KeyValueTimestamp<String, Long>> results = receiveMessages(
             new StringDeserializer(),
             new LongDeserializer(),
-            10);
+            10,
+            testInfo);
         results.sort(KStreamAggregationIntegrationTest::compare);
 
         final long window = timestamp / 500 * 500;
@@ -484,7 +487,7 @@ public void shouldGroupByKey() throws Exception {
 
     @SuppressWarnings("deprecation")
     @Test
-    public void shouldReduceSlidingWindows() throws Exception {
+    public void shouldReduceSlidingWindows(final TestInfo testInfo) throws Exception {
         final long firstBatchTimestamp = mockTime.milliseconds();
         final long timeDifference = 500L;
         produceMessages(firstBatchTimestamp);
@@ -507,7 +510,8 @@ public void shouldReduceSlidingWindows() throws Exception {
                 new TimeWindowedDeserializer<>(new StringDeserializer(), 500L),
                 new StringDeserializer(),
                 String.class,
-                30);
+                30,
+                testInfo);
 
         final String resultFromConsoleConsumer = readWindowedKeyedMessagesViaConsoleConsumer(
                 new TimeWindowedDeserializer<String>(),
@@ -590,7 +594,7 @@ public void shouldReduceSlidingWindows() throws Exception {
 
     @SuppressWarnings("deprecation")
     @Test
-    public void shouldAggregateSlidingWindows() throws Exception {
+    public void shouldAggregateSlidingWindows(final TestInfo testInfo) throws Exception {
         final long firstBatchTimestamp = mockTime.milliseconds();
         final long timeDifference = 500L;
         produceMessages(firstBatchTimestamp);
@@ -616,7 +620,8 @@ public void shouldAggregateSlidingWindows() throws Exception {
                 new TimeWindowedDeserializer<>(),
                 new IntegerDeserializer(),
                 String.class,
-                30);
+                30,
+                testInfo);
 
         // read from ConsoleConsumer
         final String resultFromConsoleConsumer = readWindowedKeyedMessagesViaConsoleConsumer(
@@ -1036,8 +1041,8 @@ private void produceMessages(final long timestamp) throws Exception {
     }
 
 
-    private void createTopics() throws InterruptedException {
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+    private void createTopics(final TestInfo testInfo) throws InterruptedException {
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         streamOneInput = "stream-one-" + safeTestName;
         outputTopic = "output-" + safeTestName;
         userSessionsStream = "user-sessions-" + safeTestName;
@@ -1052,19 +1057,21 @@ private void startStreams() {
 
     private <K, V> List<KeyValueTimestamp<K, V>> receiveMessages(final Deserializer<K> keyDeserializer,
                                                                  final Deserializer<V> valueDeserializer,
-                                                                 final int numMessages)
+                                                                 final int numMessages,
+                                                                 final TestInfo testInfo)
             throws Exception {
 
-        return receiveMessages(keyDeserializer, valueDeserializer, null, numMessages);
+        return receiveMessages(keyDeserializer, valueDeserializer, null, numMessages, testInfo);
     }
 
     private <K, V> List<KeyValueTimestamp<K, V>> receiveMessages(final Deserializer<K> keyDeserializer,
                                                                  final Deserializer<V> valueDeserializer,
                                                                  final Class innerClass,
-                                                                 final int numMessages)
+                                                                 final int numMessages,
+                                                                 final TestInfo testInfo)
             throws Exception {
 
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         final Properties consumerProperties = new Properties();
         consumerProperties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         consumerProperties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group-" + safeTestName);
@@ -1086,8 +1093,9 @@ private <K, V> List<KeyValueTimestamp<K, V>> receiveMessages(final Deserializer<
     private <K, V> List<KeyValueTimestamp<K, V>> receiveMessagesWithTimestamp(final Deserializer<K> keyDeserializer,
                                                                               final Deserializer<V> valueDeserializer,
                                                                               final Class innerClass,
-                                                                              final int numMessages) throws Exception {
-        final String safeTestName = safeUniqueTestName(getClass(), testName);
+                                                                              final int numMessages,
+                                                                              final TestInfo testInfo) throws Exception {
+        final String safeTestName = safeUniqueTestName(getClass(), testInfo);
         final Properties consumerProperties = new Properties();
         consumerProperties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         consumerProperties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group-" + safeTestName);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamRepartitionIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamRepartitionIntegrationTest.java
index 1e7f685debbf1..5043ee2f8b953 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamRepartitionIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamRepartitionIntegrationTest.java
@@ -49,6 +49,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameter;
@@ -83,6 +84,9 @@
 @Category({IntegrationTest.class})
 @SuppressWarnings("deprecation")
 public class KStreamRepartitionIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamTransformIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamTransformIntegrationTest.java
index 7b13d9f3ecbda..d916faa9d7215 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KStreamTransformIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KStreamTransformIntegrationTest.java
@@ -39,8 +39,10 @@
 import org.apache.kafka.test.StreamsTestUtils;
 
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -55,7 +57,8 @@
 @SuppressWarnings("unchecked")
 @Category({IntegrationTest.class})
 public class KStreamTransformIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private StreamsBuilder builder;
     private final String topic = "stream";
     private final String stateStoreName = "myTransformState";
@@ -115,6 +118,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransform() {
         builder.addStateStore(storeBuilder());
 
@@ -133,6 +137,7 @@ public void shouldTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransformWithConnectedStoreProvider() {
         stream
             .transform(new TransformerSupplier<Integer, Integer, KeyValue<Integer, Integer>>() {
@@ -185,6 +190,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransform() {
         builder.addStateStore(storeBuilder());
 
@@ -215,6 +221,7 @@ public void shouldFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransformWithConnectedStoreProvider() {
         stream
             .flatTransform(new TransformerSupplier<Integer, Integer, Iterable<KeyValue<Integer, Integer>>>() {
@@ -275,6 +282,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransformValuesWithValueTransformerWithKey() {
         builder.addStateStore(storeBuilder());
 
@@ -293,6 +301,7 @@ public void shouldTransformValuesWithValueTransformerWithKey() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransformValuesWithValueTransformerWithKeyWithConnectedStoreProvider() {
         stream
             .transformValues(new ValueTransformerWithKeySupplier<Integer, Integer, Integer>() {
@@ -331,6 +340,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransformValuesWithValueTransformerWithoutKey() {
         builder.addStateStore(storeBuilder());
 
@@ -349,6 +359,7 @@ public void shouldTransformValuesWithValueTransformerWithoutKey() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldTransformValuesWithValueTransformerWithoutKeyWithConnectedStoreProvider() {
         stream
             .transformValues(new ValueTransformerSupplier<Integer, Integer>() {
@@ -400,6 +411,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransformValuesWithKey() {
         builder.addStateStore(storeBuilder());
 
@@ -430,6 +442,7 @@ public void shouldFlatTransformValuesWithKey() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransformValuesWithKeyWithConnectedStoreProvider() {
         stream
             .flatTransformValues(new ValueTransformerWithKeySupplier<Integer, Integer, Iterable<Integer>>() {
@@ -493,6 +506,7 @@ public void close() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransformValuesWithValueTransformerWithoutKey() {
         builder.addStateStore(storeBuilder());
 
@@ -523,6 +537,7 @@ public void shouldFlatTransformValuesWithValueTransformerWithoutKey() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldFlatTransformValuesWithValueTransformerWithoutKeyWithConnectedStoreProvider() {
         stream
             .flatTransformValues(new ValueTransformerSupplier<Integer, Iterable<Integer>>() {
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableEfficientRangeQueryTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableEfficientRangeQueryTest.java
index b0564ba16a208..746de14f2171b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableEfficientRangeQueryTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableEfficientRangeQueryTest.java
@@ -40,6 +40,7 @@
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -61,6 +62,8 @@
 
 @RunWith(Parameterized.class)
 public class KTableEfficientRangeQueryTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private enum StoreType { InMemory, RocksDB, Timed };
     private static final String TABLE_NAME = "mytable";
     private static final int DATA_SIZE = 5;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest.java
index c83bbaee62d1a..e628ed6ced66b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest.java
@@ -51,18 +51,19 @@
 import org.apache.kafka.streams.kstream.ValueJoiner;
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.utils.UniqueTopicSerdeScope;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
 
 import kafka.utils.MockTime;
-
-@Category({IntegrationTest.class})
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+
+@Timeout(600)
+@Tag("integration")
 public class KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest {
     private final static int NUM_BROKERS = 1;
 
@@ -82,7 +83,7 @@ public class KTableKTableForeignKeyInnerJoinCustomPartitionerIntegrationTest {
     private final static Properties PRODUCER_CONFIG_1 = new Properties();
     private final static Properties PRODUCER_CONFIG_2 = new Properties();
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException, InterruptedException {
         CLUSTER.start();
         //Use multiple partitions to ensure distribution of keys.
@@ -121,12 +122,12 @@ public static void startCluster() throws IOException, InterruptedException {
         CONSUMER_CONFIG.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
 
-    @Before
+    @BeforeEach
     public void before() throws IOException {
         final String stateDirBasePath = TestUtils.tempDirectory().getPath();
         streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, stateDirBasePath + "-1");
@@ -134,7 +135,7 @@ public void before() throws IOException {
         streamsConfigThree.put(StreamsConfig.STATE_DIR_CONFIG, stateDirBasePath + "-3");
     }
 
-    @After
+    @AfterEach
     public void after() throws IOException {
         if (streams != null) {
             streams.close();
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java
index 0788b52e30c6f..3604f1127e368 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyInnerJoinMultiIntegrationTest.java
@@ -46,8 +46,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -64,6 +66,8 @@
 
 @Category({IntegrationTest.class})
 public class KTableKTableForeignKeyInnerJoinMultiIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private final static int NUM_BROKERS = 1;
 
     public final static EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinDistributedTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinDistributedTest.java
index af952bda9bcc4..c95b37ae85505 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinDistributedTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinDistributedTest.java
@@ -42,6 +42,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -58,6 +59,8 @@
 
 @Category({IntegrationTest.class})
 public class KTableKTableForeignKeyJoinDistributedTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
     private static final String LEFT_TABLE = "left_table";
     private static final String RIGHT_TABLE = "right_table";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java
index 60104c4755b60..931aaf8e53e65 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinIntegrationTest.java
@@ -33,11 +33,14 @@
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.utils.UniqueTopicSerdeScope;
+import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -59,8 +62,10 @@
 
 
 @RunWith(Parameterized.class)
+@Category(IntegrationTest.class)
 public class KTableKTableForeignKeyJoinIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final String LEFT_TABLE = "left_table";
     private static final String RIGHT_TABLE = "right_table";
     private static final String OUTPUT = "output-topic";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinMaterializationIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinMaterializationIntegrationTest.java
index 778f507cf20aa..2a36556c99f95 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinMaterializationIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableKTableForeignKeyJoinMaterializationIntegrationTest.java
@@ -32,11 +32,14 @@
 import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.kstream.ValueJoiner;
 import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -57,8 +60,10 @@
 
 
 @RunWith(Parameterized.class)
+@Category(IntegrationTest.class)
 public class KTableKTableForeignKeyJoinMaterializationIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final String LEFT_TABLE = "left_table";
     private static final String RIGHT_TABLE = "right_table";
     private static final String OUTPUT = "output-topic";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KTableSourceTopicRestartIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KTableSourceTopicRestartIntegrationTest.java
index 6d50ea99cc4a3..be583f17a7e20 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/KTableSourceTopicRestartIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KTableSourceTopicRestartIntegrationTest.java
@@ -33,16 +33,15 @@
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.processor.StateRestoreListener;
 import org.apache.kafka.streams.processor.WallclockTimestampExtractor;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.TestName;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -53,7 +52,8 @@
 import java.util.Properties;
 import java.util.concurrent.ConcurrentHashMap;
 
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class KTableSourceTopicRestartIntegrationTest {
     private static final int NUM_BROKERS = 3;
     private static final String SOURCE_TOPIC = "source-topic";
@@ -62,7 +62,7 @@ public class KTableSourceTopicRestartIntegrationTest {
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
         STREAMS_CONFIG.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
@@ -81,7 +81,7 @@ public static void startCluster() throws IOException {
         PRODUCER_CONFIG.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
@@ -96,15 +96,12 @@ public static void closeCluster() {
     private Map<String, String> expectedInitialResultsMap;
     private Map<String, String> expectedResultsWithDataWrittenDuringRestoreMap;
 
-    @Rule
-    public TestName testName = new TestName();
-
-    @Before
-    public void before() throws Exception {
-        sourceTopic = SOURCE_TOPIC + "-" + testName.getMethodName();
+    @BeforeEach
+    public void before(final TestInfo testInfo) throws Exception {
+        sourceTopic = SOURCE_TOPIC + "-" + IntegrationTestUtils.safeUniqueTestName(getClass(), testInfo);
         CLUSTER.createTopic(sourceTopic);
 
-        STREAMS_CONFIG.put(StreamsConfig.APPLICATION_ID_CONFIG, IntegrationTestUtils.safeUniqueTestName(getClass(), testName));
+        STREAMS_CONFIG.put(StreamsConfig.APPLICATION_ID_CONFIG, IntegrationTestUtils.safeUniqueTestName(getClass(), testInfo));
 
         final KTable<String, String> kTable = streamsBuilder.table(sourceTopic, Materialized.as("store"));
         kTable.toStream().foreach(readKeyValues::put);
@@ -113,7 +110,7 @@ public void before() throws Exception {
         expectedResultsWithDataWrittenDuringRestoreMap = createExpectedResultsMap("a", "b", "c", "d", "f", "g", "h");
     }
 
-    @After
+    @AfterEach
     public void after() throws Exception {
         IntegrationTestUtils.purgeLocalStreamsState(STREAMS_CONFIG);
     }
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/KafkaStreamsCloseOptionsIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/KafkaStreamsCloseOptionsIntegrationTest.java
new file mode 100644
index 0000000000000..8d3cb8e87959a
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/KafkaStreamsCloseOptionsIntegrationTest.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.integration;
+
+import kafka.server.KafkaConfig;
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.serialization.LongDeserializer;
+import org.apache.kafka.common.serialization.LongSerializer;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KafkaStreams.CloseOptions;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.Topology;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.TestUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TemporaryFolder;
+import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import static java.util.Collections.singletonList;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForEmptyConsumerGroup;
+
+@Category({IntegrationTest.class})
+public class KafkaStreamsCloseOptionsIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+    @Rule
+    public final TestName testName = new TestName();
+    private static MockTime mockTime;
+
+    @Rule
+    public final TemporaryFolder testFolder = new TemporaryFolder(TestUtils.tempDirectory());
+
+    protected static final String INPUT_TOPIC = "inputTopic";
+    protected static final String OUTPUT_TOPIC = "outputTopic";
+
+    protected Properties streamsConfig;
+    protected static KafkaStreams streams;
+    protected static Admin adminClient;
+    protected Properties commonClientConfig;
+    private Properties producerConfig;
+    protected Properties resultConsumerConfig;
+
+    public static final EmbeddedKafkaCluster CLUSTER;
+
+    static {
+        final Properties brokerProps = new Properties();
+        brokerProps.setProperty(KafkaConfig.GroupMaxSessionTimeoutMsProp(), Integer.toString(Integer.MAX_VALUE));
+        CLUSTER = new EmbeddedKafkaCluster(1, brokerProps);
+    }
+
+    @BeforeClass
+    public static void startCluster() throws IOException {
+        CLUSTER.start();
+    }
+
+    @AfterClass
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+    @Before
+    public void before() throws Exception {
+        mockTime = CLUSTER.time;
+
+        final String appID = IntegrationTestUtils.safeUniqueTestName(getClass(), testName);
+
+        commonClientConfig = new Properties();
+        commonClientConfig.put(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+
+        streamsConfig = new Properties();
+        streamsConfig.put(StreamsConfig.STATE_DIR_CONFIG, testFolder.getRoot().getPath());
+        streamsConfig.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Long().getClass());
+        streamsConfig.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        streamsConfig.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
+        streamsConfig.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
+        streamsConfig.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 100);
+        streamsConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        // In this test, we set the SESSION_TIMEOUT_MS_CONFIG high in order to show that the call to
+        // `close(CloseOptions)` can remove the application from the Consumder Groups successfully.
+        streamsConfig.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, Integer.MAX_VALUE);
+        streamsConfig.putAll(commonClientConfig);
+
+        producerConfig = new Properties();
+        producerConfig.put(ProducerConfig.ACKS_CONFIG, "all");
+        producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, LongSerializer.class);
+        producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
+        producerConfig.putAll(commonClientConfig);
+
+        resultConsumerConfig = new Properties();
+        resultConsumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, appID + "-result-consumer");
+        resultConsumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        resultConsumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);
+        resultConsumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+        resultConsumerConfig.putAll(commonClientConfig);
+
+        if (adminClient == null) {
+            adminClient = Admin.create(commonClientConfig);
+        }
+
+        CLUSTER.deleteAllTopicsAndWait(120_000L);
+        CLUSTER.createTopic(INPUT_TOPIC, 2, 1);
+        CLUSTER.createTopic(OUTPUT_TOPIC, 2, 1);
+
+        add10InputElements();
+    }
+
+    @After
+    public void after() throws Exception {
+        if (streams != null) {
+            streams.close(Duration.ofSeconds(30));
+        }
+    }
+
+    @Test
+    public void testCloseOptions() throws Exception {
+        final String appID = IntegrationTestUtils.safeUniqueTestName(getClass(), testName);
+        streamsConfig.put(StreamsConfig.APPLICATION_ID_CONFIG, appID);
+        streamsConfig.put(ConsumerConfig.GROUP_INSTANCE_ID_CONFIG, "someGroupInstance");
+        // Test with two threads to show that each of the threads is being called to remove clients from the CG.
+        streamsConfig.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 2);
+
+        // RUN
+        streams = new KafkaStreams(setupTopologyWithoutIntermediateUserTopic(), streamsConfig);
+        IntegrationTestUtils.startApplicationAndWaitUntilRunning(singletonList(streams), Duration.ofSeconds(30));
+        IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived(resultConsumerConfig, OUTPUT_TOPIC, 10);
+
+        streams.close(new CloseOptions().leaveGroup(true).timeout(Duration.ofSeconds(30)));
+        waitForEmptyConsumerGroup(adminClient, appID, 0);
+    }
+
+    protected Topology setupTopologyWithoutIntermediateUserTopic() {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final KStream<Long, String> input = builder.stream(INPUT_TOPIC);
+
+        input.to(OUTPUT_TOPIC, Produced.with(Serdes.Long(), Serdes.String()));
+        return builder.build();
+    }
+
+    private void add10InputElements() {
+        final List<KeyValue<Long, String>> records = Arrays.asList(KeyValue.pair(0L, "aaa"),
+            KeyValue.pair(1L, "bbb"),
+            KeyValue.pair(0L, "ccc"),
+            KeyValue.pair(1L, "ddd"),
+            KeyValue.pair(0L, "eee"),
+            KeyValue.pair(1L, "fff"),
+            KeyValue.pair(0L, "ggg"),
+            KeyValue.pair(1L, "hhh"),
+            KeyValue.pair(0L, "iii"),
+            KeyValue.pair(1L, "jjj"));
+
+        for (final KeyValue<Long, String> record : records) {
+            mockTime.sleep(10);
+            IntegrationTestUtils.produceKeyValuesSynchronouslyWithTimestamp(INPUT_TOPIC, Collections.singleton(record), producerConfig, mockTime.milliseconds());
+        }
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/LagFetchIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/LagFetchIntegrationTest.java
index 6a024966d9f23..3b7251d6da17e 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/LagFetchIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/LagFetchIntegrationTest.java
@@ -47,6 +47,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -76,7 +77,8 @@
 
 @Category({IntegrationTest.class})
 public class LagFetchIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
     @BeforeClass
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/MetricsIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/MetricsIntegrationTest.java
index 9ada60f40367c..c937999ca5ef5 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/MetricsIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/MetricsIntegrationTest.java
@@ -52,6 +52,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -68,7 +69,8 @@
 @Category({IntegrationTest.class})
 @SuppressWarnings("deprecation")
 public class MetricsIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
     private static final int NUM_THREADS = 2;
 
@@ -91,6 +93,7 @@ public static void closeCluster() {
     private static final String STREAM_THREAD_NODE_METRICS = "stream-thread-metrics";
     private static final String STREAM_TASK_NODE_METRICS = "stream-task-metrics";
     private static final String STREAM_PROCESSOR_NODE_METRICS = "stream-processor-node-metrics";
+    private static final String STREAM_TOPIC_METRICS = "stream-topic-metrics";
     private static final String STREAM_CACHE_NODE_METRICS = "stream-record-cache-metrics";
 
     private static final String IN_MEMORY_KVSTORE_TAG_KEY = "in-memory-state-id";
@@ -213,6 +216,10 @@ public static void closeCluster() {
     private static final String RECORD_E2E_LATENCY_AVG = "record-e2e-latency-avg";
     private static final String RECORD_E2E_LATENCY_MIN = "record-e2e-latency-min";
     private static final String RECORD_E2E_LATENCY_MAX = "record-e2e-latency-max";
+    private static final String BYTES_CONSUMED_TOTAL = "bytes-consumed-total";
+    private static final String RECORDS_CONSUMED_TOTAL = "records-consumed-total";
+    private static final String BYTES_PRODUCED_TOTAL = "bytes-produced-total";
+    private static final String RECORDS_PRODUCED_TOTAL = "records-produced-total";
 
     // stores name
     private static final String TIME_WINDOWED_AGGREGATED_STREAM_STORE = "time-windowed-aggregated-stream-store";
@@ -356,6 +363,7 @@ public void shouldAddMetricsOnAllLevels() throws Exception {
         checkThreadLevelMetrics();
         checkTaskLevelMetrics();
         checkProcessorNodeLevelMetrics();
+        checkTopicLevelMetrics();
         checkKeyValueStoreMetrics(IN_MEMORY_KVSTORE_TAG_KEY);
         checkKeyValueStoreMetrics(ROCKSDB_KVSTORE_TAG_KEY);
         checkKeyValueStoreMetrics(IN_MEMORY_LRUCACHE_TAG_KEY);
@@ -542,6 +550,18 @@ private void checkProcessorNodeLevelMetrics() {
         checkMetricByName(listMetricProcessor, RECORD_E2E_LATENCY_MAX, numberOfSourceNodes + numberOfTerminalNodes);
     }
 
+    private void checkTopicLevelMetrics() {
+        final List<Metric> listMetricProcessor = new ArrayList<Metric>(kafkaStreams.metrics().values()).stream()
+            .filter(m -> m.metricName().group().equals(STREAM_TOPIC_METRICS))
+            .collect(Collectors.toList());
+        final int numberOfSourceTopics = 4;
+        final int numberOfSinkTopics = 4;
+        checkMetricByName(listMetricProcessor, BYTES_CONSUMED_TOTAL, numberOfSourceTopics);
+        checkMetricByName(listMetricProcessor, RECORDS_CONSUMED_TOTAL, numberOfSourceTopics);
+        checkMetricByName(listMetricProcessor, BYTES_PRODUCED_TOTAL, numberOfSinkTopics);
+        checkMetricByName(listMetricProcessor, RECORDS_PRODUCED_TOTAL, numberOfSinkTopics);
+    }
+
     private void checkKeyValueStoreMetrics(final String tagKey) {
         final List<Metric> listMetricStore = new ArrayList<Metric>(kafkaStreams.metrics().values()).stream()
             .filter(m -> m.metricName().tags().containsKey(tagKey) && m.metricName().group().equals(STATE_STORE_LEVEL_GROUP))
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/MetricsReporterIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/MetricsReporterIntegrationTest.java
index a7c925ad5f5a7..e07a888690094 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/MetricsReporterIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/MetricsReporterIntegrationTest.java
@@ -35,6 +35,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -48,7 +49,8 @@
 
 @Category({IntegrationTest.class})
 public class MetricsReporterIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/NamedTopologyIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/NamedTopologyIntegrationTest.java
index 23aa898a8cce9..8facc8279d297 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/NamedTopologyIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/NamedTopologyIntegrationTest.java
@@ -56,6 +56,7 @@
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.internals.StreamsMetadataImpl;
 import org.apache.kafka.streams.utils.UniqueTopicSerdeScope;
+import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.StreamsTestUtils;
 import org.apache.kafka.test.TestUtils;
 
@@ -70,7 +71,10 @@
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
+
 import java.time.Duration;
 import java.util.Collection;
 import java.util.Iterator;
@@ -101,7 +105,10 @@
 import static java.util.Collections.singleton;
 import static java.util.Collections.singletonList;
 
+@Category(IntegrationTest.class)
 public class NamedTopologyIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final Duration STARTUP_TIMEOUT = Duration.ofSeconds(45);
     
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
@@ -779,7 +786,11 @@ public void shouldWaitForMissingInputTopicsToBeCreated() throws Exception {
             CLUSTER.createTopic(NEW_STREAM, 2, 1);
             produceToInputTopics(NEW_STREAM, STANDARD_INPUT_DATA);
 
-            assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, OUTPUT_STREAM_1, 3), equalTo(COUNT_OUTPUT_DATA));
+            final List<KeyValue<String, Integer>> output =
+                waitUntilMinKeyValueRecordsReceived(consumerConfig, OUTPUT_STREAM_1, 3);
+            output.retainAll(COUNT_OUTPUT_DATA);
+
+            assertThat(output, equalTo(COUNT_OUTPUT_DATA));
 
             // Make sure the threads were not actually killed and replaced
             assertThat(streams.metadataForLocalThreads().size(), equalTo(2));
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/OptimizedKTableIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/OptimizedKTableIntegrationTest.java
index 44744cd3e2be3..8f8faea448fde 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/OptimizedKTableIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/OptimizedKTableIntegrationTest.java
@@ -30,6 +30,7 @@
 import java.util.Properties;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
@@ -39,10 +40,11 @@
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KeyQueryMetadata;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
-import org.apache.kafka.streams.KeyQueryMetadata;
+import org.apache.kafka.streams.errors.InvalidStateStoreException;
 import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
 import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
 import org.apache.kafka.streams.kstream.Consumed;
@@ -51,6 +53,7 @@
 import org.apache.kafka.streams.state.QueryableStoreTypes;
 import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.NoRetryException;
 import org.apache.kafka.test.TestUtils;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -60,9 +63,15 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @Category(IntegrationTest.class)
 public class OptimizedKTableIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+    private static final Logger LOG = LoggerFactory.getLogger(OptimizedKTableIntegrationTest.class);
     private static final int NUM_BROKERS = 1;
     private static int port = 0;
     private static final String INPUT_TOPIC_NAME = "input-topic";
@@ -125,31 +134,37 @@ public void shouldApplyUpdatesToStandbyStore() throws Exception {
         // Assert that all messages in the first batch were processed in a timely manner
         assertThat(semaphore.tryAcquire(batch1NumMessages, 60, TimeUnit.SECONDS), is(equalTo(true)));
 
-        final ReadOnlyKeyValueStore<Integer, Integer> store1 = IntegrationTestUtils.getStore(TABLE_NAME, kafkaStreams1, QueryableStoreTypes.keyValueStore());
-        final ReadOnlyKeyValueStore<Integer, Integer> store2 = IntegrationTestUtils.getStore(TABLE_NAME, kafkaStreams2, QueryableStoreTypes.keyValueStore());
-
-        final boolean kafkaStreams1WasFirstActive;
-        final KeyQueryMetadata keyQueryMetadata = kafkaStreams1.queryMetadataForKey(TABLE_NAME, key, (topic, somekey, value, numPartitions) -> 0);
-
-        // Assert that the current value in store reflects all messages being processed
-        if ((keyQueryMetadata.activeHost().port() % 2) == 1) {
-            assertThat(store1.get(key), is(equalTo(batch1NumMessages - 1)));
-            kafkaStreams1WasFirstActive = true;
-        } else {
-            assertThat(store2.get(key), is(equalTo(batch1NumMessages - 1)));
-            kafkaStreams1WasFirstActive = false;
-        }
-
-        if (kafkaStreams1WasFirstActive) {
-            kafkaStreams1.close();
-        } else {
-            kafkaStreams2.close();
-        }
+        final AtomicReference<ReadOnlyKeyValueStore<Integer, Integer>> newActiveStore = new AtomicReference<>(null);
+        TestUtils.retryOnExceptionWithTimeout(() -> {
+            final ReadOnlyKeyValueStore<Integer, Integer> store1 = IntegrationTestUtils.getStore(TABLE_NAME, kafkaStreams1, QueryableStoreTypes.keyValueStore());
+            final ReadOnlyKeyValueStore<Integer, Integer> store2 = IntegrationTestUtils.getStore(TABLE_NAME, kafkaStreams2, QueryableStoreTypes.keyValueStore());
+
+            final KeyQueryMetadata keyQueryMetadata = kafkaStreams1.queryMetadataForKey(TABLE_NAME, key, (topic, somekey, value, numPartitions) -> 0);
+
+            try {
+                // Assert that the current value in store reflects all messages being processed
+                if ((keyQueryMetadata.activeHost().port() % 2) == 1) {
+                    assertThat(store1.get(key), is(equalTo(batch1NumMessages - 1)));
+                    kafkaStreams1.close();
+                    newActiveStore.set(store2);
+                } else {
+                    assertThat(store2.get(key), is(equalTo(batch1NumMessages - 1)));
+                    kafkaStreams2.close();
+                    newActiveStore.set(store1);
+                }
+            } catch (final InvalidStateStoreException e) {
+                LOG.warn("Detected an unexpected rebalance during test. Retrying if possible.", e);
+                throw e;
+            } catch (final Throwable t) {
+                LOG.error("Caught non-retriable exception in test. Exiting.", t);
+                throw new NoRetryException(t);
+            }
+        });
 
-        final ReadOnlyKeyValueStore<Integer, Integer> newActiveStore = kafkaStreams1WasFirstActive ? store2 : store1;
+        // Wait for failover
         TestUtils.retryOnExceptionWithTimeout(60 * 1000, 100, () -> {
             // Assert that after failover we have recovered to the last store write
-            assertThat(newActiveStore.get(key), is(equalTo(batch1NumMessages - 1)));
+            assertThat(newActiveStore.get().get(key), is(equalTo(batch1NumMessages - 1)));
         });
 
         final int totalNumMessages = batch1NumMessages + batch2NumMessages;
@@ -161,7 +176,7 @@ public void shouldApplyUpdatesToStandbyStore() throws Exception {
 
         TestUtils.retryOnExceptionWithTimeout(60 * 1000, 100, () -> {
             // Assert that the current value in store reflects all messages being processed
-            assertThat(newActiveStore.get(key), is(equalTo(totalNumMessages - 1)));
+            assertThat(newActiveStore.get().get(key), is(equalTo(totalNumMessages - 1)));
         });
     }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PauseResumeIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PauseResumeIntegrationTest.java
new file mode 100644
index 0000000000000..c897e0602f9c5
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/PauseResumeIntegrationTest.java
@@ -0,0 +1,422 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.integration;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.LongDeserializer;
+import org.apache.kafka.common.serialization.LongSerializer;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KafkaStreams.State;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.processor.internals.namedtopology.KafkaStreamsNamedTopologyWrapper;
+import org.apache.kafka.streams.processor.internals.namedtopology.NamedTopologyBuilder;
+import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.TestUtils;
+import org.hamcrest.CoreMatchers;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Properties;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+import static org.apache.kafka.streams.KeyValue.pair;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.cleanStateBeforeTest;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.getTopicSize;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForApplicationState;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitUntilMinKeyValueRecordsReceived;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitUntilStreamsHasPolled;
+import static org.apache.kafka.test.TestUtils.waitForCondition;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+@Category({IntegrationTest.class})
+public class PauseResumeIntegrationTest {
+    private static final Duration STARTUP_TIMEOUT = Duration.ofSeconds(45);
+    public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
+    private static Properties producerConfig;
+    private static Properties consumerConfig;
+
+    private static final Materialized<Object, Long, KeyValueStore<Bytes, byte[]>> IN_MEMORY_STORE =
+        Materialized.as(Stores.inMemoryKeyValueStore("store"));
+
+    private static final String INPUT_STREAM_1 = "input-stream-1";
+    private static final String INPUT_STREAM_2 = "input-stream-2";
+    private static final String OUTPUT_STREAM_1 = "output-stream-1";
+    private static final String OUTPUT_STREAM_2 = "output-stream-2";
+    private static final String TOPOLOGY1 = "topology1";
+    private static final String TOPOLOGY2 = "topology2";
+
+    private static final List<KeyValue<String, Long>> STANDARD_INPUT_DATA =
+        asList(pair("A", 100L), pair("B", 200L), pair("A", 300L), pair("C", 400L), pair("C", -50L));
+    private static final List<KeyValue<String, Long>> COUNT_OUTPUT_DATA =
+        asList(pair("A", 1L), pair("B", 1L), pair("A", 2L), pair("C", 1L), pair("C", 2L));
+    private static final List<KeyValue<String, Long>> COUNT_OUTPUT_DATA2 =
+        asList(pair("A", 3L), pair("B", 2L), pair("A", 4L), pair("C", 3L), pair("C", 4L));
+    private static final List<KeyValue<String, Long>> COUNT_OUTPUT_DATA_ALL = new ArrayList<KeyValue<String, Long>>() {{
+            addAll(COUNT_OUTPUT_DATA);
+            addAll(COUNT_OUTPUT_DATA2);
+        }};
+
+    private String appId;
+    private KafkaStreams kafkaStreams, kafkaStreams2;
+    private KafkaStreamsNamedTopologyWrapper streamsNamedTopologyWrapper;
+
+    @Rule
+    public final TestName testName = new TestName();
+
+    @BeforeClass
+    public static void startCluster() throws Exception {
+        CLUSTER.start();
+        producerConfig = TestUtils.producerConfig(CLUSTER.bootstrapServers(),
+            StringSerializer.class, LongSerializer.class);
+        consumerConfig = TestUtils.consumerConfig(CLUSTER.bootstrapServers(),
+            StringDeserializer.class, LongDeserializer.class);
+    }
+
+    @AfterClass
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+    @Before
+    public void createTopics() throws InterruptedException {
+        cleanStateBeforeTest(CLUSTER, 1, INPUT_STREAM_1, INPUT_STREAM_2, OUTPUT_STREAM_1, OUTPUT_STREAM_2);
+        appId = safeUniqueTestName(PauseResumeIntegrationTest.class, testName);
+    }
+
+    private Properties props() {
+        final Properties properties = new Properties();
+        properties.put(StreamsConfig.APPLICATION_ID_CONFIG, appId);
+        properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        properties.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory(appId).getPath());
+        properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass());
+        properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
+        properties.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
+        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        properties.put(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 100);
+        properties.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 1000);
+        return properties;
+    }
+
+    @After
+    public void shutdown() throws InterruptedException {
+        for (final KafkaStreams streams : Arrays.asList(kafkaStreams, kafkaStreams2, streamsNamedTopologyWrapper)) {
+            if (streams != null) {
+                streams.close(Duration.ofSeconds(30));
+            }
+        }
+    }
+
+    private static void produceToInputTopics(final String topic, final Collection<KeyValue<String, Long>> records) {
+        IntegrationTestUtils.produceKeyValuesSynchronously(topic, records, producerConfig, CLUSTER.time);
+    }
+
+    @Test
+    public void shouldPauseAndResumeKafkaStreams() throws Exception {
+        kafkaStreams = buildKafkaStreams(OUTPUT_STREAM_1);
+        kafkaStreams.start();
+        waitForApplicationState(singletonList(kafkaStreams), State.RUNNING, STARTUP_TIMEOUT);
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+
+        kafkaStreams.pause();
+        assertTrue(kafkaStreams.isPaused());
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+
+        waitUntilStreamsHasPolled(kafkaStreams, 2);
+        assertTopicSize(OUTPUT_STREAM_1, 5);
+
+        kafkaStreams.resume();
+        assertFalse(kafkaStreams.isPaused());
+
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA2);
+        assertTopicSize(OUTPUT_STREAM_1, 10);
+    }
+
+    @Test
+    public void shouldAllowForTopologiesToStartPaused() throws Exception {
+        kafkaStreams = buildKafkaStreams(OUTPUT_STREAM_1);
+        kafkaStreams.pause();
+        kafkaStreams.start();
+        waitForApplicationState(singletonList(kafkaStreams), State.RUNNING, STARTUP_TIMEOUT);
+        assertTrue(kafkaStreams.isPaused());
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+
+        waitUntilStreamsHasPolled(kafkaStreams, 2);
+        assertTopicSize(OUTPUT_STREAM_1, 0);
+
+        kafkaStreams.resume();
+        assertFalse(kafkaStreams.isPaused());
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+        assertTopicSize(OUTPUT_STREAM_1, 5);
+    }
+
+    @Test
+    public void shouldPauseAndResumeKafkaStreamsWithNamedTopologies() throws Exception {
+        streamsNamedTopologyWrapper = new KafkaStreamsNamedTopologyWrapper(props());
+        final NamedTopologyBuilder builder1 = getNamedTopologyBuilder1();
+        final NamedTopologyBuilder builder2 = getNamedTopologyBuilder2();
+
+        streamsNamedTopologyWrapper.start(asList(builder1.build(), builder2.build()));
+        waitForApplicationState(singletonList(streamsNamedTopologyWrapper), State.RUNNING, STARTUP_TIMEOUT);
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+        awaitOutput(OUTPUT_STREAM_2, 5, COUNT_OUTPUT_DATA);
+        assertTopicSize(OUTPUT_STREAM_1, 5);
+        assertTopicSize(OUTPUT_STREAM_2, 5);
+
+        streamsNamedTopologyWrapper.pauseNamedTopology(TOPOLOGY1);
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertFalse(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+        assertFalse(streamsNamedTopologyWrapper.isPaused());
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+
+        awaitOutput(OUTPUT_STREAM_2, 5, COUNT_OUTPUT_DATA2);
+        assertTopicSize(OUTPUT_STREAM_1, 5);
+        assertTopicSize(OUTPUT_STREAM_2, 10);
+
+        streamsNamedTopologyWrapper.resumeNamedTopology(TOPOLOGY1);
+        assertFalse(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA2);
+    }
+
+    @Test
+    public void shouldPauseAndResumeAllKafkaStreamsWithNamedTopologies() throws Exception {
+        streamsNamedTopologyWrapper = new KafkaStreamsNamedTopologyWrapper(props());
+        final NamedTopologyBuilder builder1 = getNamedTopologyBuilder1();
+        final NamedTopologyBuilder builder2 = getNamedTopologyBuilder2();
+
+        streamsNamedTopologyWrapper.start(asList(builder1.build(), builder2.build()));
+        waitForApplicationState(singletonList(streamsNamedTopologyWrapper), State.RUNNING, STARTUP_TIMEOUT);
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+        awaitOutput(OUTPUT_STREAM_2, 5, COUNT_OUTPUT_DATA);
+
+        streamsNamedTopologyWrapper.pause();
+        assertTrue(streamsNamedTopologyWrapper.isPaused());
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+
+        waitUntilStreamsHasPolled(streamsNamedTopologyWrapper, 2);
+        assertTopicSize(OUTPUT_STREAM_1, 5);
+        assertTopicSize(OUTPUT_STREAM_2, 5);
+
+        streamsNamedTopologyWrapper.resumeNamedTopology(TOPOLOGY1);
+        assertFalse(streamsNamedTopologyWrapper.isPaused());
+        assertFalse(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA2);
+        assertTopicSize(OUTPUT_STREAM_1, 10);
+        assertTopicSize(OUTPUT_STREAM_2, 5);
+    }
+
+    @Test
+    public void shouldAllowForNamedTopologiesToStartPaused() throws Exception {
+        streamsNamedTopologyWrapper = new KafkaStreamsNamedTopologyWrapper(props());
+        final NamedTopologyBuilder builder1 = getNamedTopologyBuilder1();
+        final NamedTopologyBuilder builder2 = getNamedTopologyBuilder2();
+
+        streamsNamedTopologyWrapper.pauseNamedTopology(TOPOLOGY1);
+        streamsNamedTopologyWrapper.start(asList(builder1.build(), builder2.build()));
+        waitForApplicationState(singletonList(streamsNamedTopologyWrapper), State.RUNNING, STARTUP_TIMEOUT);
+
+        assertFalse(streamsNamedTopologyWrapper.isPaused());
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertFalse(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+
+        awaitOutput(OUTPUT_STREAM_2, 5, COUNT_OUTPUT_DATA);
+        assertTopicSize(OUTPUT_STREAM_1, 0);
+
+        streamsNamedTopologyWrapper.pause();
+        assertTrue(streamsNamedTopologyWrapper.isPaused());
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+        produceToInputTopics(INPUT_STREAM_2, STANDARD_INPUT_DATA);
+
+        waitUntilStreamsHasPolled(streamsNamedTopologyWrapper, 2);
+        assertTopicSize(OUTPUT_STREAM_1, 0);
+        assertTopicSize(OUTPUT_STREAM_2, 5);
+
+        streamsNamedTopologyWrapper.resumeNamedTopology(TOPOLOGY1);
+        assertFalse(streamsNamedTopologyWrapper.isPaused());
+        assertFalse(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY1));
+        assertTrue(streamsNamedTopologyWrapper.isNamedTopologyPaused(TOPOLOGY2));
+
+        awaitOutput(OUTPUT_STREAM_1, 10, COUNT_OUTPUT_DATA_ALL);
+        assertTopicSize(OUTPUT_STREAM_1, 10);
+        assertTopicSize(OUTPUT_STREAM_2, 5);
+    }
+
+    @Test
+    public void pauseResumeShouldWorkAcrossInstances() throws Exception {
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+
+        kafkaStreams = buildKafkaStreams(OUTPUT_STREAM_1);
+        kafkaStreams.pause();
+        kafkaStreams.start();
+
+        waitForApplicationState(singletonList(kafkaStreams), State.RUNNING, STARTUP_TIMEOUT);
+        assertTrue(kafkaStreams.isPaused());
+
+        kafkaStreams2 = buildKafkaStreams(OUTPUT_STREAM_2);
+        kafkaStreams2.pause();
+        kafkaStreams2.start();
+        waitForApplicationState(singletonList(kafkaStreams2), State.RUNNING, STARTUP_TIMEOUT);
+        assertTrue(kafkaStreams2.isPaused());
+
+        waitUntilStreamsHasPolled(kafkaStreams, 2);
+        waitUntilStreamsHasPolled(kafkaStreams2, 2);
+        assertTopicSize(OUTPUT_STREAM_1, 0);
+
+        kafkaStreams2.close();
+        kafkaStreams2.cleanUp();
+        waitForApplicationState(singletonList(kafkaStreams2), State.NOT_RUNNING, STARTUP_TIMEOUT);
+
+        kafkaStreams.resume();
+        waitForApplicationState(singletonList(kafkaStreams), State.RUNNING, STARTUP_TIMEOUT);
+
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+    }
+
+    @Test
+    public void pausedTopologyShouldNotRestoreStateStores() throws Exception {
+        final Properties properties1 = props();
+        properties1.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1);
+        final Properties properties2 = props();
+        properties2.put(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1);
+        produceToInputTopics(INPUT_STREAM_1, STANDARD_INPUT_DATA);
+
+        kafkaStreams = buildKafkaStreams(OUTPUT_STREAM_1, properties1);
+        kafkaStreams2 = buildKafkaStreams(OUTPUT_STREAM_1, properties2);
+        kafkaStreams.start();
+        kafkaStreams2.start();
+
+        waitForApplicationState(Arrays.asList(kafkaStreams, kafkaStreams2), State.RUNNING, STARTUP_TIMEOUT);
+
+        awaitOutput(OUTPUT_STREAM_1, 5, COUNT_OUTPUT_DATA);
+
+        kafkaStreams.close();
+        kafkaStreams2.close();
+
+        kafkaStreams = buildKafkaStreams(OUTPUT_STREAM_1, properties1);
+        kafkaStreams2 = buildKafkaStreams(OUTPUT_STREAM_1, properties2);
+        kafkaStreams.cleanUp();
+        kafkaStreams2.cleanUp();
+
+        kafkaStreams.pause();
+        kafkaStreams2.pause();
+        kafkaStreams.start();
+        kafkaStreams2.start();
+
+        waitForApplicationState(Arrays.asList(kafkaStreams, kafkaStreams2), State.REBALANCING, STARTUP_TIMEOUT);
+
+        assertStreamsLocalStoreLagStaysConstant(kafkaStreams);
+        assertStreamsLocalStoreLagStaysConstant(kafkaStreams2);
+    }
+
+    private void assertStreamsLocalStoreLagStaysConstant(final KafkaStreams streams) throws InterruptedException {
+        waitForCondition(
+            () -> !streams.allLocalStorePartitionLags().isEmpty(),
+            "Lags for local store partitions were not found within the timeout!");
+        waitUntilStreamsHasPolled(streams, 2);
+        final long stateStoreLag1 = streams.allLocalStorePartitionLags().get("test-store").get(0).offsetLag();
+        waitUntilStreamsHasPolled(streams, 2);
+        final long stateStoreLag2 = streams.allLocalStorePartitionLags().get("test-store").get(0).offsetLag();
+        assertTrue(stateStoreLag1 > 0);
+        assertEquals(stateStoreLag1, stateStoreLag2);
+    }
+
+    private KafkaStreams buildKafkaStreams(final String outputTopic) {
+        return buildKafkaStreams(outputTopic, props());
+    }
+
+    private KafkaStreams buildKafkaStreams(final String outputTopic, final Properties properties) {
+        final StreamsBuilder builder = new StreamsBuilder();
+        builder.stream(INPUT_STREAM_1).groupByKey().count(Materialized.as("test-store")).toStream().to(outputTopic);
+        return new KafkaStreams(builder.build(properties), properties);
+    }
+
+    private void assertTopicSize(final String topicName, final int size) {
+        assertEquals(getTopicSize(consumerConfig, topicName), size);
+    }
+
+    private void awaitOutput(final String topicName, final int count, final List<KeyValue<String, Long>> output)
+        throws Exception {
+        assertThat(waitUntilMinKeyValueRecordsReceived(consumerConfig, topicName, count), CoreMatchers.equalTo(output));
+    }
+
+    private NamedTopologyBuilder getNamedTopologyBuilder1() {
+        final NamedTopologyBuilder builder1 = streamsNamedTopologyWrapper.newNamedTopologyBuilder(TOPOLOGY1);
+        builder1.stream(INPUT_STREAM_1).groupByKey().count().toStream().to(OUTPUT_STREAM_1);
+        return builder1;
+    }
+
+    private NamedTopologyBuilder getNamedTopologyBuilder2() {
+        final NamedTopologyBuilder builder2 = streamsNamedTopologyWrapper.newNamedTopologyBuilder(TOPOLOGY2);
+        builder2.stream(INPUT_STREAM_2)
+            .groupBy((k, v) -> k)
+            .count(IN_MEMORY_STORE)
+            .toStream()
+            .to(OUTPUT_STREAM_2);
+        return builder2;
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PositionRestartIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PositionRestartIntegrationTest.java
index db1f86eb3573b..1c40b1ea6fb52 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/PositionRestartIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/PositionRestartIntegrationTest.java
@@ -67,8 +67,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.slf4j.Logger;
@@ -100,7 +102,8 @@
 @Category({IntegrationTest.class})
 @RunWith(value = Parameterized.class)
 public class PositionRestartIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final Logger LOG = LoggerFactory.getLogger(PositionRestartIntegrationTest.class);
     private static final long SEED = new Random().nextLong();
     private static final int NUM_BROKERS = 1;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/PurgeRepartitionTopicIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/PurgeRepartitionTopicIntegrationTest.java
index ffb35312ea41e..26720a00215fa 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/PurgeRepartitionTopicIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/PurgeRepartitionTopicIntegrationTest.java
@@ -41,8 +41,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -55,7 +57,8 @@
 
 @Category({IntegrationTest.class})
 public class PurgeRepartitionTopicIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
 
     private static final String INPUT_TOPIC = "input-stream";
@@ -161,6 +164,7 @@ public void setup() {
         final Properties streamsConfiguration = new Properties();
         streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, APPLICATION_ID);
         streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, PURGE_INTERVAL_MS);
+        streamsConfiguration.put(StreamsConfig.REPARTITION_PURGE_INTERVAL_MS_CONFIG, PURGE_INTERVAL_MS);
         streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
         streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.Integer().getClass());
         streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Integer().getClass());
@@ -203,10 +207,11 @@ public void shouldRestoreState() throws Exception {
         TestUtils.waitForCondition(new RepartitionTopicCreatedWithExpectedConfigs(), 60000,
                 "Repartition topic " + REPARTITION_TOPIC + " not created with the expected configs after 60000 ms.");
 
+        // wait until we received more than 1 segment of data, so that we can confirm the purge succeeds in next verification
         TestUtils.waitForCondition(
-            new RepartitionTopicVerified(currentSize -> currentSize > 0),
+            new RepartitionTopicVerified(currentSize -> currentSize > PURGE_SEGMENT_BYTES),
             60000,
-            "Repartition topic " + REPARTITION_TOPIC + " not received data after 60000 ms."
+            "Repartition topic " + REPARTITION_TOPIC + " not received more than " + PURGE_SEGMENT_BYTES + "B of data after 60000 ms."
         );
 
         // we need long enough timeout to by-pass the log manager's InitialTaskDelayMs, which is hard-coded on server side
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/QueryableStateIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/QueryableStateIntegrationTest.java
index 15b9ea69241f1..a5dbdcc18c729 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/QueryableStateIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/QueryableStateIntegrationTest.java
@@ -69,6 +69,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -122,6 +123,8 @@
 @Category({IntegrationTest.class})
 @SuppressWarnings("deprecation")
 public class QueryableStateIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final Logger log = LoggerFactory.getLogger(QueryableStateIntegrationTest.class);
 
     private static final long DEFAULT_TIMEOUT_MS = 120 * 1000;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/RangeQueryIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/RangeQueryIntegrationTest.java
index 1c22bfca372b6..c67696b2cad20 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/RangeQueryIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/RangeQueryIntegrationTest.java
@@ -47,6 +47,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -68,6 +69,8 @@
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class RangeQueryIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
     private static final Properties STREAMS_CONFIG = new Properties();
     private static final String APP_ID = "range-query-integration-test";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/RegexSourceIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/RegexSourceIntegrationTest.java
index 1bfb1b625ac0e..894108c246d5b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/RegexSourceIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/RegexSourceIntegrationTest.java
@@ -51,9 +51,11 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -81,6 +83,8 @@
  */
 @Category({IntegrationTest.class})
 public class RegexSourceIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java
index 5c236e6ba80e5..204a66d148c1d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetIntegrationTest.java
@@ -32,8 +32,10 @@
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.BufferedWriter;
 import java.io.File;
@@ -55,7 +57,8 @@
  */
 @Category({IntegrationTest.class})
 public class ResetIntegrationTest extends AbstractResetIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final String NON_EXISTING_TOPIC = "nonExistingTopic";
 
     public static final EmbeddedKafkaCluster CLUSTER;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/ResetPartitionTimeIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/ResetPartitionTimeIntegrationTest.java
index c3e7c283d0cb9..7fe905ae7d4f8 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/ResetPartitionTimeIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/ResetPartitionTimeIntegrationTest.java
@@ -41,6 +41,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -65,6 +66,8 @@
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class ResetPartitionTimeIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
     private static final Properties BROKER_CONFIG;
     static {
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/RestoreIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/RestoreIntegrationTest.java
index 2c0e18070a5b0..d95468c08008f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/RestoreIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/RestoreIntegrationTest.java
@@ -53,16 +53,16 @@
 import org.apache.kafka.streams.state.internals.InMemoryKeyValueStore;
 import org.apache.kafka.streams.state.internals.KeyValueStoreBuilder;
 import org.apache.kafka.streams.state.internals.OffsetCheckpoint;
-import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestUtils;
 import org.hamcrest.CoreMatchers;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.File;
 import java.io.IOException;
@@ -74,7 +74,6 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
-import org.junit.rules.TestName;
 
 import static java.util.Arrays.asList;
 import static java.util.Collections.singletonList;
@@ -86,35 +85,34 @@
 import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.waitForStandbyCompletion;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-@Category({IntegrationTest.class})
+@Timeout(600)
+@Tag("integration")
 public class RestoreIntegrationTest {
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
 
-    @BeforeClass
+    @BeforeAll
     public static void startCluster() throws IOException {
         CLUSTER.start();
     }
 
-    @AfterClass
+    @AfterAll
     public static void closeCluster() {
         CLUSTER.stop();
     }
 
-    @Rule
-    public final TestName testName = new TestName();
     private String appId;
     private String inputStream;
 
     private final int numberOfKeys = 10000;
     private KafkaStreams kafkaStreams;
 
-    @Before
-    public void createTopics() throws InterruptedException {
-        appId = safeUniqueTestName(RestoreIntegrationTest.class, testName);
+    @BeforeEach
+    public void createTopics(final TestInfo testInfo) throws InterruptedException {
+        appId = safeUniqueTestName(RestoreIntegrationTest.class, testInfo);
         inputStream = appId + "-input-stream";
         CLUSTER.createTopic(inputStream, 2, 1);
     }
@@ -132,7 +130,7 @@ private Properties props() {
         return streamsConfiguration;
     }
 
-    @After
+    @AfterEach
     public void shutdown() {
         if (kafkaStreams != null) {
             kafkaStreams.close(Duration.ofSeconds(30));
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/RocksDBMetricsIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/RocksDBMetricsIntegrationTest.java
index c698d06772239..17610c8450dda 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/RocksDBMetricsIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/RocksDBMetricsIntegrationTest.java
@@ -47,6 +47,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.junit.runners.Parameterized.Parameter;
@@ -71,7 +72,8 @@
 @RunWith(Parameterized.class)
 @SuppressWarnings("deprecation")
 public class RocksDBMetricsIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 3;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/SlidingWindowedKStreamIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/SlidingWindowedKStreamIntegrationTest.java
new file mode 100644
index 0000000000000..b15ecee494214
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/SlidingWindowedKStreamIntegrationTest.java
@@ -0,0 +1,485 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.integration;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Optional;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.Serdes.StringSerde;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.KeyValueTimestamp;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
+import org.apache.kafka.streams.kstream.JoinWindows;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.SessionWindowedDeserializer;
+import org.apache.kafka.streams.kstream.SlidingWindows;
+import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.WindowedSerdes;
+import org.apache.kafka.streams.kstream.internals.TimeWindow;
+import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.MockAggregator;
+import org.apache.kafka.test.MockInitializer;
+import org.apache.kafka.test.TestUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.junit.rules.Timeout;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+
+import static java.time.Duration.ofMillis;
+import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.common.utils.Utils.mkProperties;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+
+@SuppressWarnings({"unchecked"})
+@Category({IntegrationTest.class})
+@RunWith(Parameterized.class)
+public class SlidingWindowedKStreamIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+    private static final int NUM_BROKERS = 1;
+
+    public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS,
+        mkProperties(
+            mkMap(mkEntry("log.retention.hours", "-1"), mkEntry("log.retention.bytes", "-1")) // Don't expire records since we manipulate timestamp
+        )
+    );
+
+    @BeforeClass
+    public static void startCluster() throws IOException {
+        CLUSTER.start();
+    }
+
+    @AfterClass
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+    private StreamsBuilder builder;
+    private Properties streamsConfiguration;
+    private KafkaStreams kafkaStreams;
+    private String streamOneInput;
+    private String streamTwoInput;
+    private String outputTopic;
+
+    @Rule
+    public TestName testName = new TestName();
+
+    @Parameter
+    public StrategyType type;
+
+    @Parameter(1)
+    public boolean withCache;
+
+    private EmitStrategy emitStrategy;
+    private boolean emitFinal;
+
+    @Parameterized.Parameters(name = "{0}_cache:{1}")
+    public static Collection<Object[]> getEmitStrategy() {
+        return asList(new Object[][] {
+            {StrategyType.ON_WINDOW_UPDATE, true},
+            {StrategyType.ON_WINDOW_UPDATE, false},
+            {StrategyType.ON_WINDOW_CLOSE, true},
+            {StrategyType.ON_WINDOW_CLOSE, false}
+        });
+    }
+
+    @Before
+    public void before() throws InterruptedException {
+        builder = new StreamsBuilder();
+        createTopics();
+        streamsConfiguration = new Properties();
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
+        streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
+        streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
+        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
+        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        streamsConfiguration.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0); // Always process
+        streamsConfiguration.put(StreamsConfig.WINDOW_STORE_CHANGE_LOG_ADDITIONAL_RETENTION_MS_CONFIG, Long.MAX_VALUE); // Don't expire changelog
+
+        emitStrategy = StrategyType.forType(type);
+        emitFinal = type.equals(StrategyType.ON_WINDOW_CLOSE);
+    }
+
+    @After
+    public void whenShuttingDown() throws IOException {
+        if (kafkaStreams != null) {
+            kafkaStreams.close();
+            kafkaStreams.cleanUp();
+        }
+        IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration);
+    }
+
+    @Test
+    public void shouldAggregateWindowedWithNoGrace() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "1", 0),  // Create [0, 10](0+1)
+            new KeyValueTimestamp<>("A", "2", 5),  // Update [0, 10](0+1+2), create [1, 11](0+2)
+            new KeyValueTimestamp<>("A", "3", 10), // Update [0, 10](0+1+2+3), [1, 11](0+2+3), create [6, 16](0+3)
+            new KeyValueTimestamp<>("A", "4", 17), // Create [7, 17](0+3+4), [11, 21](0+4), close [0, 10], [1, 11], [6, 16]
+            new KeyValueTimestamp<>("B", "5", 6),  // Late and ignore
+            new KeyValueTimestamp<>("B", "6", 11), // Late and ignore
+            new KeyValueTimestamp<>("B", "7", 18), // Create [8, 18](0+7), close A/[7, 17]
+            new KeyValueTimestamp<>("C", "8", 25)  // Create [15, 25](0+8), close B/[8, 18], A[11, 21]
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class, 10L);
+        builder.stream(streamOneInput, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey()
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(10L)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 5 : 10);
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3+4", 17),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8L, 18L)), "0+7", 18),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11L, 21L)), "0+4", 17)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1", 0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11L, 21L)), "0+4", 17),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3+4", 17),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8L, 18L)), "0+7", 18),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(15L, 25L)), "0+8", 25)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    @Test
+    public void shouldAggregateWindowedWithGrace() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "1", 0),  // Create [0, 10](0+1)
+            new KeyValueTimestamp<>("A", "2", 5),  // Update [0, 10](0+1+2), create [1, 11](0+2)
+            new KeyValueTimestamp<>("A", "3", 10), // Update [0, 10](0+1+2+3), create [6, 16](0+3), update [1, 11](0+2+3)
+            new KeyValueTimestamp<>("A", "4", 6),  // Update [0, 10](0+1+2+3+4), update [1, 11](0+2+3+4], update [6, 16](0+3+4), create [7, 17](0+3)
+            new KeyValueTimestamp<>("A", "5", 11), // Update [1, 11](0+2+3+4+5), update [6, 16](0+3+4+5), create [11, 21](0+5), update [7, 17](0+3+5)
+            new KeyValueTimestamp<>("A", "6", 16), // close [0, 10], update [6, 16](0+3+4+5+6), update [11, 21](0+5+6), create [12, 22](0+6), update [7, 17](0+3+5+6)
+            new KeyValueTimestamp<>("A", "7", 27), // close [1, 11], [6, 16], [11, 21], [7, 17] create [17, 27](0+7)
+            new KeyValueTimestamp<>("A", "8", 11)  // Late and ignore
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class, 10L);
+        builder.stream(streamOneInput, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey()
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10L), ofMillis(5)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 5 : 20);
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2+3+4", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3+4+5+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3+5+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11L, 21L)), "0+5+6", 16)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1", 0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3+4", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3+4", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2+3+4", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)), "0+2+3+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11L, 21L)), "0+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11L, 21L)), "0+5+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(7L, 17L)), "0+3+5+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(6L, 16L)), "0+3+4+5+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(12L, 22L)), "0+6", 16),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(17L, 27L)), "0+7", 27),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(17L, 27L)), "0+7", 27)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    @Test
+    public void shouldRestoreAfterJoinRestart() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "L1", 0),
+            new KeyValueTimestamp<>("A", "L2", 5),
+            new KeyValueTimestamp<>("B", "L3", 11),
+            new KeyValueTimestamp<>("B", "L4", 15),
+            new KeyValueTimestamp<>("C", "L5", 25)
+        );
+
+        produceMessages(
+            streamTwoInput,
+            new KeyValueTimestamp<>("A", "R1", 0),
+            new KeyValueTimestamp<>("A", "R2", 5),
+            new KeyValueTimestamp<>("B", "R3", 11),
+            new KeyValueTimestamp<>("B", "R4", 15),
+            new KeyValueTimestamp<>("C", "R5", 25)
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(
+            String.class, 10L);
+        final KStream<String, String> streamOne = builder.stream(streamOneInput,
+            Consumed.with(Serdes.String(), Serdes.String()));
+        final KStream<String, String> streamTwo = builder.stream(streamTwoInput,
+            Consumed.with(Serdes.String(), Serdes.String()));
+
+        final KStream<String, String> joinedStream = streamOne
+            .join(streamTwo, (v1, v2) -> v1 + "," + v2,
+                JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(2)));
+
+        joinedStream.groupByKey()
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(10L)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 5 : 7);
+
+        List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)),
+                    "0+L1,R1+L2,R2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)),
+                    "0+L2,R2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(1L, 11L)),
+                    "0+L3,R3", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)),
+                    "0+L3,R3+L4,R4", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(12L, 22L)),
+                    "0+L4,R4", 15)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+L1,R1",
+                    0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(1L, 11L)),
+                    "0+L2,R2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+L1,R1+L2,R2",
+                    5),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(1L, 11L)), "0+L3,R3",
+                    11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(12L, 22L)),
+                    "0+L4,R4", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)),
+                    "0+L3,R3+L4,R4", 15),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(15L, 25L)),
+                    "0+L5,R5", 25)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+
+        kafkaStreams.close();
+        kafkaStreams.cleanUp(); // Purge store to force restoration
+
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("C", "L6", 35)
+        );
+        produceMessages(
+            streamTwoInput,
+            new KeyValueTimestamp<>("C", "R6", 35)
+        );
+
+        // Restart
+        startStreams();
+
+        windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 1 : 2);
+
+        if (emitFinal) {
+            // Output just new closed window for C
+            expectResult = Collections.singletonList(
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(15L, 25L)),
+                    "0+L5,R5", 25)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(26L, 36L)),
+                    "0+L6,R6", 35),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25L, 35L)),
+                    "0+L5,R5+L6,R6", 35)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    private void produceMessages(final String topic, final KeyValueTimestamp<String, String>... records) {
+        IntegrationTestUtils.produceSynchronously(
+            TestUtils.producerConfig(
+                CLUSTER.bootstrapServers(),
+                StringSerializer.class,
+                StringSerializer.class),
+            false,
+            topic,
+            Optional.empty(),
+            Arrays.asList(records)
+        );
+    }
+
+    private Materialized getMaterialized() {
+        if (withCache) {
+            return Materialized.with(null, new StringSerde()).withCachingEnabled();
+        }
+        return Materialized.with(null, new StringSerde()).withCachingDisabled();
+    }
+
+    private void createTopics() throws InterruptedException {
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        streamOneInput = "stream-one-" + safeTestName;
+        streamTwoInput = "stream-two-" + safeTestName;
+        outputTopic = "output-" + safeTestName;
+        CLUSTER.createTopic(streamOneInput, 1, 1);
+        CLUSTER.createTopic(streamTwoInput, 1, 1);
+        CLUSTER.createTopic(outputTopic);
+    }
+
+    private void startStreams() {
+        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration);
+        kafkaStreams.start();
+    }
+
+    private <K, V> List<KeyValueTimestamp<K, V>> receiveMessagesWithTimestamp(final Deserializer<K> keyDeserializer,
+                                                                              final Deserializer<V> valueDeserializer,
+                                                                              final long windowSize,
+                                                                              final Class innerClass,
+                                                                              final int numMessages) throws Exception {
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final Properties consumerProperties = new Properties();
+        consumerProperties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        consumerProperties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group-" + safeTestName);
+        consumerProperties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        consumerProperties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializer.getClass().getName());
+        consumerProperties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializer.getClass().getName());
+        consumerProperties.put(StreamsConfig.WINDOW_SIZE_MS_CONFIG, windowSize);
+        if (keyDeserializer instanceof TimeWindowedDeserializer || keyDeserializer instanceof SessionWindowedDeserializer) {
+            consumerProperties.setProperty(StreamsConfig.WINDOWED_INNER_CLASS_SERDE,
+                Serdes.serdeFrom(innerClass).getClass().getName());
+        }
+        return IntegrationTestUtils.waitUntilMinKeyValueWithTimestampRecordsReceived(
+            consumerProperties,
+            outputTopic,
+            numMessages,
+            60 * 1000);
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/SmokeTestDriverIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/SmokeTestDriverIntegrationTest.java
index 22d773595aacd..f76caf573b491 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/SmokeTestDriverIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/SmokeTestDriverIntegrationTest.java
@@ -27,8 +27,10 @@
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -42,6 +44,8 @@
 
 @Category(IntegrationTest.class)
 public class SmokeTestDriverIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(3);
 
     @BeforeClass
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskCreationIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskCreationIntegrationTest.java
index 28eeeef26f627..d59a7160040c7 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskCreationIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskCreationIntegrationTest.java
@@ -41,6 +41,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Properties;
@@ -50,7 +51,8 @@
 
 @Category({IntegrationTest.class})
 public class StandbyTaskCreationIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
@@ -95,6 +97,7 @@ private Properties streamsConfiguration() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotCreateAnyStandByTasksForStateStoreWithLoggingDisabled() throws Exception {
         final StreamsBuilder builder = new StreamsBuilder();
         final String stateStoreName = "myTransformState";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskEOSIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskEOSIntegrationTest.java
index 4fbe73438b91c..ed68a1f33ae05 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskEOSIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StandbyTaskEOSIntegrationTest.java
@@ -48,6 +48,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -74,7 +75,8 @@
 @RunWith(Parameterized.class)
 @Category(IntegrationTest.class)
 public class StandbyTaskEOSIntegrationTest {
-
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private final static long REBALANCE_TIMEOUT = Duration.ofMinutes(2L).toMillis();
     private final static int KEY_0 = 0;
     private final static int KEY_1 = 1;
@@ -342,6 +344,7 @@ public void shouldWipeOutStandbyStateDirectoryIfCheckpointIsMissing() throws Exc
         );
     }
 
+    @SuppressWarnings("deprecation")
     private KafkaStreams buildWithDeduplicationTopology(final String stateDirPath) {
         final StreamsBuilder builder = new StreamsBuilder();
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StateDirectoryIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StateDirectoryIntegrationTest.java
index 9c34fba6f6a9b..1515debe84546 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StateDirectoryIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StateDirectoryIntegrationTest.java
@@ -18,6 +18,7 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
 import java.util.Arrays;
 import java.util.Properties;
 import java.util.concurrent.CountDownLatch;
@@ -47,6 +48,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
@@ -56,6 +58,8 @@
 
 @Category(IntegrationTest.class)
 public class StateDirectoryIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(3);
 
@@ -255,7 +259,8 @@ public void testNotCleanUpStateDirIfNotEmpty() throws InterruptedException {
             assertTrue(appDir.exists());    // Application state directory Exists
 
             try {
-                assertTrue((new File(appDir, "dummy")).createNewFile());
+                final File dummyFile = new File(appDir, "dummy");
+                Files.createFile(dummyFile.toPath());
             } catch (final IOException e) {
                 throw new RuntimeException("Failed to create dummy file.", e);
             }
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StateRestorationIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StateRestorationIntegrationTest.java
index d890a30ae8520..4cedf82bfccb3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StateRestorationIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StateRestorationIntegrationTest.java
@@ -36,8 +36,10 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -47,6 +49,9 @@
 
 @Category({IntegrationTest.class})
 public class StateRestorationIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     private final StreamsBuilder builder = new StreamsBuilder();
 
     private static final String APPLICATION_ID = "restoration-test-app";
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StoreQueryIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StoreQueryIntegrationTest.java
index f8cb79cf56880..85595cefc3f8d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StoreQueryIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StoreQueryIntegrationTest.java
@@ -43,12 +43,14 @@
 import org.apache.kafka.test.IntegrationTest;
 import org.apache.kafka.test.TestCondition;
 import org.apache.kafka.test.TestUtils;
+import org.hamcrest.Matcher;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,9 +81,12 @@
 import static org.hamcrest.Matchers.anyOf;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 @Category({IntegrationTest.class})
 public class StoreQueryIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private static final Logger LOG = LoggerFactory.getLogger(StoreQueryIntegrationTest.class);
 
@@ -151,7 +156,7 @@ public void shouldQueryOnlyActivePartitionStoresByDefault() throws Exception {
                 }
                 return true;
             } catch (final InvalidStateStoreException exception) {
-                verifyRetrievableException(exception);
+                verifyRetriableException(exception);
                 LOG.info("Either streams wasn't running or a re-balancing took place. Will try again.");
                 return false;
             }
@@ -235,7 +240,7 @@ public void shouldQuerySpecificActivePartitionStores() throws Exception {
                 }
                 return true;
             } catch (final InvalidStateStoreException exception) {
-                verifyRetrievableException(exception);
+                verifyRetriableException(exception);
                 LOG.info("Either streams wasn't running or a re-balancing took place. Will try again.");
                 return false;
             }
@@ -504,7 +509,7 @@ public void shouldQueryStoresAfterAddingAndRemovingStreamThread() throws Excepti
                 assertThat(store1.get(key3), is(notNullValue()));
                 return true;
             } catch (final InvalidStateStoreException exception) {
-                verifyRetrievableException(exception);
+                verifyRetriableException(exception);
                 LOG.info("Either streams wasn't running or a re-balancing took place. Will try again.");
                 return false;
             }
@@ -525,31 +530,37 @@ public void shouldQueryStoresAfterAddingAndRemovingStreamThread() throws Excepti
                 assertThat(store1.get(key3), is(notNullValue()));
                 return true;
             } catch (final InvalidStateStoreException exception) {
-                verifyRetrievableException(exception);
+                verifyRetriableException(exception);
                 LOG.info("Either streams wasn't running or a re-balancing took place. Will try again.");
                 return false;
             }
         });
     }
 
-    private void verifyRetrievableException(final Exception exception) {
+    private Matcher<String> retriableException() {
+        return is(
+            anyOf(
+                containsString("Cannot get state store source-table because the stream thread is PARTITIONS_ASSIGNED, not RUNNING"),
+                containsString("The state store, source-table, may have migrated to another instance"),
+                containsString("Cannot get state store source-table because the stream thread is STARTING, not RUNNING"),
+                containsString("The specified partition 1 for store source-table does not exist.")
+            )
+        );
+    }
+
+    private void verifyRetriableException(final Exception exception) {
         assertThat(
             "Unexpected exception thrown while getting the value from store.",
             exception.getMessage(),
-            is(
-                anyOf(
-                    containsString("Cannot get state store source-table because the stream thread is PARTITIONS_ASSIGNED, not RUNNING"),
-                    containsString("The state store, source-table, may have migrated to another instance"),
-                    containsString("Cannot get state store source-table because the stream thread is STARTING, not RUNNING")
-                )
-            )
+            retriableException()
         );
     }
 
     private static void until(final TestCondition condition) {
         boolean success = false;
         final long deadline = System.currentTimeMillis() + IntegrationTestUtils.DEFAULT_TIMEOUT;
-        while (!success && System.currentTimeMillis() < deadline) {
+        boolean deadlineExceeded = System.currentTimeMillis() >= deadline;
+        while (!success && !deadlineExceeded) {
             try {
                 success = condition.conditionMet();
                 Thread.sleep(500L);
@@ -557,8 +568,13 @@ private static void until(final TestCondition condition) {
                 throw e;
             } catch (final Exception e) {
                 throw new RuntimeException(e);
+            } finally {
+                deadlineExceeded = System.currentTimeMillis() >= deadline;
             }
         }
+        if (deadlineExceeded) {
+            fail("Test execution timed out");
+        }
     }
 
     private void getStreamsBuilderWithTopology(final StreamsBuilder builder, final Semaphore semaphore) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StreamStreamJoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StreamStreamJoinIntegrationTest.java
index 9d2bd1e221c8b..49818e71a4509 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StreamStreamJoinIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StreamStreamJoinIntegrationTest.java
@@ -25,8 +25,10 @@
 import org.apache.kafka.test.MockMapper;
 
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -43,6 +45,8 @@
 @Category({IntegrationTest.class})
 @RunWith(value = Parameterized.class)
 public class StreamStreamJoinIntegrationTest extends AbstractJoinIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private KStream<Long, String> leftStream;
     private KStream<Long, String> rightStream;
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinIntegrationTest.java
index 0f7e8aa95f012..37d5fc2a7243d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinIntegrationTest.java
@@ -23,8 +23,10 @@
 import org.apache.kafka.streams.test.TestRecord;
 import org.apache.kafka.test.IntegrationTest;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -38,6 +40,8 @@
 @Category({IntegrationTest.class})
 @RunWith(value = Parameterized.class)
 public class StreamTableJoinIntegrationTest extends AbstractJoinIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private KStream<Long, String> leftStream;
     private KTable<Long, String> rightTable;
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinTopologyOptimizationIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinTopologyOptimizationIntegrationTest.java
index 512d1c13bd4c0..84876e77fd720 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinTopologyOptimizationIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StreamTableJoinTopologyOptimizationIntegrationTest.java
@@ -46,6 +46,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -67,6 +68,8 @@
 @RunWith(value = Parameterized.class)
 @Category({IntegrationTest.class})
 public class StreamTableJoinTopologyOptimizationIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private static final int NUM_BROKERS = 1;
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS);
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUncaughtExceptionHandlerIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUncaughtExceptionHandlerIntegrationTest.java
index 0f42d3546fdba..4af333a65a878 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUncaughtExceptionHandlerIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUncaughtExceptionHandlerIntegrationTest.java
@@ -49,6 +49,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -76,6 +77,8 @@
 @Category(IntegrationTest.class)
 @SuppressWarnings("deprecation") //Need to call the old handler, will remove those calls when the old handler is removed
 public class StreamsUncaughtExceptionHandlerIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1, new Properties(), 0L, 0L);
 
@@ -90,9 +93,6 @@ public static void closeCluster() {
     }
 
     public static final Duration DEFAULT_DURATION = Duration.ofSeconds(30);
-    private static final AtomicBoolean THROW_ERROR = new AtomicBoolean(true);
-    private static final AtomicBoolean THROW_ILLEGAL_STATE_EXCEPTION = new AtomicBoolean(false);
-    private static final AtomicBoolean THROW_ILLEGAL_ARGUMENT_EXCEPTION = new AtomicBoolean(false);
 
     @Rule
     public final TestName testName = new TestName();
@@ -105,6 +105,7 @@ public static void closeCluster() {
     private final String outputTopic2 = "output2" + testId;
     private final StreamsBuilder builder = new StreamsBuilder();
     private final List<String> processorValueCollector = new ArrayList<>();
+    private static AtomicBoolean throwError = new AtomicBoolean(true);
 
     private final Properties properties = basicProps();
 
@@ -170,49 +171,14 @@ public void shouldShutdownClient() throws InterruptedException {
         }
     }
 
-
-    @Test
-    public void shouldShutdownClientWhenIllegalStateException() throws InterruptedException {
-        THROW_ILLEGAL_STATE_EXCEPTION.compareAndSet(false, true);
-        try (final KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties)) {
-            kafkaStreams.setUncaughtExceptionHandler((t, e) -> fail("should not hit old handler"));
-
-            kafkaStreams.setUncaughtExceptionHandler(exception -> REPLACE_THREAD); // if the user defined uncaught exception handler would be hit we would be replacing the thread
-
-            StreamsTestUtils.startKafkaStreamsAndWaitForRunningState(kafkaStreams);
-
-            produceMessages(0L, inputTopic, "A");
-            waitForApplicationState(Collections.singletonList(kafkaStreams), KafkaStreams.State.ERROR, DEFAULT_DURATION);
-
-            assertThat(processorValueCollector.size(), equalTo(1));
-        } finally {
-            THROW_ILLEGAL_STATE_EXCEPTION.compareAndSet(true, false);
-        }
-
-    }
-
     @Test
-    public void shouldShutdownClientWhenIllegalArgumentException() throws InterruptedException {
-        THROW_ILLEGAL_ARGUMENT_EXCEPTION.compareAndSet(false, true);
-        try (final KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties)) {
-            kafkaStreams.setUncaughtExceptionHandler((t, e) -> fail("should not hit old handler"));
-
-            kafkaStreams.setUncaughtExceptionHandler(exception -> REPLACE_THREAD); // if the user defined uncaught exception handler would be hit we would be replacing the thread
-
-            StreamsTestUtils.startKafkaStreamsAndWaitForRunningState(kafkaStreams);
-
-            produceMessages(0L, inputTopic, "A");
-            waitForApplicationState(Collections.singletonList(kafkaStreams), KafkaStreams.State.ERROR, DEFAULT_DURATION);
-
-            assertThat(processorValueCollector.size(), equalTo(1));
-        } finally {
-            THROW_ILLEGAL_ARGUMENT_EXCEPTION.compareAndSet(true, false);
-        }
-
+    public void shouldReplaceThreads() throws InterruptedException {
+        testReplaceThreads(2);
     }
 
     @Test
-    public void shouldReplaceThreads() throws InterruptedException {
+    public void shouldReplaceThreadsWithoutJavaHandler() throws InterruptedException {
+        Thread.setDefaultUncaughtExceptionHandler((t, e) -> fail("exception thrown"));
         testReplaceThreads(2);
     }
 
@@ -362,16 +328,10 @@ private static class ShutdownProcessor extends org.apache.kafka.streams.processo
         @Override
         public void process(final String key, final String value) {
             valueList.add(value + " " + context.taskId());
-            if (THROW_ERROR.get()) {
-                if (THROW_ILLEGAL_STATE_EXCEPTION.get()) {
-                    throw new IllegalStateException("Something unexpected happened in " + Thread.currentThread().getName());
-                } else if (THROW_ILLEGAL_ARGUMENT_EXCEPTION.get()) {
-                    throw new IllegalArgumentException("Something unexpected happened in " + Thread.currentThread().getName());
-                } else {
-                    throw new StreamsException(Thread.currentThread().getName());
-                }
+            if (throwError.get()) {
+                throw new StreamsException(Thread.currentThread().getName());
             }
-            THROW_ERROR.set(true);
+            throwError.set(true);
         }
     }
 
@@ -405,7 +365,7 @@ private void testReplaceThreads(final int numThreads) throws InterruptedExceptio
             final AtomicInteger count = new AtomicInteger();
             kafkaStreams.setUncaughtExceptionHandler(exception -> {
                 if (count.incrementAndGet() == numThreads) {
-                    THROW_ERROR.set(false);
+                    throwError.set(false);
                 }
                 return REPLACE_THREAD;
             });
@@ -413,7 +373,7 @@ private void testReplaceThreads(final int numThreads) throws InterruptedExceptio
 
             produceMessages(0L, inputTopic, "A");
             TestUtils.waitForCondition(() -> count.get() == numThreads, "finished replacing threads");
-            TestUtils.waitForCondition(() -> THROW_ERROR.get(), "finished replacing threads");
+            TestUtils.waitForCondition(() -> throwError.get(), "finished replacing threads");
             kafkaStreams.close();
             waitForApplicationState(Collections.singletonList(kafkaStreams), KafkaStreams.State.NOT_RUNNING, DEFAULT_DURATION);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUpgradeTestIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUpgradeTestIntegrationTest.java
index 4285530958ffe..6a9453e9926b5 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUpgradeTestIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/StreamsUpgradeTestIntegrationTest.java
@@ -24,8 +24,10 @@
 import org.apache.kafka.test.IntegrationTest;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -44,6 +46,8 @@
 
 @Category(IntegrationTest.class)
 public class StreamsUpgradeTestIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(3);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionDurabilityIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionDurabilityIntegrationTest.java
index 64985927a3b12..1dc6e6a60729f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionDurabilityIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionDurabilityIntegrationTest.java
@@ -50,6 +50,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.slf4j.Logger;
@@ -85,6 +86,8 @@
 @RunWith(Parameterized.class)
 @Category({IntegrationTest.class})
 public class SuppressionDurabilityIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(
         3,
@@ -125,6 +128,7 @@ public static Collection<String[]> data() {
     public String processingGuaranteee;
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldRecoverBufferAfterShutdown() {
         final String testId = safeUniqueTestName(getClass(), testName);
         final String appId = "appId_" + testId;
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionIntegrationTest.java
index 71ef0e369098b..35eee4764d487 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/SuppressionIntegrationTest.java
@@ -47,8 +47,10 @@
 import org.hamcrest.Matchers;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -81,6 +83,8 @@
 
 @Category(IntegrationTest.class)
 public class SuppressionIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(
         1,
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/TableTableJoinIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/TableTableJoinIntegrationTest.java
index 579ed190a4e5b..aaa0f462e83ef 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/TableTableJoinIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/TableTableJoinIntegrationTest.java
@@ -26,8 +26,10 @@
 import org.apache.kafka.streams.test.TestRecord;
 import org.apache.kafka.test.IntegrationTest;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.Timeout;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
@@ -41,6 +43,8 @@
 @Category({IntegrationTest.class})
 @RunWith(value = Parameterized.class)
 public class TableTableJoinIntegrationTest extends AbstractJoinIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
     private KTable<Long, String> leftTable;
     private KTable<Long, String> rightTable;
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/TaskAssignorIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/TaskAssignorIntegrationTest.java
index 5ff6cb6ba7304..a706d74d66e9e 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/TaskAssignorIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/TaskAssignorIntegrationTest.java
@@ -37,6 +37,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.lang.reflect.Field;
@@ -56,6 +57,8 @@
 
 @Category(IntegrationTest.class)
 public class TaskAssignorIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/TaskMetadataIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/TaskMetadataIntegrationTest.java
index 2aec4edf703b5..791ee58ff7bbd 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/TaskMetadataIntegrationTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/TaskMetadataIntegrationTest.java
@@ -40,6 +40,7 @@
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 
 import java.io.IOException;
 import java.time.Duration;
@@ -60,6 +61,8 @@
 
 @Category(IntegrationTest.class)
 public class TaskMetadataIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1, new Properties(), 0L, 0L);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/TimeWindowedKStreamIntegrationTest.java b/streams/src/test/java/org/apache/kafka/streams/integration/TimeWindowedKStreamIntegrationTest.java
new file mode 100644
index 0000000000000..e0ec6c9bbef71
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/TimeWindowedKStreamIntegrationTest.java
@@ -0,0 +1,507 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.integration;
+
+import java.util.Collection;
+import java.util.Optional;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.Serdes.StringSerde;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.serialization.StringSerializer;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.KeyValueTimestamp;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
+import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster;
+import org.apache.kafka.streams.integration.utils.IntegrationTestUtils;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
+import org.apache.kafka.streams.kstream.JoinWindows;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.SessionWindowedDeserializer;
+import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
+import org.apache.kafka.streams.kstream.TimeWindowedKStream;
+import org.apache.kafka.streams.kstream.TimeWindows;
+import org.apache.kafka.streams.kstream.UnlimitedWindows;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.WindowedSerdes;
+import org.apache.kafka.streams.kstream.internals.TimeWindow;
+import org.apache.kafka.test.IntegrationTest;
+import org.apache.kafka.test.MockAggregator;
+import org.apache.kafka.test.MockInitializer;
+import org.apache.kafka.test.TestUtils;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.junit.rules.Timeout;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+
+import static java.time.Duration.ofMillis;
+import static java.time.Instant.ofEpochMilli;
+import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.common.utils.Utils.mkProperties;
+import static org.apache.kafka.streams.integration.utils.IntegrationTestUtils.safeUniqueTestName;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThrows;
+
+@SuppressWarnings({"unchecked"})
+@Category({IntegrationTest.class})
+@RunWith(Parameterized.class)
+public class TimeWindowedKStreamIntegrationTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
+    private static final int NUM_BROKERS = 1;
+
+    public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(NUM_BROKERS,
+        mkProperties(
+            mkMap(mkEntry("log.retention.hours", "-1"), mkEntry("log.retention.bytes", "-1")) // Don't expire records since we manipulate timestamp
+        )
+    );
+
+    @BeforeClass
+    public static void startCluster() throws IOException {
+        CLUSTER.start();
+    }
+
+    @AfterClass
+    public static void closeCluster() {
+        CLUSTER.stop();
+    }
+
+
+    private StreamsBuilder builder;
+    private Properties streamsConfiguration;
+    private KafkaStreams kafkaStreams;
+    private String streamOneInput;
+    private String streamTwoInput;
+    private String outputTopic;
+
+    @Rule
+    public TestName testName = new TestName();
+
+    @Parameter
+    public StrategyType type;
+
+    @Parameter(1)
+    public boolean withCache;
+
+    @Parameter(2)
+    public EmitStrategy emitStrategy;
+
+    private boolean emitFinal;
+
+    @Parameterized.Parameters(name = "{0}_{1}")
+    public static Collection<Object[]> getEmitStrategy() {
+        return asList(new Object[][] {
+            {StrategyType.ON_WINDOW_UPDATE, true, EmitStrategy.onWindowUpdate()},
+            {StrategyType.ON_WINDOW_UPDATE, false, EmitStrategy.onWindowUpdate()},
+            {StrategyType.ON_WINDOW_CLOSE, true, EmitStrategy.onWindowClose()},
+            {StrategyType.ON_WINDOW_CLOSE, false, EmitStrategy.onWindowClose()}
+        });
+    }
+
+    @Before
+    public void before() throws InterruptedException {
+        builder = new StreamsBuilder();
+        createTopics();
+        streamsConfiguration = new Properties();
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "app-" + safeTestName);
+        streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
+        streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
+        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
+        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
+        streamsConfiguration.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0); // Always process
+        streamsConfiguration.put(StreamsConfig.WINDOW_STORE_CHANGE_LOG_ADDITIONAL_RETENTION_MS_CONFIG, Long.MAX_VALUE); // Don't expire changelog
+
+        emitFinal = emitStrategy.type() == StrategyType.ON_WINDOW_CLOSE;
+    }
+
+    @After
+    public void whenShuttingDown() throws IOException {
+        if (kafkaStreams != null) {
+            kafkaStreams.close();
+            kafkaStreams.cleanUp();
+        }
+        IntegrationTestUtils.purgeLocalStreamsState(streamsConfiguration);
+    }
+
+    @Test
+    public void shouldAggregateWindowedWithNoGrace() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "1", 0),
+            new KeyValueTimestamp<>("A", "2", 5),
+            new KeyValueTimestamp<>("A", "3", 10), // close [0, 10)
+            new KeyValueTimestamp<>("B", "4", 6),  // late and skip for [0, 10)
+            new KeyValueTimestamp<>("B", "5", 11),
+            new KeyValueTimestamp<>("B", "6", 15), // close [5, 15)
+            new KeyValueTimestamp<>("C", "7", 25)  // close [10, 20), [15, 25)
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class, 10L);
+        builder.stream(streamOneInput, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey()
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(10L)).advanceBy(ofMillis(5L)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 6 : 12);
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10L, 20L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5+6", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15L, 25L)), "0+6", 15)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1", 0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10L, 20L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4", 6),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5+6", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15L, 25L)), "0+6", 15),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(20L, 30L)), "0+7", 25),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25L, 35L)), "0+7", 25)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    @Test
+    public void shouldAggregateWindowedWithGrace() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "1", 0),
+            new KeyValueTimestamp<>("A", "2", 5),
+            new KeyValueTimestamp<>("A", "3", 10),
+            new KeyValueTimestamp<>("B", "4", 6),
+            new KeyValueTimestamp<>("B", "5", 11),
+            new KeyValueTimestamp<>("B", "6", 15), // close [0, 10), output A, B [0, 10)
+            new KeyValueTimestamp<>("C", "7", 25)  // close [5, 15), [10, 20)
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class, 10L);
+        builder.stream(streamOneInput, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey()
+            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10L), ofMillis(5)).advanceBy(ofMillis(5L)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 6 : 13);
+
+        final List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0L, 10L)), "0+4", 6),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10L, 20L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5+6", 15)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1", 0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+1+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+2+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10L, 20L)), "0+3", 10),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0L, 10L)), "0+4", 6),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4", 6),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+4+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)), "0+5+6", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15L, 25L)), "0+6", 15),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(20L, 30L)), "0+7", 25),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25L, 35L)), "0+7", 25)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    @Test
+    public void shouldRestoreAfterJoinRestart() throws Exception {
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("A", "L1", 0),
+            new KeyValueTimestamp<>("A", "L1", 5),
+            new KeyValueTimestamp<>("B", "L2", 11), // close [0, 10)
+            new KeyValueTimestamp<>("B", "L2", 15), // close [5, 15)
+            new KeyValueTimestamp<>("C", "L3", 25)  // close [15, 25), [10, 20)
+        );
+
+        produceMessages(
+            streamTwoInput,
+            new KeyValueTimestamp<>("A", "R1", 0),
+            new KeyValueTimestamp<>("A", "R1", 5),
+            new KeyValueTimestamp<>("B", "R2", 11), // close [0, 10)
+            new KeyValueTimestamp<>("B", "R2", 15), // close [5, 15)
+            new KeyValueTimestamp<>("C", "R3", 25)  // close [15, 25), [10, 20)
+        );
+
+        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(
+            String.class, 10L);
+        final KStream<String, String> streamOne = builder.stream(streamOneInput,
+            Consumed.with(Serdes.String(), Serdes.String()));
+        final KStream<String, String> streamTwo = builder.stream(streamTwoInput,
+            Consumed.with(Serdes.String(), Serdes.String()));
+
+        final KStream<String, String> joinedStream = streamOne
+            .join(streamTwo, (v1, v2) -> v1 + "," + v2,
+                JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(2)));
+
+        joinedStream.groupByKey()
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(10L)).advanceBy(ofMillis(5L)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                getMaterialized()
+            )
+            .toStream()
+            .to(outputTopic, Produced.with(windowedSerde, new StringSerde()));
+
+        startStreams();
+
+        List<KeyValueTimestamp<Windowed<String>, String>> windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            emitFinal ? 5 : 9);
+
+        List<KeyValueTimestamp<Windowed<String>, String>> expectResult;
+        if (emitFinal) {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)),
+                    "0+L1,R1+L1,R1", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+L1,R1",
+                    5),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+L2,R2",
+                    11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)),
+                    "0+L2,R2+L2,R2", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15L, 25L)),
+                    "0+L2,R2", 15)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)), "0+L1,R1",
+                    0),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0L, 10L)),
+                    "0+L1,R1+L1,R1", 5),
+                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5L, 15L)), "0+L1,R1",
+                    5),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5L, 15L)), "0+L2,R2",
+                    11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)),
+                    "0+L2,R2", 11),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10L, 20L)),
+                    "0+L2,R2+L2,R2", 15),
+                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15L, 25L)),
+                    "0+L2,R2", 15),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(20L, 30L)),
+                    "0+L3,R3", 25),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25L, 35L)),
+                    "0+L3,R3", 25)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+
+        kafkaStreams.close();
+        kafkaStreams.cleanUp(); // Purge store to force restoration
+
+        produceMessages(
+            streamOneInput,
+            new KeyValueTimestamp<>("C", "L3", 35)  // close [20, 30), [25, 35)
+        );
+        produceMessages(
+            streamTwoInput,
+            new KeyValueTimestamp<>("C", "R3", 35)  // close [20, 30), [25, 35)
+        );
+
+        // Restart
+        startStreams();
+
+        windowedMessages = receiveMessagesWithTimestamp(
+            new TimeWindowedDeserializer<>(new StringDeserializer(), 10L),
+            new StringDeserializer(),
+            10L,
+            String.class,
+            2);
+
+        if (emitFinal) {
+            // Output just new closed window for C
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(20L, 30L)),
+                    "0+L3,R3", 25),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25L, 35L)),
+                    "0+L3,R3", 25)
+            );
+        } else {
+            expectResult = asList(
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(30L, 40L)),
+                    "0+L3,R3", 35),
+                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(35L, 45L)),
+                    "0+L3,R3", 35)
+            );
+        }
+
+        assertThat(windowedMessages, is(expectResult));
+    }
+
+    @Test
+    public void shouldThrowUnlimitedWindows() {
+        final TimeWindowedKStream<String, String> windowedStream = builder.stream(streamOneInput, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey()
+            .windowedBy(
+            UnlimitedWindows.of().startOn(ofEpochMilli(0))
+        );
+
+        if (emitFinal) {
+            assertThrows(IllegalArgumentException.class, () -> windowedStream.emitStrategy(emitStrategy));
+        } else {
+            windowedStream.emitStrategy(emitStrategy);
+        }
+    }
+
+
+    private void produceMessages(final String topic, final KeyValueTimestamp<String, String>... records) {
+        IntegrationTestUtils.produceSynchronously(
+            TestUtils.producerConfig(
+                CLUSTER.bootstrapServers(),
+                StringSerializer.class,
+                StringSerializer.class),
+            false,
+            topic,
+            Optional.empty(),
+            Arrays.asList(records)
+        );
+    }
+
+    private Materialized getMaterialized() {
+        if (withCache) {
+            return Materialized.with(null, new StringSerde()).withCachingEnabled();
+        }
+        return Materialized.with(null, new StringSerde()).withCachingDisabled();
+    }
+
+    private void createTopics() throws InterruptedException {
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        streamOneInput = "stream-one-" + safeTestName;
+        streamTwoInput = "stream-two-" + safeTestName;
+        outputTopic = "output-" + safeTestName;
+        CLUSTER.createTopic(streamOneInput, 1, 1);
+        CLUSTER.createTopic(streamTwoInput, 1, 1);
+        CLUSTER.createTopic(outputTopic);
+    }
+
+    private void startStreams() {
+        kafkaStreams = new KafkaStreams(builder.build(), streamsConfiguration);
+        kafkaStreams.start();
+    }
+
+    private <K, V> List<KeyValueTimestamp<K, V>> receiveMessagesWithTimestamp(final Deserializer<K> keyDeserializer,
+                                                                              final Deserializer<V> valueDeserializer,
+                                                                              final long windowSize,
+                                                                              final Class innerClass,
+                                                                              final int numMessages) throws Exception {
+        final String safeTestName = safeUniqueTestName(getClass(), testName);
+        final Properties consumerProperties = new Properties();
+        consumerProperties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, CLUSTER.bootstrapServers());
+        consumerProperties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "group-" + safeTestName);
+        consumerProperties.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        consumerProperties.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, keyDeserializer.getClass().getName());
+        consumerProperties.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, valueDeserializer.getClass().getName());
+        consumerProperties.put(StreamsConfig.WINDOW_SIZE_MS_CONFIG, windowSize);
+        if (keyDeserializer instanceof TimeWindowedDeserializer || keyDeserializer instanceof SessionWindowedDeserializer) {
+            consumerProperties.setProperty(StreamsConfig.WINDOWED_INNER_CLASS_SERDE,
+                Serdes.serdeFrom(innerClass).getClass().getName());
+        }
+        return IntegrationTestUtils.waitUntilMinKeyValueWithTimestampRecordsReceived(
+            consumerProperties,
+            outputTopic,
+            numMessages,
+            60 * 1000);
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java
index 202fd1fbbbd99..689b1c0beb75b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java
+++ b/streams/src/test/java/org/apache/kafka/streams/integration/utils/IntegrationTestUtils.java
@@ -58,6 +58,7 @@
 import org.apache.kafka.streams.state.QueryableStoreType;
 import org.apache.kafka.test.TestCondition;
 import org.apache.kafka.test.TestUtils;
+import org.junit.jupiter.api.TestInfo;
 import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -66,6 +67,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.Field;
+import java.lang.reflect.Method;
 import java.nio.file.Paths;
 import java.time.Duration;
 import java.util.ArrayList;
@@ -225,9 +227,24 @@ public boolean transitToPendingShutdownSeen() {
     /**
      * Gives a test name that is safe to be used in application ids, topic names, etc.
      * The name is safe even for parameterized methods.
+     * Used by tests not yet migrated from JUnit 4.
      */
     public static String safeUniqueTestName(final Class<?> testClass, final TestName testName) {
-        return (testClass.getSimpleName() + testName.getMethodName())
+        return safeUniqueTestName(testClass, testName.getMethodName());
+    }
+
+    /**
+     * Same as @see IntegrationTestUtils#safeUniqueTestName except it accepts a TestInfo passed in by
+     * JUnit 5 instead of a TestName from JUnit 4.
+     * Used by tests migrated to JUnit 5.
+     */
+    public static String safeUniqueTestName(final Class<?> testClass, final TestInfo testInfo) {
+        return safeUniqueTestName(testClass, testInfo.getTestMethod().map(Method::getName).orElse(""));
+    }
+
+    private static String safeUniqueTestName(final Class<?> testClass, final String methodName) {
+        return (testClass.getSimpleName() + methodName)
+                .replace(':', '_')
                 .replace('.', '_')
                 .replace('[', '_')
                 .replace(']', '_')
@@ -1386,6 +1403,41 @@ public static <S> S getStore(final long waitTime,
         }
     }
 
+    public static long getTopicSize(final Properties consumerConfig, final String topicName) {
+        long sum = 0;
+        try (final Consumer<Object, Object> consumer = createConsumer(consumerConfig)) {
+            final Collection<TopicPartition> partitions = consumer.partitionsFor(topicName)
+                .stream()
+                .map(info -> new TopicPartition(topicName, info.partition()))
+                .collect(Collectors.toList());
+            final Map<TopicPartition, Long> beginningOffsets = consumer.beginningOffsets(partitions);
+            final Map<TopicPartition, Long> endOffsets = consumer.endOffsets(partitions);
+
+            for (final TopicPartition partition : beginningOffsets.keySet()) {
+                sum += endOffsets.get(partition) - beginningOffsets.get(partition);
+            }
+        }
+        return sum;
+    }
+
+    private static Double getStreamsPollNumber(final KafkaStreams kafkaStreams) {
+        return (Double) kafkaStreams.metrics()
+            .entrySet()
+            .stream()
+            .filter(entry -> entry.getKey().name().equals("poll-total"))
+            .findFirst().get()
+            .getValue()
+            .metricValue();
+    }
+
+    public static void waitUntilStreamsHasPolled(final KafkaStreams kafkaStreams, final int pollNumber)
+        throws InterruptedException {
+        final Double initialCount = getStreamsPollNumber(kafkaStreams);
+        retryOnExceptionWithTimeout(1000, () -> {
+            assertThat(getStreamsPollNumber(kafkaStreams), is(greaterThanOrEqualTo(initialCount + pollNumber)));
+        });
+    }
+
     public static class StableAssignmentListener implements AssignmentListener {
         final AtomicInteger numStableAssignments = new AtomicInteger(0);
         int nextExpectedNumStableAssignments;
diff --git a/streams/src/test/java/org/apache/kafka/streams/internals/ApiUtilsTest.java b/streams/src/test/java/org/apache/kafka/streams/internals/ApiUtilsTest.java
index 6e4cdef46acae..33fcd8ede2b73 100644
--- a/streams/src/test/java/org/apache/kafka/streams/internals/ApiUtilsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/internals/ApiUtilsTest.java
@@ -31,7 +31,6 @@
 
 
 public class ApiUtilsTest {
-
     // This is the maximum limit that Duration accepts but fails when it converts to milliseconds.
     private static final long MAX_ACCEPTABLE_DAYS_FOR_DURATION = 106751991167300L;
     // This is the maximum limit that Duration accepts and converts to milliseconds with out fail.
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/MaterializedTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/MaterializedTest.java
index 2630f35b75a65..0a00cfbf94071 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/MaterializedTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/MaterializedTest.java
@@ -20,6 +20,7 @@
 import org.apache.kafka.streams.errors.TopologyException;
 import org.apache.kafka.streams.state.KeyValueBytesStoreSupplier;
 import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
+import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
 import org.junit.Test;
 
@@ -64,6 +65,14 @@ public void shouldThrowNullPointerIfKeyValueBytesStoreSupplierIsNull() {
         assertEquals(e.getMessage(), "supplier can't be null");
     }
 
+    @Test
+    public void shouldThrowNullPointerIfStoreTypeIsNull() {
+        final NullPointerException e = assertThrows(NullPointerException.class,
+            () -> Materialized.as((Materialized.StoreType) null));
+
+        assertEquals(e.getMessage(), "store type can't be null");
+    }
+
     @Test
     public void shouldThrowNullPointerIfSessionBytesStoreSupplierIsNull() {
         final NullPointerException e = assertThrows(NullPointerException.class,
@@ -80,6 +89,14 @@ public void shouldThrowIllegalArgumentExceptionIfRetentionIsNegative() {
         assertEquals(e.getMessage(), "Retention must not be negative.");
     }
 
+    @Test
+    public void shouldThrowIllegalArgumentExceptionIfStoreSupplierAndStoreTypeBothSet() {
+        final IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
+                () -> Materialized.as(Stores.persistentKeyValueStore("test")).withStoreType(Materialized.StoreType.ROCKS_DB));
+
+        assertEquals(e.getMessage(), "Cannot set store type when store supplier is pre-configured.");
+    }
+
     @Test
     public void shouldThrowTopologyExceptionIfStoreNameExceedsMaxAllowedLength() {
         final StringBuffer invalidStoreNameBuffer = new StringBuffer();
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedStreamImplTest.java
index 5025e7f4c576c..354fbcac3189d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedStreamImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KGroupedStreamImplTest.java
@@ -486,7 +486,7 @@ public void shouldNotAcceptInvalidStoreNameWhenReducingSessionWindows() {
     public void shouldNotAcceptNullStateStoreSupplierWhenReducingSessionWindows() {
         assertThrows(NullPointerException.class, () ->  groupedStream
                 .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(30)))
-                .reduce(null, Materialized.<String, String, SessionStore<Bytes, byte[]>>as(null))
+                .reduce(null, Materialized.<String, String, SessionStore<Bytes, byte[]>>as((String) null))
         );
     }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
index 0af5bd8e00394..81a2f1daf5bfb 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamImplTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
+import java.util.HashSet;
+import java.util.Set;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.serialization.StringDeserializer;
@@ -56,12 +58,19 @@
 import org.apache.kafka.streams.processor.FailOnInvalidTimestamp;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.TopicNameExtractor;
+import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.ContextualProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
+import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.ProcessorTopology;
 import org.apache.kafka.streams.processor.internals.SourceNode;
 import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.test.TestRecord;
+import org.apache.kafka.test.MockApiFixedKeyProcessorSupplier;
 import org.apache.kafka.test.MockApiProcessor;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockMapper;
@@ -105,6 +114,7 @@ public class KStreamImplTest {
 
     private final Consumed<String, String> stringConsumed = Consumed.with(Serdes.String(), Serdes.String());
     private final MockApiProcessorSupplier<String, String, Void, Void> processorSupplier = new MockApiProcessorSupplier<>();
+    private final MockApiFixedKeyProcessorSupplier<String, String, Void> fixedKeyProcessorSupplier = new MockApiFixedKeyProcessorSupplier<>();
     private final TransformerSupplier<String, String, KeyValue<String, String>> transformerSupplier =
         () -> new Transformer<String, String, KeyValue<String, String>>() {
             @Override
@@ -1756,6 +1766,7 @@ public void shouldProcessFromSourcesThatMatchMultiplePattern() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1764,6 +1775,7 @@ public void shouldNotAllowNullTransformerSupplierOnTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnTransformWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1772,6 +1784,7 @@ public void shouldNotAllowNullTransformerSupplierOnTransformWithStores() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1780,6 +1793,7 @@ public void shouldNotAllowNullTransformerSupplierOnTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnTransformWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1788,6 +1802,7 @@ public void shouldNotAllowNullTransformerSupplierOnTransformWithNamedAndStores()
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1796,6 +1811,7 @@ public void shouldNotAllowNullStoreNamesOnTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1804,6 +1820,7 @@ public void shouldNotAllowNullStoreNameOnTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1812,6 +1829,7 @@ public void shouldNotAllowNullStoreNamesOnTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1820,6 +1838,7 @@ public void shouldNotAllowNullStoreNameOnTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1828,6 +1847,7 @@ public void shouldNotAllowNullNamedOnTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransformWithStoreName() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1836,6 +1856,7 @@ public void shouldNotAllowNullNamedOnTransformWithStoreName() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnFlatTransform() {
         final Transformer<String, String, Iterable<KeyValue<String, String>>> transformer = flatTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1846,6 +1867,7 @@ public void shouldNotAllowBadTransformerSupplierOnFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithStores() {
         final Transformer<String, String, Iterable<KeyValue<String, String>>> transformer = flatTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1856,6 +1878,7 @@ public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithStores() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithNamed() {
         final Transformer<String, String, Iterable<KeyValue<String, String>>> transformer = flatTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1866,6 +1889,7 @@ public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithNamedAndStores() {
         final Transformer<String, String, Iterable<KeyValue<String, String>>> transformer = flatTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1876,6 +1900,7 @@ public void shouldNotAllowBadTransformerSupplierOnFlatTransformWithNamedAndStore
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnFlatTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1884,6 +1909,7 @@ public void shouldNotAllowNullTransformerSupplierOnFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1892,6 +1918,7 @@ public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithStores() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1900,6 +1927,7 @@ public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1908,6 +1936,7 @@ public void shouldNotAllowNullTransformerSupplierOnFlatTransformWithNamedAndStor
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1916,6 +1945,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1924,6 +1954,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1932,6 +1963,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransformWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1940,6 +1972,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransformWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransform() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1948,6 +1981,7 @@ public void shouldNotAllowNullNamedOnFlatTransform() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransformWithStoreName() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1956,6 +1990,7 @@ public void shouldNotAllowNullNamedOnFlatTransformWithStoreName() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnTransformValues() {
         final ValueTransformer<String, String> transformer = valueTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1966,6 +2001,7 @@ public void shouldNotAllowBadTransformerSupplierOnTransformValues() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadTransformerSupplierOnTransformValuesWithNamed() {
         final ValueTransformer<String, String> transformer = valueTransformerSupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1976,6 +2012,7 @@ public void shouldNotAllowBadTransformerSupplierOnTransformValuesWithNamed() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnTransformValues() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -1984,6 +2021,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnTransformValues() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadValueTransformerWithKeySupplierOnTransformValues() {
         final ValueTransformerWithKey<String, String, String> transformer = valueTransformerWithKeySupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -1994,6 +2032,7 @@ public void shouldNotAllowBadValueTransformerWithKeySupplierOnTransformValues()
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowBadValueTransformerWithKeySupplierOnTransformValuesWithNamed() {
         final ValueTransformerWithKey<String, String, String> transformer = valueTransformerWithKeySupplier.get();
         final IllegalArgumentException exception = assertThrows(
@@ -2004,6 +2043,7 @@ public void shouldNotAllowBadValueTransformerWithKeySupplierOnTransformValuesWit
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValues() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2012,6 +2052,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValues()
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2022,6 +2063,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithStore
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2032,6 +2074,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWi
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2042,6 +2085,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithNamed
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2052,6 +2096,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWi
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2063,6 +2108,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnTransformValuesWithNamed
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2074,6 +2120,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnTransformValuesWi
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2084,6 +2131,7 @@ public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerSup
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2094,6 +2142,7 @@ public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerWit
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2103,6 +2152,7 @@ public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerSupp
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2113,6 +2163,7 @@ public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerWith
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerSupplierWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2124,6 +2175,7 @@ public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerSup
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerWithKeySupplierWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2135,6 +2187,7 @@ public void shouldNotAllowNullStoreNamesOnTransformValuesWithValueTransformerWit
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerSupplierWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2146,6 +2199,7 @@ public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerSupp
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerWithKeySupplierWithName() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2157,6 +2211,7 @@ public void shouldNotAllowNullStoreNameOnTransformValuesWithValueTransformerWith
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2167,6 +2222,7 @@ public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerSupplier
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2177,6 +2233,7 @@ public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerWithKeyS
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerSupplierAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2188,6 +2245,7 @@ public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerSupplier
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerWithKeySupplierAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2199,6 +2257,7 @@ public void shouldNotAllowNullNamedOnTransformValuesWithValueTransformerWithKeyS
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValues() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2207,6 +2266,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValues() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValues() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2215,6 +2275,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValu
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2225,6 +2286,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithS
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValuesWithStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2235,6 +2297,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValu
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2245,6 +2308,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithN
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValuesWithNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2255,6 +2319,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValu
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2266,6 +2331,7 @@ public void shouldNotAllowNullValueTransformerSupplierOnFlatTransformValuesWithN
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValuesWithNamedAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2277,6 +2343,7 @@ public void shouldNotAllowNullValueTransformerWithKeySupplierOnFlatTransformValu
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2287,6 +2354,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueSuppli
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2297,6 +2365,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueWithKe
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2307,6 +2376,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueSupplie
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2317,6 +2387,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueWithKey
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueSupplierAndNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2328,6 +2399,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueSuppli
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueWithKeySupplierAndNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2339,6 +2411,7 @@ public void shouldNotAllowNullStoreNamesOnFlatTransformValuesWithFlatValueWithKe
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueSupplierAndNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2350,6 +2423,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueSupplie
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueWithKeySupplierAndNamed() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2361,6 +2435,7 @@ public void shouldNotAllowNullStoreNameOnFlatTransformValuesWithFlatValueWithKey
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueSupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2371,6 +2446,7 @@ public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueSupplier()
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueWithKeySupplier() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2381,6 +2457,7 @@ public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueWithKeySupp
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueSupplierAndStores() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2392,6 +2469,7 @@ public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueSupplierAnd
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotAllowNullNamedOnFlatTransformValuesWithFlatValueWithKeySupplierAndStore() {
         final NullPointerException exception = assertThrows(
             NullPointerException.class,
@@ -2485,6 +2563,88 @@ public void shouldNotAllowNullNamedOnProcessWithStores() {
         assertThat(exception.getMessage(), equalTo("named can't be null"));
     }
 
+    @Test
+    public void shouldNotAllowNullProcessValuesSupplierOnProcess() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues((FixedKeyProcessorSupplier<? super String, ? super String, Void>) null));
+        assertThat(exception.getMessage(), equalTo("processorSupplier can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullProcessSupplierOnProcessValuesWithStores() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues((FixedKeyProcessorSupplier<? super String, ? super String, Void>) null,
+                "storeName"));
+        assertThat(exception.getMessage(), equalTo("processorSupplier can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullProcessSupplierOnProcessValuesWithNamed() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.process((ProcessorSupplier<? super String, ? super String, Void, Void>) null,
+                Named.as("processor")));
+        assertThat(exception.getMessage(), equalTo("processorSupplier can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullProcessSupplierOnProcessValuesWithNamedAndStores() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.process((ProcessorSupplier<? super String, ? super String, Void, Void>) null,
+                Named.as("processor"), "stateStore"));
+        assertThat(exception.getMessage(), equalTo("processorSupplier can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullStoreNamesOnProcessValues() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, (String[]) null));
+        assertThat(exception.getMessage(), equalTo("stateStoreNames can't be a null array"));
+    }
+
+    @Test
+    public void shouldNotAllowNullStoreNameOnProcessValues() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, (String) null));
+        assertThat(exception.getMessage(), equalTo("stateStoreNames can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullStoreNamesOnProcessValuesWithNamed() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, Named.as("processor"), (String[]) null));
+        assertThat(exception.getMessage(), equalTo("stateStoreNames can't be a null array"));
+    }
+
+    @Test
+    public void shouldNotAllowNullStoreNameOnProcessValuesWithNamed() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, Named.as("processor"), (String) null));
+        assertThat(exception.getMessage(), equalTo("stateStoreNames can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullNamedOnProcessValues() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, (Named) null));
+        assertThat(exception.getMessage(), equalTo("named can't be null"));
+    }
+
+    @Test
+    public void shouldNotAllowNullNamedOnProcessValuesWithStores() {
+        final NullPointerException exception = assertThrows(
+            NullPointerException.class,
+            () -> testStream.processValues(fixedKeyProcessorSupplier, (Named) null, "storeName"));
+        assertThat(exception.getMessage(), equalTo("named can't be null"));
+    }
 
     @Test
     public void shouldNotMaterializedKTableFromKStream() {
@@ -2540,6 +2700,308 @@ public void shouldNotMaterializedKTableFromKStream() {
         }
     }
 
+    @SuppressWarnings("deprecation")
+    @Test
+    public void shouldProcessWithOldProcessorAndState() {
+        final Consumed<String, String> consumed = Consumed.with(Serdes.String(), Serdes.String());
+
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final String input = "input";
+
+        builder.addStateStore(Stores.keyValueStoreBuilder(
+            Stores.inMemoryKeyValueStore("sum"),
+            Serdes.String(),
+            Serdes.Integer()
+        ));
+
+        builder.stream(input, consumed)
+            .process(() -> new org.apache.kafka.streams.processor.Processor<String, String>() {
+                private KeyValueStore<String, Integer> sumStore;
+
+                @Override
+                public void init(final ProcessorContext context) {
+                    this.sumStore = context.getStateStore("sum");
+                }
+
+                @Override
+                public void process(final String key, final String value) {
+                    final Integer counter = sumStore.get(key);
+                    if (counter == null) {
+                        sumStore.putIfAbsent(key, value.length());
+                    } else {
+                        if (value == null) {
+                            sumStore.delete(key);
+                        } else {
+                            sumStore.put(key, counter + value.length());
+                        }
+                    }
+                }
+
+                @Override
+                public void close() {
+                }
+            }, Named.as("p"), "sum");
+
+        final String topologyDescription = builder.build().describe().toString();
+
+        assertThat(
+            topologyDescription,
+            equalTo("Topologies:\n"
+                + "   Sub-topology: 0\n"
+                + "    Source: KSTREAM-SOURCE-0000000000 (topics: [input])\n"
+                + "      --> p\n"
+                + "    Processor: p (stores: [sum])\n"
+                + "      --> none\n"
+                + "      <-- KSTREAM-SOURCE-0000000000\n\n")
+        );
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                driver.createInputTopic(
+                    input,
+                    Serdes.String().serializer(),
+                    Serdes.String().serializer()
+                );
+
+            inputTopic.pipeInput("A", "0", 5L);
+            inputTopic.pipeInput("B", "00", 100L);
+            inputTopic.pipeInput("C", "000", 0L);
+            inputTopic.pipeInput("D", "0000", 0L);
+            inputTopic.pipeInput("A", "00000", 10L);
+            inputTopic.pipeInput("A", "000000", 8L);
+
+            final KeyValueStore<String, Integer> sumStore = driver.getKeyValueStore("sum");
+            assertEquals(12, sumStore.get("A").intValue());
+            assertEquals(2, sumStore.get("B").intValue());
+            assertEquals(3, sumStore.get("C").intValue());
+            assertEquals(4, sumStore.get("D").intValue());
+        }
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void shouldBindStateWithOldProcessorSupplier() {
+        final Consumed<String, String> consumed = Consumed.with(Serdes.String(), Serdes.String());
+
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final String input = "input";
+
+        builder.stream(input, consumed)
+            .process(new org.apache.kafka.streams.processor.ProcessorSupplier<String, String>() {
+
+                @Override
+                public org.apache.kafka.streams.processor.Processor<String, String> get() {
+                    return new org.apache.kafka.streams.processor.Processor<String, String>() {
+                        private KeyValueStore<String, Integer> sumStore;
+
+                        @Override
+                        public void init(final ProcessorContext context) {
+                            this.sumStore = context.getStateStore("sum");
+                        }
+
+                        @Override
+                        public void process(final String key, final String value) {
+                            final Integer counter = sumStore.get(key);
+                            if (counter == null) {
+                                sumStore.putIfAbsent(key, value.length());
+                            } else {
+                                if (value == null) {
+                                    sumStore.delete(key);
+                                } else {
+                                    sumStore.put(key, counter + value.length());
+                                }
+                            }
+                        }
+
+                        @Override
+                        public void close() {
+                        }
+                    };
+                }
+
+                @SuppressWarnings("unchecked")
+                @Override
+                public Set<StoreBuilder<?>> stores() {
+                    final Set<StoreBuilder<?>> stores = new HashSet<>();
+                    stores.add(Stores.keyValueStoreBuilder(
+                        Stores.inMemoryKeyValueStore("sum"),
+                        Serdes.String(),
+                        Serdes.Integer()
+                    ));
+                    return stores;
+                }
+            }, Named.as("p"));
+
+        final String topologyDescription = builder.build().describe().toString();
+
+        assertThat(
+            topologyDescription,
+            equalTo("Topologies:\n"
+                + "   Sub-topology: 0\n"
+                + "    Source: KSTREAM-SOURCE-0000000000 (topics: [input])\n"
+                + "      --> p\n"
+                + "    Processor: p (stores: [sum])\n"
+                + "      --> none\n"
+                + "      <-- KSTREAM-SOURCE-0000000000\n\n")
+        );
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                driver.createInputTopic(
+                    input,
+                    Serdes.String().serializer(),
+                    Serdes.String().serializer()
+                );
+
+            inputTopic.pipeInput("A", "0", 5L);
+            inputTopic.pipeInput("B", "00", 100L);
+            inputTopic.pipeInput("C", "000", 0L);
+            inputTopic.pipeInput("D", "0000", 0L);
+            inputTopic.pipeInput("A", "00000", 10L);
+            inputTopic.pipeInput("A", "000000", 8L);
+
+            final KeyValueStore<String, Integer> sumStore = driver.getKeyValueStore("sum");
+            assertEquals(12, sumStore.get("A").intValue());
+            assertEquals(2, sumStore.get("B").intValue());
+            assertEquals(3, sumStore.get("C").intValue());
+            assertEquals(4, sumStore.get("D").intValue());
+        }
+    }
+
+    @Test
+    public void shouldBindStateWithOldProcessor() {
+        final Consumed<String, String> consumed = Consumed.with(Serdes.String(), Serdes.String());
+
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final String input = "input";
+        final String output = "output";
+
+        builder.stream(input, consumed)
+            .process(() -> new ContextualProcessor<String, String, String, Integer>() {
+                @Override
+                public void process(final Record<String, String> record) {
+                    context().forward(record.withValue(record.value().length()));
+                }
+            }, Named.as("p"))
+            .to(output, Produced.valueSerde(Serdes.Integer()));
+
+        final String topologyDescription = builder.build().describe().toString();
+
+        assertThat(
+            topologyDescription,
+            equalTo("Topologies:\n" +
+                "   Sub-topology: 0\n" +
+                "    Source: KSTREAM-SOURCE-0000000000 (topics: [input])\n" +
+                "      --> p\n" +
+                "    Processor: p (stores: [])\n" +
+                "      --> KSTREAM-SINK-0000000001\n" +
+                "      <-- KSTREAM-SOURCE-0000000000\n" +
+                "    Sink: KSTREAM-SINK-0000000001 (topic: output)\n" +
+                "      <-- p\n\n")
+        );
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                driver.createInputTopic(
+                    input,
+                    Serdes.String().serializer(),
+                    Serdes.String().serializer()
+                );
+            final TestOutputTopic<String, Integer> outputTopic =
+                driver.createOutputTopic(
+                    output,
+                    Serdes.String().deserializer(),
+                    Serdes.Integer().deserializer()
+                );
+
+            inputTopic.pipeInput("A", "0", 5L);
+            inputTopic.pipeInput("B", "00", 100L);
+            inputTopic.pipeInput("C", "000", 0L);
+            inputTopic.pipeInput("D", "0000", 0L);
+            inputTopic.pipeInput("A", "00000", 10L);
+            inputTopic.pipeInput("A", "000000", 8L);
+
+            final List<TestRecord<String, Integer>> outputExpectRecords = new ArrayList<>();
+            outputExpectRecords.add(new TestRecord<>("A", 1, Instant.ofEpochMilli(5L)));
+            outputExpectRecords.add(new TestRecord<>("B", 2, Instant.ofEpochMilli(100L)));
+            outputExpectRecords.add(new TestRecord<>("C", 3, Instant.ofEpochMilli(0L)));
+            outputExpectRecords.add(new TestRecord<>("D", 4, Instant.ofEpochMilli(0L)));
+            outputExpectRecords.add(new TestRecord<>("A", 5, Instant.ofEpochMilli(10L)));
+            outputExpectRecords.add(new TestRecord<>("A", 6, Instant.ofEpochMilli(8L)));
+
+            assertEquals(outputTopic.readRecordsToList(), outputExpectRecords);
+        }
+    }
+
+    @Test
+    public void shouldProcessValues() {
+        final Consumed<String, String> consumed = Consumed.with(Serdes.String(), Serdes.String());
+
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final String input = "input";
+        final String output = "output";
+
+        builder.stream(input, consumed)
+               .processValues(() -> new ContextualFixedKeyProcessor<String, String, Integer>() {
+                   @Override
+                   public void process(final FixedKeyRecord<String, String> record) {
+                       context().forward(record.withValue(record.value().length()));
+                   }
+               }, Named.as("fkp"))
+               .to(output, Produced.valueSerde(Serdes.Integer()));
+
+        final String topologyDescription = builder.build().describe().toString();
+
+        assertThat(
+            topologyDescription,
+            equalTo("Topologies:\n" +
+                        "   Sub-topology: 0\n" +
+                        "    Source: KSTREAM-SOURCE-0000000000 (topics: [input])\n" +
+                        "      --> fkp\n" +
+                        "    Processor: fkp (stores: [])\n" +
+                        "      --> KSTREAM-SINK-0000000001\n" +
+                        "      <-- KSTREAM-SOURCE-0000000000\n" +
+                        "    Sink: KSTREAM-SINK-0000000001 (topic: output)\n" +
+                        "      <-- fkp\n\n")
+        );
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                driver.createInputTopic(
+                    input,
+                    Serdes.String().serializer(),
+                    Serdes.String().serializer()
+                );
+            final TestOutputTopic<String, Integer> outputTopic =
+                driver.createOutputTopic(
+                    output,
+                    Serdes.String().deserializer(),
+                    Serdes.Integer().deserializer()
+                );
+
+            inputTopic.pipeInput("A", "0", 5L);
+            inputTopic.pipeInput("B", "00", 100L);
+            inputTopic.pipeInput("C", "000", 0L);
+            inputTopic.pipeInput("D", "0000", 0L);
+            inputTopic.pipeInput("A", "00000", 10L);
+            inputTopic.pipeInput("A", "000000", 8L);
+
+            final List<TestRecord<String, Integer>> outputExpectRecords = new ArrayList<>();
+            outputExpectRecords.add(new TestRecord<>("A", 1, Instant.ofEpochMilli(5L)));
+            outputExpectRecords.add(new TestRecord<>("B", 2, Instant.ofEpochMilli(100L)));
+            outputExpectRecords.add(new TestRecord<>("C", 3, Instant.ofEpochMilli(0L)));
+            outputExpectRecords.add(new TestRecord<>("D", 4, Instant.ofEpochMilli(0L)));
+            outputExpectRecords.add(new TestRecord<>("A", 5, Instant.ofEpochMilli(10L)));
+            outputExpectRecords.add(new TestRecord<>("A", 6, Instant.ofEpochMilli(8L)));
+
+            assertEquals(outputTopic.readRecordsToList(), outputExpectRecords);
+        }
+    }
+
     @Test
     public void shouldMaterializeKTableFromKStream() {
         final Consumed<String, String> consumed = Consumed.with(Serdes.String(), Serdes.String());
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java
index 2ffa0480daed2..d4f716df0843c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoinTest.java
@@ -103,7 +103,6 @@ public void shouldLogAndMeterOnSkippedRecordsWithNullValueWithBuiltInMetricsVers
         }
     }
 
-
     @Test
     public void shouldReuseRepartitionTopicWithGeneratedName() {
         final StreamsBuilder builder = new StreamsBuilder();
@@ -137,7 +136,6 @@ public void shouldCreateRepartitionTopicsWithUserProvidedName() {
 
     @Test
     public void shouldDisableLoggingOnStreamJoined() {
-
         final JoinWindows joinWindows = JoinWindows.ofTimeDifferenceAndGrace(ofMillis(100), Duration.ofMillis(50));
         final StreamJoined<String, Integer, Integer> streamJoined = StreamJoined
             .with(Serdes.String(), Serdes.Integer(), Serdes.Integer())
@@ -164,7 +162,6 @@ public void shouldDisableLoggingOnStreamJoined() {
 
     @Test
     public void shouldEnableLoggingWithCustomConfigOnStreamJoined() {
-
         final JoinWindows joinWindows = JoinWindows.ofTimeDifferenceAndGrace(ofMillis(100), Duration.ofMillis(50));
         final StreamJoined<String, Integer, Integer> streamJoined = StreamJoined
             .with(Serdes.String(), Serdes.Integer(), Serdes.Integer())
@@ -1197,7 +1194,7 @@ public void testAsymmetricWindowingAfter() {
         joined = stream1.join(
             stream2,
             MockValueJoiner.TOSTRING_JOINER,
-            JoinWindows.ofTimeDifferenceAndGrace(ofMillis(0), ofMillis(0)).after(ofMillis(100)),
+            JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(0)).after(ofMillis(100)),
             StreamJoined.with(Serdes.Integer(),
                 Serdes.String(),
                 Serdes.String())
@@ -1866,4 +1863,4 @@ private WindowBytesStoreSupplier buildWindowBytesStoreSupplier(final String name
             "      <-- KSTREAM-MERGE-0000000011\n" +
             "    Sink: KSTREAM-SINK-0000000021 (topic: out-to)\n" +
             "      <-- KSTREAM-MERGE-0000000020\n\n";
-}
\ No newline at end of file
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java
index 2a29915f75a55..156b553455d47 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamLeftJoinTest.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.common.serialization.StringSerializer;
 import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.TopologyWrapper;
 import org.apache.kafka.streams.kstream.Consumed;
@@ -34,6 +35,7 @@
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockValueJoiner;
 import org.apache.kafka.test.StreamsTestUtils;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import java.time.Duration;
@@ -44,6 +46,7 @@
 import java.util.Properties;
 import java.util.Set;
 
+import static java.time.Duration.ZERO;
 import static java.time.Duration.ofMillis;
 import static org.junit.Assert.assertEquals;
 
@@ -53,9 +56,14 @@ public class KStreamKStreamLeftJoinTest {
     private final String topic1 = "topic1";
     private final String topic2 = "topic2";
     private final Consumed<Integer, String> consumed = Consumed.with(Serdes.Integer(), Serdes.String());
-    private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
+    private final static Properties PROPS = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
 
-    @SuppressWarnings("deprecation")
+    @BeforeClass
+    public static void beforeClass() {
+        PROPS.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_OUTER_JOIN_SPURIOUS_RESULTS_FIX, 0L);
+    }
+
+    @SuppressWarnings("deprecation") // old join semantics; can be removed when `JoinWindows.of()` is removed
     @Test
     public void testLeftJoinWithSpuriousResultFixDisabledOldApi() {
         final StreamsBuilder builder = new StreamsBuilder();
@@ -77,7 +85,7 @@ public void testLeftJoinWithSpuriousResultFixDisabledOldApi() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(props), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(PROPS), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                     driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -115,7 +123,7 @@ public void testLeftJoinWithSpuriousResultFixDisabledOldApi() {
         }
     }
 
-    @SuppressWarnings("deprecation")
+    @SuppressWarnings("deprecation") // old join semantics; can be removed when `JoinWindows.of()` is removed
     @Test
     public void testLeftJoinDuplicatesWithSpuriousResultFixDisabledOldApi() {
         final StreamsBuilder builder = new StreamsBuilder();
@@ -135,7 +143,7 @@ public void testLeftJoinDuplicatesWithSpuriousResultFixDisabledOldApi() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(props), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(PROPS), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                     driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -177,7 +185,7 @@ public void testLeftJoinDuplicates() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -233,7 +241,7 @@ public void testLeftExpiredNonJoinedRecordsAreEmittedByTheLeftProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -288,7 +296,7 @@ public void testLeftExpiredNonJoinedRecordsAreEmittedByTheRightProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -343,7 +351,7 @@ public void testRightNonJoinedRecordsAreNeverEmittedByTheLeftProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -395,7 +403,7 @@ public void testRightNonJoinedRecordsAreNeverEmittedByTheRightProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -480,7 +488,7 @@ public void runLeftJoin(final StreamJoined<Integer, String, String> streamJoined
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                     driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -585,7 +593,7 @@ public void testOrdering() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -642,7 +650,7 @@ public void testGracePeriod() {
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -714,7 +722,7 @@ public void testWindowing() {
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                     driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -749,6 +757,70 @@ public void testWindowing() {
         }
     }
 
+    @Test
+    public void shouldNotEmitLeftJoinResultForAsymmetricWindow() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final int[] expectedKeys = new int[] {0, 1, 2, 3};
+
+        final KStream<Integer, String> stream1;
+        final KStream<Integer, String> stream2;
+        final KStream<Integer, String> joined;
+        final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        stream1 = builder.stream(topic1, consumed);
+        stream2 = builder.stream(topic2, consumed);
+
+        joined = stream1.leftJoin(
+            stream2,
+            MockValueJoiner.TOSTRING_JOINER,
+            JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)).before(ZERO),
+            StreamJoined.with(Serdes.Integer(), Serdes.String(), Serdes.String())
+        );
+        joined.process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
+            final TestInputTopic<Integer, String> inputTopic1 =
+                driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final TestInputTopic<Integer, String> inputTopic2 =
+                driver.createInputTopic(topic2, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();
+            long time = 0L;
+
+            // push two items to the primary stream; the other window is empty; this should not produce any items
+            // w1 = {}
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = {}
+            for (int i = 0; i < 2; i++) {
+                inputTopic1.pipeInput(expectedKeys[i], "A" + expectedKeys[i], time + i);
+            }
+            processor.checkAndClearProcessResult();
+
+            // push one item to the other stream; this should produce one full-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 0:a0 (ts: 100) }
+            time += 100L;
+            inputTopic2.pipeInput(expectedKeys[0], "a" + expectedKeys[0], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(0, "A0+a0", 100L)
+            );
+
+            // push one item to the other stream; this should produce one left-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 0:a0 (ts: 100) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102) }
+            time += 2;
+            inputTopic2.pipeInput(expectedKeys[1], "a" + expectedKeys[1], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(1, "A1+null", 1L)
+            );
+        }
+    }
+
     private void testUpperWindowBound(final int[] expectedKeys,
                                       final TopologyTestDriver driver,
                                       final MockApiProcessor<Integer, String, Void, Void> processor) {
@@ -877,7 +949,6 @@ private void testUpperWindowBound(final int[] expectedKeys,
 
         // push a dummy record to produce all left-join non-joined items
         time += 301L;
-        driver.advanceWallClockTime(Duration.ofMillis(1000L));
         inputTopic1.pipeInput(0, "dummy", time);
         processor.checkAndClearProcessResult(
             new KeyValueTimestamp<>(0, "C0+null", 1101L),
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamOuterJoinTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamOuterJoinTest.java
index 0fcbfeb0049a1..8133e25ec4be9 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamOuterJoinTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamOuterJoinTest.java
@@ -16,11 +16,15 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
+import org.apache.kafka.common.header.Header;
+import org.apache.kafka.common.header.internals.RecordHeader;
+import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.serialization.IntegerSerializer;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.serialization.StringSerializer;
 import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TestInputTopic;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.TopologyWrapper;
@@ -28,12 +32,15 @@
 import org.apache.kafka.streams.kstream.JoinWindows;
 import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.StreamJoined;
+import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
+import org.apache.kafka.streams.test.TestRecord;
 import org.apache.kafka.test.MockApiProcessor;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockValueJoiner;
 import org.apache.kafka.test.StreamsTestUtils;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import java.time.Duration;
@@ -44,6 +51,7 @@
 import java.util.Properties;
 import java.util.Set;
 
+import static java.time.Duration.ZERO;
 import static java.time.Duration.ofMillis;
 import static org.junit.Assert.assertEquals;
 
@@ -51,9 +59,14 @@ public class KStreamKStreamOuterJoinTest {
     private final String topic1 = "topic1";
     private final String topic2 = "topic2";
     private final Consumed<Integer, String> consumed = Consumed.with(Serdes.Integer(), Serdes.String());
-    private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
+    private final static Properties PROPS = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
 
-    @SuppressWarnings("deprecation")
+    @BeforeClass
+    public static void beforeClass() {
+        PROPS.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_OUTER_JOIN_SPURIOUS_RESULTS_FIX, 0L);
+    }
+
+    @SuppressWarnings("deprecation") // old join semantics; can be removed when `JoinWindows.of()` is removed
     @Test
     public void testOuterJoinDuplicatesWithFixDisabledOldApi() {
         final StreamsBuilder builder = new StreamsBuilder();
@@ -73,7 +86,7 @@ public void testOuterJoinDuplicatesWithFixDisabledOldApi() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(props), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(PROPS), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                     driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -117,7 +130,7 @@ public void testOuterJoinDuplicates() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -155,7 +168,6 @@ public void testOuterJoinDuplicates() {
 
             // this record should expired non-joined records; only null+a0 will be emitted because
             // it did not have a join
-            driver.advanceWallClockTime(Duration.ofMillis(1000L));
             inputTopic2.pipeInput(3, "dummy", 1500L);
 
             processor.checkAndClearProcessResult(
@@ -184,7 +196,7 @@ public void testLeftExpiredNonJoinedRecordsAreEmittedByTheLeftProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -239,7 +251,7 @@ public void testLeftExpiredNonJoinedRecordsAreEmittedByTheRightProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -294,7 +306,7 @@ public void testRightExpiredNonJoinedRecordsAreEmittedByTheLeftProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -349,7 +361,7 @@ public void testRightExpiredNonJoinedRecordsAreEmittedByTheRightProcessor() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -404,7 +416,7 @@ public void testOrdering() {
         );
         joined.process(supplier);
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -460,7 +472,7 @@ public void testGracePeriod() {
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -559,7 +571,7 @@ public void runOuterJoin(final StreamJoined<Integer, String, String> streamJoine
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -672,7 +684,7 @@ public void testWindowing() {
         assertEquals(1, copartitionGroups.size());
         assertEquals(new HashSet<>(Arrays.asList(topic1, topic2)), copartitionGroups.iterator().next());
 
-        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
             final TestInputTopic<Integer, String> inputTopic1 =
                 driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
             final TestInputTopic<Integer, String> inputTopic2 =
@@ -708,6 +720,270 @@ public void testWindowing() {
         }
     }
 
+    @Test
+    public void shouldNotEmitLeftJoinResultForAsymmetricBeforeWindow() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final int[] expectedKeys = new int[] {0, 1, 2, 3};
+
+        final KStream<Integer, String> stream1;
+        final KStream<Integer, String> stream2;
+        final KStream<Integer, String> joined;
+        final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        stream1 = builder.stream(topic1, consumed);
+        stream2 = builder.stream(topic2, consumed);
+
+        joined = stream1.outerJoin(
+            stream2,
+            MockValueJoiner.TOSTRING_JOINER,
+            JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)).before(ZERO),
+            StreamJoined.with(Serdes.Integer(), Serdes.String(), Serdes.String())
+        );
+        joined.process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
+            final TestInputTopic<Integer, String> inputTopic1 =
+                driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final TestInputTopic<Integer, String> inputTopic2 =
+                driver.createInputTopic(topic2, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();
+            long time = 0L;
+
+            // push two items to the primary stream; the other window is empty; this should not produce any items
+            // w1 = {}
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = {}
+            for (int i = 0; i < 2; i++) {
+                inputTopic1.pipeInput(expectedKeys[i], "A" + expectedKeys[i], time + i);
+            }
+            processor.checkAndClearProcessResult();
+
+            // push one item to the other stream; this should produce one full-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 0:a0 (ts: 100) }
+            time += 100L;
+            inputTopic2.pipeInput(expectedKeys[0], "a" + expectedKeys[0], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(0, "A0+a0", 100L)
+            );
+
+            // push one item to the other stream; this should produce one left-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 0:a0 (ts: 100) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102) }
+            time += 2;
+            inputTopic2.pipeInput(expectedKeys[1], "a" + expectedKeys[1], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(1, "A1+null", 1L)
+            );
+
+            // push one item to the other stream; this should not produce any items
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102), 2:a2 (ts: 103) }
+            time += 1;
+            inputTopic2.pipeInput(expectedKeys[2], "a" + expectedKeys[2], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(1, "null+a1", 102L)
+            );
+
+            // push one item to the first stream; this should not produce one full-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102), 2:a2 (ts: 103) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1), 2:A2 (ts: 103) }
+            // --> w2 = { 0:a0 (ts: 100), 1:a1 (ts: 102), 2:a2 (ts: 103)  }
+            inputTopic1.pipeInput(expectedKeys[2], "A" + expectedKeys[2], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(2, "A2+a2", 103L)
+            );
+        }
+    }
+
+    @Test
+    public void shouldNotEmitLeftJoinResultForAsymmetricAfterWindow() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final int[] expectedKeys = new int[] {0, 1, 2, 3};
+
+        final KStream<Integer, String> stream1;
+        final KStream<Integer, String> stream2;
+        final KStream<Integer, String> joined;
+        final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        stream1 = builder.stream(topic1, consumed);
+        stream2 = builder.stream(topic2, consumed);
+
+        joined = stream1.outerJoin(
+            stream2,
+            MockValueJoiner.TOSTRING_JOINER,
+            JoinWindows.ofTimeDifferenceWithNoGrace(ofMillis(100)).after(ZERO),
+            StreamJoined.with(Serdes.Integer(), Serdes.String(), Serdes.String())
+        );
+        joined.process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
+            final TestInputTopic<Integer, String> inputTopic1 =
+                driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final TestInputTopic<Integer, String> inputTopic2 =
+                driver.createInputTopic(topic2, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();
+            long time = 0L;
+
+            // push two items to the primary stream; the other window is empty; this should not produce any item
+            // w1 = {}
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = {}
+            for (int i = 0; i < 2; i++) {
+                inputTopic1.pipeInput(expectedKeys[i], "A" + expectedKeys[i], time + i);
+            }
+            processor.checkAndClearProcessResult();
+
+            // push one item to the other stream; this should produce one full-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = {}
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 1:a1 (ts: 1) }
+            time += 1;
+            inputTopic2.pipeInput(expectedKeys[1], "a" + expectedKeys[1], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(1, "A1+a1", 1L)
+            );
+
+            // push one item to the other stream; this should produce one left-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 1:a1 (ts: 1) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 1:a1 (ts: 1), 2:a2 (ts: 101) }
+            time += 100;
+            inputTopic2.pipeInput(expectedKeys[2], "a" + expectedKeys[2], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(0, "A0+null", 0L)
+            );
+
+            // push one item to the other stream; this should not produce any item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 1:a1 (ts: 1), 2:a2 (ts: 101) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // --> w2 = { 1:a1 (ts: 1), 2:a2 (ts: 101), 3:a3 (ts: 101) }
+            inputTopic2.pipeInput(expectedKeys[3], "a" + expectedKeys[3], time);
+
+            processor.checkAndClearProcessResult();
+
+            // push one item to the first stream; this should produce one full-join item
+            // w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1) }
+            // w2 = { 1:a1 (ts: 1), 2:a2 (ts: 101), 3:a3 (ts: 101) }
+            // --> w1 = { 0:A0 (ts: 0), 1:A1 (ts: 1), 2:A2 (ts: 201) }
+            // --> w2 = { 1:a1 (ts: 1), 2:a2 (ts: 101), 3:a3 (ts: 101 }
+            time += 100;
+            inputTopic1.pipeInput(expectedKeys[2], "A" + expectedKeys[2], time);
+
+            processor.checkAndClearProcessResult(
+                new KeyValueTimestamp<>(2, "A2+a2", 201L)
+            );
+        }
+    }
+
+    /**
+     * NOTE: Header forwarding is undefined behavior, but we still want to understand the
+     * behavior so that we can make decisions about defining it in the future.
+     */
+    @Test
+    public void shouldForwardCurrentHeaders() {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final KStream<Integer, String> stream1;
+        final KStream<Integer, String> stream2;
+        final KStream<Integer, String> joined;
+        final MockApiProcessorSupplier<Integer, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        stream1 = builder.stream(topic1, consumed);
+        stream2 = builder.stream(topic2, consumed);
+
+        joined = stream1.outerJoin(
+            stream2,
+            MockValueJoiner.TOSTRING_JOINER,
+            JoinWindows.ofTimeDifferenceAndGrace(ofMillis(100L), ofMillis(10L)),
+            StreamJoined.with(Serdes.Integer(), Serdes.String(), Serdes.String())
+        );
+        joined.process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), PROPS)) {
+            final TestInputTopic<Integer, String> inputTopic1 =
+                driver.createInputTopic(topic1, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final TestInputTopic<Integer, String> inputTopic2 =
+                driver.createInputTopic(topic2, new IntegerSerializer(), new StringSerializer(), Instant.ofEpochMilli(0L), Duration.ZERO);
+            final MockApiProcessor<Integer, String, Void, Void> processor = supplier.theCapturedProcessor();
+
+            inputTopic1.pipeInput(new TestRecord<>(
+                0,
+                "A0",
+                new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x1})}),
+                0L
+            ));
+            inputTopic2.pipeInput(new TestRecord<>(
+                1,
+                "a0",
+                new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x2})}),
+                0L
+            ));
+            // bump stream-time to trigger outer-join results
+            inputTopic2.pipeInput(new TestRecord<>(
+                3,
+                "dummy",
+                new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x3})}),
+                (long) 211
+            ));
+
+            // Again, header forwarding is undefined, but the current observed behavior is that
+            // the headers pass through the forwarding record.
+            processor.checkAndClearProcessedRecords(
+                new Record<>(
+                    1,
+                    "null+a0",
+                    0L,
+                    new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x3})})
+                ),
+                new Record<>(
+                    0,
+                    "A0+null",
+                    0L,
+                    new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x3})})
+                )
+            );
+
+            // verifies joined duplicates are emitted
+            inputTopic1.pipeInput(new TestRecord<>(
+                2,
+                "A2",
+                new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x4})}),
+                200L
+            ));
+            inputTopic2.pipeInput(new TestRecord<>(
+                2,
+                "a2",
+                new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x5})}),
+                200L
+            ));
+
+            processor.checkAndClearProcessedRecords(
+                new Record<>(
+                    2,
+                    "A2+a2",
+                    200L,
+                    new RecordHeaders(new Header[]{new RecordHeader("h", new byte[]{0x5})})
+                )
+            );
+        }
+    }
+
     private void testUpperWindowBound(final int[] expectedKeys,
                                       final TopologyTestDriver driver,
                                       final MockApiProcessor<Integer, String, Void, Void> processor) {
@@ -839,7 +1115,6 @@ private void testUpperWindowBound(final int[] expectedKeys,
 
         // push a dummy record to produce all left-join non-joined items
         time += 301L;
-        driver.advanceWallClockTime(Duration.ofMillis(1000L));
         inputTopic1.pipeInput(0, "dummy", time);
         processor.checkAndClearProcessResult(
             new KeyValueTimestamp<>(0, "C0+null", 1101L),
@@ -1033,4 +1308,4 @@ private void testLowerWindowBound(final int[] expectedKeys,
             new KeyValueTimestamp<>(0, "dummy+null", 1103L)
         );
     }
-}
\ No newline at end of file
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregateProcessorTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregateProcessorTest.java
index 21c6e6af1228c..fc993b63a9e1d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregateProcessorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSessionWindowAggregateProcessorTest.java
@@ -22,11 +22,11 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.LogContext;
 import org.apache.kafka.common.utils.MockTime;
-import org.apache.kafka.common.utils.Time;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Initializer;
 import org.apache.kafka.streams.kstream.Merger;
 import org.apache.kafka.streams.kstream.SessionWindows;
@@ -40,9 +40,11 @@
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender.Event;
 import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.internals.RocksDbTimeOrderedSessionBytesStoreSupplier;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 import org.apache.kafka.test.InternalMockProcessorContext;
 import org.apache.kafka.test.MockRecordCollector;
@@ -51,13 +53,18 @@
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
+import java.util.Properties;
 import java.util.stream.Collectors;
 
 import static java.time.Duration.ofMillis;
+import static java.util.Arrays.asList;
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.test.StreamsTestUtils.getMetricByName;
@@ -69,29 +76,39 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-
+@RunWith(Parameterized.class)
 public class KStreamSessionWindowAggregateProcessorTest {
 
     private static final long GAP_MS = 5 * 60 * 1000L;
     private static final String STORE_NAME = "session-store";
 
+    private final MockTime time = new MockTime();
+    private final Metrics metrics = new Metrics();
+    private final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, "test", StreamsConfig.METRICS_LATEST, time);
     private final String threadId = Thread.currentThread().getName();
     private final Initializer<Long> initializer = () -> 0L;
     private final Aggregator<String, String, Long> aggregator = (aggKey, value, aggregate) -> aggregate + 1;
     private final Merger<String, Long> sessionMerger = (aggKey, aggOne, aggTwo) -> aggOne + aggTwo;
-    private final KStreamSessionWindowAggregate<String, String, Long> sessionAggregator =
-        new KStreamSessionWindowAggregate<>(
-            SessionWindows.ofInactivityGapWithNoGrace(ofMillis(GAP_MS)),
-            STORE_NAME,
-            initializer,
-            aggregator,
-            sessionMerger);
-
     private final List<KeyValueTimestamp<Windowed<String>, Change<Long>>> results = new ArrayList<>();
-    private final Processor<String, String, Windowed<String>, Change<Long>> processor = sessionAggregator.get();
-    private SessionStore<String, Long> sessionStore;
+
     private InternalMockProcessorContext<Windowed<String>, Change<Long>> context;
-    private final Metrics metrics = new Metrics();
+    private KStreamSessionWindowAggregate<String, String, Long> sessionAggregator;
+    private Processor<String, String, Windowed<String>, Change<Long>> processor;
+    private SessionStore<String, Long> sessionStore;
+
+    @Parameterized.Parameter
+    public EmitStrategy.StrategyType type;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {EmitStrategy.StrategyType.ON_WINDOW_UPDATE},
+            {EmitStrategy.StrategyType.ON_WINDOW_CLOSE}
+        });
+    }
+
+    private EmitStrategy emitStrategy;
+    private boolean emitFinal;
 
     @Before
     public void setup() {
@@ -99,23 +116,44 @@ public void setup() {
     }
 
     private void setup(final boolean enableCache) {
-        final StreamsMetricsImpl streamsMetrics =
-            new StreamsMetricsImpl(metrics, "test", StreamsConfig.METRICS_LATEST, new MockTime());
+        // Always process
+        final Properties prop = StreamsTestUtils.getStreamsConfig();
+        prop.put(StreamsConfig.InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0);
+        final StreamsConfig config = new StreamsConfig(prop);
+
         context = new InternalMockProcessorContext<Windowed<String>, Change<Long>>(
             TestUtils.tempDirectory(),
             Serdes.String(),
             Serdes.String(),
             streamsMetrics,
-            new StreamsConfig(StreamsTestUtils.getStreamsConfig()),
+            config,
             MockRecordCollector::new,
             new ThreadCache(new LogContext("testCache "), 100000, streamsMetrics),
-            Time.SYSTEM
+            time
         ) {
             @Override
             public <K extends Windowed<String>, V extends Change<Long>> void forward(final Record<K, V> record) {
                 results.add(new KeyValueTimestamp<>(record.key(), record.value(), record.timestamp()));
             }
         };
+
+
+        emitFinal = type.equals(EmitStrategy.StrategyType.ON_WINDOW_CLOSE);
+        emitStrategy = EmitStrategy.StrategyType.forType(type);
+
+        sessionAggregator = new KStreamSessionWindowAggregate<>(
+            SessionWindows.ofInactivityGapWithNoGrace(ofMillis(GAP_MS)),
+            STORE_NAME,
+            emitStrategy,
+            initializer,
+            aggregator,
+            sessionMerger);
+
+        if (processor != null) {
+            processor.close();
+        }
+        processor = sessionAggregator.get();
+
         // Set initial timestamp for CachingSessionStore to prepare entry from as default
         // InternalMockProcessorContext#timestamp returns -1.
         context.setTime(0L);
@@ -126,14 +164,14 @@ public <K extends Windowed<String>, V extends Change<Long>> void forward(final R
     }
 
     private void initStore(final boolean enableCaching) {
-        final StoreBuilder<SessionStore<String, Long>> storeBuilder =
-            Stores.sessionStoreBuilder(
-                Stores.persistentSessionStore(STORE_NAME, ofMillis(GAP_MS * 3)),
-                Serdes.String(),
-                Serdes.Long())
+        final SessionBytesStoreSupplier supplier = emitStrategy.type() == EmitStrategy.StrategyType.ON_WINDOW_CLOSE ?
+            new RocksDbTimeOrderedSessionBytesStoreSupplier(STORE_NAME, GAP_MS * 3, true) :
+            Stores.persistentSessionStore(STORE_NAME, ofMillis(GAP_MS * 3));
+
+        final StoreBuilder<SessionStore<String, Long>> storeBuilder = Stores.sessionStoreBuilder(supplier, Serdes.String(), Serdes.Long())
             .withLoggingDisabled();
 
-        if (enableCaching) {
+        if (enableCaching && emitStrategy.type() != EmitStrategy.StrategyType.ON_WINDOW_CLOSE) {
             storeBuilder.withCachingEnabled();
         }
 
@@ -147,6 +185,7 @@ private void initStore(final boolean enableCaching) {
     @After
     public void closeStore() {
         sessionStore.close();
+        processor.close();
     }
 
     @Test
@@ -198,35 +237,51 @@ public void shouldUpdateSessionIfTheSameTime() {
     @Test
     public void shouldHaveMultipleSessionsForSameIdWhenTimestampApartBySessionGap() {
         final String sessionId = "mel";
-        long time = 0;
-        processor.process(new Record<>(sessionId, "first", time));
-        final long time1 = time += GAP_MS + 1;
-        processor.process(new Record<>(sessionId, "second", time1));
-        processor.process(new Record<>(sessionId, "second", time1));
-        final long time2 = time += GAP_MS + 1;
-        processor.process(new Record<>(sessionId, "third", time2));
-        processor.process(new Record<>(sessionId, "third", time2));
-        processor.process(new Record<>(sessionId, "third", time2));
+        long now = 0;
+        processor.process(new Record<>(sessionId, "first", now));
+        now += GAP_MS + 1;
+        processor.process(new Record<>(sessionId, "second", now));
+        processor.process(new Record<>(sessionId, "second", now));
+        now += GAP_MS + 1;
+        processor.process(new Record<>(sessionId, "third", now));
+        processor.process(new Record<>(sessionId, "third", now));
+        processor.process(new Record<>(sessionId, "third", now));
 
         sessionStore.flush();
-        assertEquals(
-            Arrays.asList(
-                new KeyValueTimestamp<>(
-                    new Windowed<>(sessionId, new SessionWindow(0, 0)),
-                    new Change<>(1L, null),
-                    0L),
-                new KeyValueTimestamp<>(
-                    new Windowed<>(sessionId, new SessionWindow(GAP_MS + 1, GAP_MS + 1)),
-                    new Change<>(2L, null),
-                    GAP_MS + 1),
-                new KeyValueTimestamp<>(
-                    new Windowed<>(sessionId, new SessionWindow(time, time)),
-                    new Change<>(3L, null),
-                    time)
-            ),
-            results
-        );
 
+        if (emitFinal) {
+            assertEquals(
+                Arrays.asList(
+                    new KeyValueTimestamp<>(
+                        new Windowed<>(sessionId, new SessionWindow(0, 0)),
+                        new Change<>(1L, null),
+                        0L),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>(sessionId, new SessionWindow(GAP_MS + 1, GAP_MS + 1)),
+                        new Change<>(2L, null),
+                        GAP_MS + 1)
+                ),
+                results
+            );
+        } else {
+            assertEquals(
+                Arrays.asList(
+                    new KeyValueTimestamp<>(
+                        new Windowed<>(sessionId, new SessionWindow(0, 0)),
+                        new Change<>(1L, null),
+                        0L),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>(sessionId, new SessionWindow(GAP_MS + 1, GAP_MS + 1)),
+                        new Change<>(2L, null),
+                        GAP_MS + 1),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>(sessionId, new SessionWindow(now, now)),
+                        new Change<>(3L, null),
+                        now)
+                ),
+                results
+            );
+        }
     }
 
     @Test
@@ -264,8 +319,8 @@ public void shouldHandleMultipleSessionsAndMerging() {
 
         sessionStore.flush();
 
-        assertEquals(
-            Arrays.asList(
+        if (emitFinal) {
+            assertEquals(Arrays.asList(
                 new KeyValueTimestamp<>(
                     new Windowed<>("a", new SessionWindow(0, 0)),
                     new Change<>(1L, null),
@@ -281,22 +336,44 @@ public void shouldHandleMultipleSessionsAndMerging() {
                 new KeyValueTimestamp<>(
                     new Windowed<>("d", new SessionWindow(0, GAP_MS / 2)),
                     new Change<>(2L, null),
-                    GAP_MS / 2),
-                new KeyValueTimestamp<>(
-                    new Windowed<>("b", new SessionWindow(GAP_MS + 1, GAP_MS + 1)),
-                    new Change<>(1L, null),
-                    GAP_MS + 1),
-                new KeyValueTimestamp<>(
-                    new Windowed<>("a", new SessionWindow(GAP_MS + 1, GAP_MS + 1 + GAP_MS / 2)),
-                    new Change<>(2L, null),
-                    GAP_MS + 1 + GAP_MS / 2),
-                new KeyValueTimestamp<>(new Windowed<>(
-                    "c",
-                    new SessionWindow(GAP_MS + 1 + GAP_MS / 2, GAP_MS + 1 + GAP_MS / 2)), new Change<>(1L, null),
-                    GAP_MS + 1 + GAP_MS / 2)
-            ),
-            results
-        );
+                    GAP_MS / 2)
+                ),
+                results);
+        } else {
+            assertEquals(
+                Arrays.asList(
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("a", new SessionWindow(0, 0)),
+                        new Change<>(1L, null),
+                        0L),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("b", new SessionWindow(0, 0)),
+                        new Change<>(1L, null),
+                        0L),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("c", new SessionWindow(0, 0)),
+                        new Change<>(1L, null),
+                       0L),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("d", new SessionWindow(0, GAP_MS / 2)),
+                        new Change<>(2L, null),
+                        GAP_MS / 2),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("b", new SessionWindow(GAP_MS + 1, GAP_MS + 1)),
+                        new Change<>(1L, null),
+                        GAP_MS + 1),
+                    new KeyValueTimestamp<>(
+                        new Windowed<>("a", new SessionWindow(GAP_MS + 1, GAP_MS + 1 + GAP_MS / 2)),
+                        new Change<>(2L, null),
+                        GAP_MS + 1 + GAP_MS / 2),
+                    new KeyValueTimestamp<>(new Windowed<>(
+                        "c",
+                        new SessionWindow(GAP_MS + 1 + GAP_MS / 2, GAP_MS + 1 + GAP_MS / 2)), new Change<>(1L, null),
+                        GAP_MS + 1 + GAP_MS / 2)
+                    ),
+                    results
+            );
+        }
     }
 
     @Test
@@ -314,6 +391,9 @@ public void shouldGetAggregatedValuesFromValueGetter() {
 
     @Test
     public void shouldImmediatelyForwardNewSessionWhenNonCachedStore() {
+        if (emitFinal)
+            return;
+
         initStore(false);
         processor.init(context);
 
@@ -342,6 +422,9 @@ public void shouldImmediatelyForwardNewSessionWhenNonCachedStore() {
 
     @Test
     public void shouldImmediatelyForwardRemovedSessionsWhenMerging() {
+        if (emitFinal)
+            return;
+
         initStore(false);
         processor.init(context);
 
@@ -399,6 +482,7 @@ public void shouldLogAndMeterWhenSkippingLateRecordWithZeroGrace() {
         final Processor<String, String, Windowed<String>, Change<Long>> processor = new KStreamSessionWindowAggregate<>(
             SessionWindows.ofInactivityGapAndGrace(ofMillis(10L), ofMillis(0L)),
             STORE_NAME,
+            EmitStrategy.onWindowUpdate(),
             initializer,
             aggregator,
             sessionMerger
@@ -464,6 +548,7 @@ public void shouldLogAndMeterWhenSkippingLateRecordWithNonzeroGrace() {
         final Processor<String, String, Windowed<String>, Change<Long>> processor = new KStreamSessionWindowAggregate<>(
             SessionWindows.ofInactivityGapAndGrace(ofMillis(10L), ofMillis(1L)),
             STORE_NAME,
+            EmitStrategy.onWindowUpdate(),
             initializer,
             aggregator,
             sessionMerger
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregateTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregateTest.java
index 8e8115f35e47f..df95103791db1 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregateTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamSlidingWindowAggregateTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
+import java.util.HashSet;
+import java.util.Set;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.serialization.StringDeserializer;
@@ -24,9 +26,12 @@
 import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TestOutputTopic;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Grouped;
 import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.KTable;
@@ -53,6 +58,7 @@
 import org.apache.kafka.test.MockReducer;
 import org.apache.kafka.test.StreamsTestUtils;
 import org.hamcrest.Matcher;
+import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -69,11 +75,13 @@
 import java.util.Properties;
 import java.util.Random;
 import java.util.stream.Collectors;
+import org.junit.runners.Parameterized.Parameter;
 
 import static java.time.Duration.ofMillis;
 import static java.util.Arrays.asList;
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.common.utils.Utils.mkSet;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.hasItem;
 import static org.hamcrest.CoreMatchers.hasItems;
@@ -86,73 +94,135 @@
 @RunWith(Parameterized.class)
 public class KStreamSlidingWindowAggregateTest {
 
-    @Parameterized.Parameters(name = "{0}")
-    public static Collection<Boolean[]> data() {
-        return Arrays.asList(new Boolean[][] {
-            {false},
-            {true}
+    @Parameterized.Parameters(name = "{0}_inorder:{1}_cache:{2}")
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] {
+            {StrategyType.ON_WINDOW_UPDATE, true, true},
+            {StrategyType.ON_WINDOW_UPDATE, true, false},
+            {StrategyType.ON_WINDOW_UPDATE, false, true},
+            {StrategyType.ON_WINDOW_UPDATE, false, false},
+            {StrategyType.ON_WINDOW_CLOSE, true, true},
+            {StrategyType.ON_WINDOW_CLOSE, true, false},
+            {StrategyType.ON_WINDOW_CLOSE, false, true},
+            {StrategyType.ON_WINDOW_CLOSE, false, false}
         });
     }
+    @Parameter
+    public StrategyType type;
 
-    @Parameterized.Parameter
+    @Parameter(1)
     public boolean inOrderIterator;
 
+    @Parameter(2)
+    public boolean withCache;
+
+    private boolean emitFinal;
+    private EmitStrategy emitStrategy;
+
     private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
     private final String threadId = Thread.currentThread().getName();
 
+    @Before
+    public void before() {
+        emitFinal = type.equals(StrategyType.ON_WINDOW_CLOSE);
+        emitStrategy = StrategyType.forType(type);
+        // Set interval to 0 so that it always tries to emit
+        props.setProperty(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, "0");
+    }
+
     @Test
     public void testAggregateSmallInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
-        final WindowBytesStoreSupplier storeSupplier =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
+        final WindowBytesStoreSupplier storeSupplier = setupWindowBytesStoreSupplier(1);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
+
         final KTable<Windowed<String>, String> table = builder
             .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(50)))
-            .aggregate(
-                MockInitializer.STRING_INIT,
-                MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier)
-            );
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(5)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, materialized);
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table.toStream().process(supplier);
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             final TestInputTopic<String, String> inputTopic =
                 driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
             inputTopic.pipeInput("A", "1", 10L);
-            inputTopic.pipeInput("A", "2", 15L);
-            inputTopic.pipeInput("A", "3", 20L);
-            inputTopic.pipeInput("A", "4", 22L);
-            inputTopic.pipeInput("A", "5", 30L);
+            inputTopic.pipeInput("A", "2", 10L);
+            inputTopic.pipeInput("A", "3", 14L);
+            inputTopic.pipeInput("A", "4", 15L);
+            inputTopic.pipeInput("A", "5", 20L);
+            inputTopic.pipeInput("A", "6", 22L);
+            inputTopic.pipeInput("A", "7", 30L);
         }
 
-        final Map<Long, ValueAndTimestamp<String>> actual = new HashMap<>();
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = gatherOutput(supplier);
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
 
-        for (final KeyValueTimestamp<Windowed<String>, String> entry : supplier.theCapturedProcessor().processed()) {
-            final Windowed<String> window = entry.key();
-            final Long start = window.window().start();
-            final ValueAndTimestamp<String> valueAndTimestamp = ValueAndTimestamp.make(entry.value(), entry.timestamp());
-
-            if (actual.putIfAbsent(start, valueAndTimestamp) != null) {
-                actual.replace(start, valueAndTimestamp);
-            }
+        if (emitFinal) {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1+2", 10L)
+            ));
+            expected.put(4L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3", 14L)
+            ));
+            expected.put(5L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4", 15L)
+            ));
+            expected.put(10L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4+5", 20L)
+            ));
+            expected.put(11L, mkSet(
+                ValueAndTimestamp.make("0+3+4+5", 20L)
+            ));
+            expected.put(12L, mkSet(
+                ValueAndTimestamp.make("0+3+4+5+6", 22L)
+            ));
+        } else {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1", 10L),
+                ValueAndTimestamp.make("0+1+2", 10L)
+            ));
+            expected.put(4L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3", 14L)
+            ));
+            expected.put(5L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4", 15L)
+            ));
+            expected.put(10L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4+5", 20L)
+            ));
+            expected.put(11L, mkSet(
+                ValueAndTimestamp.make("0+3", 14L),
+                ValueAndTimestamp.make("0+3+4", 15L),
+                ValueAndTimestamp.make("0+3+4+5", 20L)
+            ));
+            expected.put(12L, mkSet(
+                ValueAndTimestamp.make("0+3+4+5+6", 22L)
+            ));
+            expected.put(15L, mkSet(
+                ValueAndTimestamp.make("0+4", 15L),
+                ValueAndTimestamp.make("0+4+5", 20L),
+                ValueAndTimestamp.make("0+4+5+6", 22L)
+            ));
+            expected.put(16L, mkSet(
+                ValueAndTimestamp.make("0+5", 20L),
+                ValueAndTimestamp.make("0+5+6", 22L)
+            ));
+            expected.put(20L, mkSet(
+                ValueAndTimestamp.make("0+5+6+7", 30L)
+            ));
+            expected.put(21L, mkSet(
+                ValueAndTimestamp.make("0+6", 22L),
+                ValueAndTimestamp.make("0+6+7", 30L)
+            ));
+            expected.put(23L, mkSet(
+                ValueAndTimestamp.make("0+7", 30L)
+            ));
         }
 
-        final Map<Long, ValueAndTimestamp<String>> expected = new HashMap<>();
-        expected.put(0L, ValueAndTimestamp.make("0+1", 10L));
-        expected.put(5L, ValueAndTimestamp.make("0+1+2", 15L));
-        expected.put(10L, ValueAndTimestamp.make("0+1+2+3", 20L));
-        expected.put(11L, ValueAndTimestamp.make("0+2+3", 20L));
-        expected.put(12L, ValueAndTimestamp.make("0+2+3+4", 22L));
-        expected.put(16L, ValueAndTimestamp.make("0+3+4", 22L));
-        expected.put(20L, ValueAndTimestamp.make("0+3+4+5", 30L));
-        expected.put(21L, ValueAndTimestamp.make("0+4+5", 30L));
-        expected.put(23L, ValueAndTimestamp.make("0+5", 30L));
-
         assertEquals(expected, actual);
     }
 
@@ -160,18 +230,17 @@ public void testAggregateSmallInput() {
     public void testReduceSmallInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
-        final WindowBytesStoreSupplier storeSupplier =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
+        final WindowBytesStoreSupplier storeSupplier = setupWindowBytesStoreSupplier(1);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
 
         final KTable<Windowed<String>, String> table = builder
             .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(50)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(5)))
+            .emitStrategy(emitStrategy)
             .reduce(
                 MockReducer.STRING_ADDER,
-                Materialized.as(storeSupplier)
+                materialized
             );
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table.toStream().process(supplier);
@@ -186,29 +255,39 @@ public void testReduceSmallInput() {
             inputTopic.pipeInput("A", "6", 30L);
         }
 
-        final Map<Long, ValueAndTimestamp<String>> actual = new HashMap<>();
-
-        for (final KeyValueTimestamp<Windowed<String>, String> entry : supplier.theCapturedProcessor().processed()) {
-            final Windowed<String> window = entry.key();
-            final Long start = window.window().start();
-            final ValueAndTimestamp<String> valueAndTimestamp = ValueAndTimestamp.make(entry.value(), entry.timestamp());
-            if (actual.putIfAbsent(start, valueAndTimestamp) != null) {
-                actual.replace(start, valueAndTimestamp);
-            }
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = gatherOutput(supplier);
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
+
+        if (emitFinal) {
+            expected.put(0L, mkSet(ValueAndTimestamp.make("1", 10L)));
+            expected.put(4L, mkSet(ValueAndTimestamp.make("1+2", 14L)));
+            expected.put(5L, mkSet(ValueAndTimestamp.make("1+2+3", 15L)));
+            expected.put(11L, mkSet(ValueAndTimestamp.make("2+3", 15L)));
+            expected.put(12L, mkSet(ValueAndTimestamp.make("2+3+4", 22L)));
+        } else {
+            expected.put(0L, mkSet(ValueAndTimestamp.make("1", 10L)));
+            expected.put(4L, mkSet(ValueAndTimestamp.make("1+2", 14L)));
+            expected.put(5L, mkSet(ValueAndTimestamp.make("1+2+3", 15L)));
+            expected.put(11L, mkSet(
+                ValueAndTimestamp.make("2", 14L),
+                ValueAndTimestamp.make("2+3", 15L)
+            ));
+            expected.put(12L, mkSet(ValueAndTimestamp.make("2+3+4", 22L)));
+            expected.put(15L, mkSet(
+                ValueAndTimestamp.make("3", 15L),
+                ValueAndTimestamp.make("3+4", 22L)
+            ));
+            expected.put(16L, mkSet(
+                ValueAndTimestamp.make("4", 22L),
+                ValueAndTimestamp.make("4+5", 26L)
+            ));
+            expected.put(20L, mkSet(ValueAndTimestamp.make("4+5+6", 30L)));
+            expected.put(23L, mkSet(
+                ValueAndTimestamp.make("5", 26L),
+                ValueAndTimestamp.make("5+6", 30L)
+            ));
+            expected.put(27L, mkSet(ValueAndTimestamp.make("6", 30L)));
         }
-
-        final Map<Long, ValueAndTimestamp<String>> expected = new HashMap<>();
-        expected.put(0L, ValueAndTimestamp.make("1", 10L));
-        expected.put(4L, ValueAndTimestamp.make("1+2", 14L));
-        expected.put(5L, ValueAndTimestamp.make("1+2+3", 15L));
-        expected.put(11L, ValueAndTimestamp.make("2+3", 15L));
-        expected.put(12L, ValueAndTimestamp.make("2+3+4", 22L));
-        expected.put(15L, ValueAndTimestamp.make("3+4", 22L));
-        expected.put(16L, ValueAndTimestamp.make("4+5", 26L));
-        expected.put(20L, ValueAndTimestamp.make("4+5+6", 30L));
-        expected.put(23L, ValueAndTimestamp.make("5+6", 30L));
-        expected.put(27L, ValueAndTimestamp.make("6", 30L));
-
         assertEquals(expected, actual);
     }
 
@@ -216,19 +295,20 @@ public void testReduceSmallInput() {
     public void testAggregateLargeInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic1 = "topic1";
+        final long grace = emitFinal ? 10L : 50L;
+
+        final WindowBytesStoreSupplier storeSupplier = setupWindowBytesStoreSupplier(1);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
 
-        final WindowBytesStoreSupplier storeSupplier =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(50)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(grace)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier)
+                materialized
             );
 
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
@@ -247,18 +327,18 @@ public void testAggregateLargeInput() {
             inputTopic1.pipeInput("B", "3", 18L);
             inputTopic1.pipeInput("B", "4", 19L);
             inputTopic1.pipeInput("B", "5", 25L);
-            inputTopic1.pipeInput("B", "6", 14L);
+            inputTopic1.pipeInput("B", "6", 14L); // skip for emit final [4, 14], close time 15
 
-            inputTopic1.pipeInput("C", "1", 11L);
+            inputTopic1.pipeInput("C", "1", 11L); // skip for emit final [1, 11], close time 15
             inputTopic1.pipeInput("C", "2", 15L);
             inputTopic1.pipeInput("C", "3", 16L);
             inputTopic1.pipeInput("C", "4", 21);
             inputTopic1.pipeInput("C", "5", 23L);
             
-            inputTopic1.pipeInput("D", "4", 11L);
-            inputTopic1.pipeInput("D", "2", 12L);
+            inputTopic1.pipeInput("D", "4", 11L); // skip for emit final [1, 11], close time 15
+            inputTopic1.pipeInput("D", "2", 12L); // skip for emit final [2, 12], close time 15
             inputTopic1.pipeInput("D", "3", 29L);
-            inputTopic1.pipeInput("D", "5", 16L);
+            inputTopic1.pipeInput("D", "5", 16L); // skip for emit final [6, 16], close time: 19
         }
         final Comparator<KeyValueTimestamp<Windowed<String>, String>> comparator =
             Comparator.comparing((KeyValueTimestamp<Windowed<String>, String> o) -> o.key().key())
@@ -266,108 +346,168 @@ public void testAggregateLargeInput() {
 
         final ArrayList<KeyValueTimestamp<Windowed<String>, String>> actual = supplier.theCapturedProcessor().processed();
         actual.sort(comparator);
-        assertEquals(
-            asList(
-                // FINAL WINDOW: A@10 left window created when A@10 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 10),
-                // FINAL WINDOW: A@15 left window created when A@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+4", 15),
-                // A@20 left window created when A@20 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)), "0+1+2", 20),
-                // FINAL WINDOW: A@20 left window updated when A@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)), "0+1+2+4", 20),
-                // A@10 right window created when A@20 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+2", 20),
-                // FINAL WINDOW: A@10 right window updated when A@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+2+4", 20),
-                // A@22 left window created when A@22 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(12, 22)), "0+2+3", 22),
-                // FINAL WINDOW: A@22 left window updated when A@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(12, 22)), "0+2+3+4", 22),
-                // FINAL WINDOW: A@15 right window created when A@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(16, 26)), "0+2+3", 22),
-                // FINAL WINDOW: A@20 right window created when A@22 processed
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(21, 31)), "0+3", 22),
-                // FINAL WINDOW: B@12 left window created when B@12 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(2, 12)), "0+1", 12),
-                // FINAL WINDOW: B@13 left window created when B@13 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(3, 13)), "0+1+2", 13),
-                // FINAL WINDOW: B@14 left window created when B@14 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(4, 14)), "0+1+2+6", 14),
-                // B@18 left window created when B@18 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8, 18)), "0+1+2+3", 18),
-                // FINAL WINDOW: B@18 left window updated when B@14 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8, 18)), "0+1+2+3+6", 18),
-                // B@19 left window created when B@19 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(9, 19)), "0+1+2+3+4", 19),
-                // FINAL WINDOW: B@19 left window updated when B@14 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(9, 19)), "0+1+2+3+4+6", 19),
-                // B@12 right window created when B@13 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2", 13),
-                // B@12 right window updated when B@18 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2+3", 18),
-                // B@12 right window updated when B@19 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2+3+4", 19),
-                // FINAL WINDOW: B@12 right window updated when B@14 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2+3+4+6", 19),
-                // B@13 right window created when B@18 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3", 18),
-                // B@13 right window updated when B@19 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3+4", 19),
-                // FINAL WINDOW: B@13 right window updated when B@14 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3+4+6", 19),
-                // FINAL WINDOW: B@25 left window created when B@25 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15, 25)), "0+3+4+5", 25),
-                // B@18 right window created when B@19 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(19, 29)), "0+4", 19),
-                // FINAL WINDOW: B@18 right window updated when B@25 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(19, 29)), "0+4+5", 25),
-                // FINAL WINDOW: B@19 right window updated when B@25 processed
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(20, 30)), "0+5", 25),
-                // FINAL WINDOW: C@11 left window created when C@11 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(1, 11)), "0+1", 11),
-                // FINAL WINDOW: C@15 left window created when C@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+1+2", 15),
-                // FINAL WINDOW: C@16 left window created when C@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(6, 16)), "0+1+2+3", 16),
-                // FINAL WINDOW: C@21 left window created when C@21 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(11, 21)), "0+1+2+3+4", 21),
-                // C@11 right window created when C@15 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2", 15),
-                // C@11 right window updated when C@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2+3", 16),
-                // FINAL WINDOW: C@11 right window updated when C@21 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2+3+4", 21),
-                // FINAL WINDOW: C@23 left window created when C@23 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)), "0+2+3+4+5", 23),
-                // C@15 right window created when C@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3", 16),
-                // C@15 right window updated when C@21 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3+4", 21),
-                // FINAL WINDOW: C@15 right window updated when C@23 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3+4+5", 23),
-                // C@16 right window created when C@21 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(17, 27)), "0+4", 21),
-                // FINAL WINDOW: C@16 right window updated when C@23 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(17, 27)), "0+4+5", 23),
-                // FINAL WINDOW: C@21 right window created when C@23 processed
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(22, 32)), "0+5", 23),
-                // FINAL WINDOW: D@11 left window created when D@11 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(1, 11)), "0+4", 11),
-                // FINAL WINDOW: D@12 left window created when D@12 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(2, 12)), "0+4+2", 12),
-                // FINAL WINDOW: D@16 left window created when D@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(6, 16)), "0+4+2+5", 16),
-                // D@11 right window created when D@12 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(12, 22)), "0+2", 12),
-                // FINAL WINDOW: D@11 right window updated when D@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(12, 22)), "0+2+5", 16),
-                // FINAL WINDOW: D@12 right window created when D@16 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(13, 23)), "0+5", 16),
-                // FINAL WINDOW: D@29 left window created when D@29 processed
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(19, 29)), "0+3", 29)),
-            actual
-        );
+
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    // FINAL WINDOW: A@10 left window created when A@10 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 10),
+                    // FINAL WINDOW: A@15 left window created when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+4",
+                        15),
+                    // FINAL WINDOW: B@12 left window created when B@12 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(2, 12)), "0+1", 12),
+                    // FINAL WINDOW: B@13 left window created when B@13 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(3, 13)), "0+1+2",
+                        13),
+                    // FINAL WINDOW: B@18 left window updated when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8, 18)), "0+1+2+3+6",
+                        18),
+                    // FINAL WINDOW: C@15 left window created when C@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+2",
+                        15),
+                    // FINAL WINDOW: C@16 left window created when C@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(6, 16)), "0+2+3",
+                        16)),
+                actual
+            );
+        } else {
+            assertEquals(
+                asList(
+                    // FINAL WINDOW: A@10 left window created when A@10 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 10),
+                    // FINAL WINDOW: A@15 left window created when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+4",
+                        15),
+                    // A@20 left window created when A@20 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)), "0+1+2",
+                        20),
+                    // FINAL WINDOW: A@20 left window updated when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)), "0+1+2+4",
+                        20),
+                    // A@10 right window created when A@20 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+2", 20),
+                    // FINAL WINDOW: A@10 right window updated when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+2+4",
+                        20),
+                    // A@22 left window created when A@22 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(12, 22)), "0+2+3",
+                        22),
+                    // FINAL WINDOW: A@22 left window updated when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(12, 22)), "0+2+3+4",
+                        22),
+                    // FINAL WINDOW: A@15 right window created when A@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(16, 26)), "0+2+3",
+                        22),
+                    // FINAL WINDOW: A@20 right window created when A@22 processed
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(21, 31)), "0+3", 22),
+                    // FINAL WINDOW: B@12 left window created when B@12 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(2, 12)), "0+1", 12),
+                    // FINAL WINDOW: B@13 left window created when B@13 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(3, 13)), "0+1+2",
+                        13),
+                    // FINAL WINDOW: B@14 left window created when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(4, 14)), "0+1+2+6",
+                        14),
+                    // B@18 left window created when B@18 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8, 18)), "0+1+2+3",
+                        18),
+                    // FINAL WINDOW: B@18 left window updated when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(8, 18)), "0+1+2+3+6",
+                        18),
+                    // B@19 left window created when B@19 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(9, 19)), "0+1+2+3+4",
+                        19),
+                    // FINAL WINDOW: B@19 left window updated when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(9, 19)),
+                        "0+1+2+3+4+6", 19),
+                    // B@12 right window created when B@13 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2", 13),
+                    // B@12 right window updated when B@18 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2+3",
+                        18),
+                    // B@12 right window updated when B@19 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)), "0+2+3+4",
+                        19),
+                    // FINAL WINDOW: B@12 right window updated when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(13, 23)),
+                        "0+2+3+4+6", 19),
+                    // B@13 right window created when B@18 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3", 18),
+                    // B@13 right window updated when B@19 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3+4",
+                        19),
+                    // FINAL WINDOW: B@13 right window updated when B@14 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(14, 24)), "0+3+4+6",
+                        19),
+                    // FINAL WINDOW: B@25 left window created when B@25 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(15, 25)), "0+3+4+5",
+                        25),
+                    // B@18 right window created when B@19 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(19, 29)), "0+4", 19),
+                    // FINAL WINDOW: B@18 right window updated when B@25 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(19, 29)), "0+4+5",
+                        25),
+                    // FINAL WINDOW: B@19 right window updated when B@25 processed
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(20, 30)), "0+5", 25),
+                    // FINAL WINDOW: C@11 left window created when C@11 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(1, 11)), "0+1", 11),
+                    // FINAL WINDOW: C@15 left window created when C@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+1+2",
+                        15),
+                    // FINAL WINDOW: C@16 left window created when C@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(6, 16)), "0+1+2+3",
+                        16),
+                    // FINAL WINDOW: C@21 left window created when C@21 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(11, 21)),
+                        "0+1+2+3+4", 21),
+                    // C@11 right window created when C@15 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2", 15),
+                    // C@11 right window updated when C@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2+3",
+                        16),
+                    // FINAL WINDOW: C@11 right window updated when C@21 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(12, 22)), "0+2+3+4",
+                        21),
+                    // FINAL WINDOW: C@23 left window created when C@23 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)),
+                        "0+2+3+4+5", 23),
+                    // C@15 right window created when C@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3", 16),
+                    // C@15 right window updated when C@21 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3+4",
+                        21),
+                    // FINAL WINDOW: C@15 right window updated when C@23 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(16, 26)), "0+3+4+5",
+                        23),
+                    // C@16 right window created when C@21 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(17, 27)), "0+4", 21),
+                    // FINAL WINDOW: C@16 right window updated when C@23 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(17, 27)), "0+4+5",
+                        23),
+                    // FINAL WINDOW: C@21 right window created when C@23 processed
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(22, 32)), "0+5", 23),
+                    // FINAL WINDOW: D@11 left window created when D@11 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(1, 11)), "0+4", 11),
+                    // FINAL WINDOW: D@12 left window created when D@12 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(2, 12)), "0+4+2",
+                        12),
+                    // FINAL WINDOW: D@16 left window created when D@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(6, 16)), "0+4+2+5",
+                        16),
+                    // D@11 right window created when D@12 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(12, 22)), "0+2", 12),
+                    // FINAL WINDOW: D@11 right window updated when D@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(12, 22)), "0+2+5",
+                        16),
+                    // FINAL WINDOW: D@12 right window created when D@16 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(13, 23)), "0+5", 16),
+                    // FINAL WINDOW: D@29 left window created when D@29 processed
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(19, 29)), "0+3",
+                        29)),
+                actual
+            );
+        }
     }
 
     @Test
@@ -375,37 +515,27 @@ public void testJoin() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic1 = "topic1";
         final String topic2 = "topic2";
-        final WindowBytesStoreSupplier storeSupplier1 =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder1", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse1", Duration.ofMillis(50000), Duration.ofMillis(10), false);
-        final WindowBytesStoreSupplier storeSupplier2 =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder2", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse2", Duration.ofMillis(50000), Duration.ofMillis(10), false);
+        final WindowBytesStoreSupplier storeSupplier1 = setupWindowBytesStoreSupplier(1);
+        final WindowBytesStoreSupplier storeSupplier2 = setupWindowBytesStoreSupplier(2);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized1 = setupMaterialized(emitFinal ? Materialized.as("store-name1") : Materialized.as(storeSupplier1));
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized2 = setupMaterialized(emitFinal ? Materialized.as("store-name2") : Materialized.as(storeSupplier2));
 
+        final long grace = emitFinal ? 0 : 100;
         final KTable<Windowed<String>, String> table1 = builder
             .stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(100)))
-            .aggregate(
-                MockInitializer.STRING_INIT,
-                MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier1)
-            );
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(grace)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, materialized1);
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic2, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(100)))
-            .aggregate(
-                MockInitializer.STRING_INIT,
-                MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier2)
-            );
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(grace)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, materialized2);
 
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table1.toStream().process(supplier);
-
         table2.toStream().process(supplier);
 
         table1.join(table2, (p1, p2) -> p1 + "%" + p2).toStream().process(supplier);
@@ -421,12 +551,20 @@ public void testJoin() {
 
             final List<MockApiProcessor<Windowed<String>, String, Void, Void>> processors = supplier.capturedProcessors(3);
 
-            processors.get(0).checkAndClearProcessResult(
+            if (emitFinal) {
+                processors.get(0).checkAndClearProcessResult(
                     // left windows created by the first set of records to table 1
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1",  10),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(1, 11)),  "0+2",  11),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)),  "0+3",  12)
-            );
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 10),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(1, 11)), "0+2", 11)
+                );
+            } else {
+                processors.get(0).checkAndClearProcessResult(
+                    // left windows created by the first set of records to table 1
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 10),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(1, 11)), "0+2", 11),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+3", 12)
+                );
+            }
             processors.get(1).checkAndClearProcessResult();
             processors.get(2).checkAndClearProcessResult();
 
@@ -434,58 +572,89 @@ public void testJoin() {
             inputTopic1.pipeInput("B", "2", 16L);
             inputTopic1.pipeInput("C", "3", 19L);
 
-            processors.get(0).checkAndClearProcessResult(
+            if (emitFinal) {
+                processors.get(0).checkAndClearProcessResult(
                     // right windows from previous records are created, and left windows from new records to table 1
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)),  "0+1",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1+1",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(12, 22)),  "0+2",  16),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)),  "0+2+2",  16),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)),  "0+3",  19),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(9, 19)),  "0+3+3",  19)
-            );
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+3", 12),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+2+2", 16)
+                );
+            } else {
+                processors.get(0).checkAndClearProcessResult(
+                    // right windows from previous records are created, and left windows from new records to table 1
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+1", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(12, 22)), "0+2", 16),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+2+2", 16),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)), "0+3", 19),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(9, 19)), "0+3+3", 19)
+                );
+            }
             processors.get(1).checkAndClearProcessResult();
             processors.get(2).checkAndClearProcessResult();
 
             inputTopic2.pipeInput("A", "a", 10L);
-            inputTopic2.pipeInput("B", "b", 30L);
             inputTopic2.pipeInput("C", "c", 12L);
-            inputTopic2.pipeInput("C", "c", 35L);
-
 
             processors.get(0).checkAndClearProcessResult();
-            processors.get(1).checkAndClearProcessResult(
+            if (emitFinal) {
+                processors.get(1).checkAndClearProcessResult(
                     // left windows from first set of records sent to table 2
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+a",  10),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(20, 30)),  "0+b",  30),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)),  "0+c",  12),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(25, 35)),  "0+c",  35)
-            );
-            processors.get(2).checkAndClearProcessResult(
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+a", 10)
+                );
+                processors.get(2).checkAndClearProcessResult(
                     // set of join windows from windows created by table 1 and table 2
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1%0+a",  10),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)),  "0+3%0+c",  12)
-            );
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1%0+a", 10)
+                );
+            } else {
+                processors.get(1).checkAndClearProcessResult(
+                    // left windows from first set of records sent to table 2
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+a", 10),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+c", 12)
+                );
+                processors.get(2).checkAndClearProcessResult(
+                    // set of join windows from windows created by table 1 and table 2
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1%0+a", 10),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+3%0+c", 12)
+                );
+            }
 
             inputTopic2.pipeInput("A", "a", 15L);
             inputTopic2.pipeInput("B", "b", 16L);
             inputTopic2.pipeInput("C", "c", 17L);
 
             processors.get(0).checkAndClearProcessResult();
-            processors.get(1).checkAndClearProcessResult(
+
+            if (emitFinal) {
+                processors.get(1).checkAndClearProcessResult(
                     // right windows from previous records are created (where applicable), and left windows from new records to table 2
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)),  "0+a",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+a+a",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)),  "0+b",  16),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)),  "0+c",  17),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(7, 17)),  "0+c+c",  17)
-            );
-            processors.get(2).checkAndClearProcessResult(
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+c", 12),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+a+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+b", 16)
+                );
+                processors.get(2).checkAndClearProcessResult(
                     // set of join windows from windows created by table 1 and table 2
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)),  "0+1%0+a",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1+1%0+a+a",  15),
-                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)),  "0+2+2%0+b",  16),
-                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)),  "0+3%0+c",  19)
-            );
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(2, 12)), "0+3%0+c", 12),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1%0+a+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+2+2%0+b", 16)
+                );
+            } else {
+                processors.get(1).checkAndClearProcessResult(
+                    // right windows from previous records are created (where applicable), and left windows from new records to table 2
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+a+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+b", 16),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)), "0+c", 17),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(7, 17)), "0+c+c", 17)
+                );
+                processors.get(2).checkAndClearProcessResult(
+                    // set of join windows from windows created by table 1 and table 2
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(11, 21)), "0+1%0+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1%0+a+a", 15),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(6, 16)), "0+2+2%0+b", 16),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(13, 23)), "0+3%0+c", 19)
+                );
+            }
         }
     }
 
@@ -494,14 +663,17 @@ public void testEarlyRecordsSmallInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(
+            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String()));
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(50), ofMillis(200)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(50), ofMillis(0)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String())
+                materialized
             );
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table2.toStream().process(supplier);
@@ -516,25 +688,94 @@ public void testEarlyRecordsSmallInput() {
             inputTopic.pipeInput("A", "4", 3L);
             inputTopic.pipeInput("A", "5", 13L);
             inputTopic.pipeInput("A", "6", 10L);
+            inputTopic.pipeInput("A", "7", 70L);
         }
 
-        final Map<Long, ValueAndTimestamp<String>> actual = new HashMap<>();
-        for (final KeyValueTimestamp<Windowed<String>, String> entry : supplier.theCapturedProcessor().processed()) {
-            final Windowed<String> window = entry.key();
-            final Long start = window.window().start();
-            final ValueAndTimestamp<String> valueAndTimestamp = ValueAndTimestamp.make(entry.value(), entry.timestamp());
-            if (actual.putIfAbsent(start, valueAndTimestamp) != null) {
-                actual.replace(start, valueAndTimestamp);
-            }
-        }
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = gatherOutput(supplier);
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
 
-        final Map<Long, ValueAndTimestamp<String>> expected = new HashMap<>();
-        expected.put(0L, ValueAndTimestamp.make("0+1+2+3+4+5+6", 13L));
-        expected.put(1L, ValueAndTimestamp.make("0+2+3+4+5+6", 13L));
-        expected.put(4L, ValueAndTimestamp.make("0+2+3+5+6", 13L));
-        expected.put(6L, ValueAndTimestamp.make("0+3+5+6", 13L));
-        expected.put(7L, ValueAndTimestamp.make("0+5+6", 13L));
-        expected.put(11L, ValueAndTimestamp.make("0+5", 13L));
+        if (emitFinal) {
+            expected.put(0L,
+                mkSet(
+                    ValueAndTimestamp.make("0+1+2+3+4+5+6", 13L)
+                )
+            );
+            expected.put(1L,
+                mkSet(
+                    ValueAndTimestamp.make("0+2+3+4+5+6", 13L)
+                )
+            );
+            expected.put(4L,
+                mkSet(
+                    ValueAndTimestamp.make("0+2+3+5+6", 13L)
+                )
+            );
+            expected.put(6L,
+                mkSet(
+                    ValueAndTimestamp.make("0+3+5+6", 13L)
+                )
+            );
+            expected.put(7L,
+                mkSet(
+                    ValueAndTimestamp.make("0+5+6", 13L)
+                )
+            );
+            expected.put(11L,
+                mkSet(
+                    ValueAndTimestamp.make("0+5", 13L)
+                )
+            );
+        } else {
+            expected.put(0L,
+                mkSet(
+                    ValueAndTimestamp.make("0+1", 0L),
+                    ValueAndTimestamp.make("0+1+2", 5L),
+                    ValueAndTimestamp.make("0+1+2+3", 6L),
+                    ValueAndTimestamp.make("0+1+2+3+4", 6L),
+                    ValueAndTimestamp.make("0+1+2+3+4+5", 13L),
+                    ValueAndTimestamp.make("0+1+2+3+4+5+6", 13L)
+                )
+            );
+            expected.put(1L,
+                mkSet(
+                    ValueAndTimestamp.make("0+2", 5L),
+                    ValueAndTimestamp.make("0+2+3", 6L),
+                    ValueAndTimestamp.make("0+2+3+4", 6L),
+                    ValueAndTimestamp.make("0+2+3+4+5", 13L),
+                    ValueAndTimestamp.make("0+2+3+4+5+6", 13L)
+                )
+            );
+            expected.put(4L,
+                mkSet(
+                    ValueAndTimestamp.make("0+2+3", 6L),
+                    ValueAndTimestamp.make("0+2+3+5", 13L),
+                    ValueAndTimestamp.make("0+2+3+5+6", 13L)
+                )
+            );
+            expected.put(6L,
+                mkSet(
+                    ValueAndTimestamp.make("0+3", 6L),
+                    ValueAndTimestamp.make("0+3+5", 13L),
+                    ValueAndTimestamp.make("0+3+5+6", 13L)
+                )
+            );
+            expected.put(7L,
+                mkSet(
+                    ValueAndTimestamp.make("0+5", 13L),
+                    ValueAndTimestamp.make("0+5+6", 13L)
+                )
+            );
+            expected.put(11L,
+                mkSet(
+                    ValueAndTimestamp.make("0+5", 13L)
+                )
+            );
+            expected.put(20L,
+                mkSet(
+                    ValueAndTimestamp.make("0+7", 70L)
+                )
+            );
+        }
 
         assertEquals(expected, actual);
     }
@@ -544,14 +785,18 @@ public void testEarlyRecordsRepeatedInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(
+            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String()));
+
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(5), ofMillis(20)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(5), ofMillis(0)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String())
+                materialized
             );
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table2.toStream().process(supplier);
@@ -567,22 +812,53 @@ public void testEarlyRecordsRepeatedInput() {
             inputTopic.pipeInput("A", "5", 2L);
             inputTopic.pipeInput("A", "6", 2L);
             inputTopic.pipeInput("A", "7", 0L);
+            inputTopic.pipeInput("A", "8", 7L);
         }
 
-        final Map<Long, ValueAndTimestamp<String>> actual = new HashMap<>();
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = new HashMap<>();
         for (final KeyValueTimestamp<Windowed<String>, String> entry : supplier.theCapturedProcessor().processed()) {
             final Windowed<String> window = entry.key();
             final Long start = window.window().start();
             final ValueAndTimestamp<String> valueAndTimestamp = ValueAndTimestamp.make(entry.value(), entry.timestamp());
-            if (actual.putIfAbsent(start, valueAndTimestamp) != null) {
-                actual.replace(start, valueAndTimestamp);
-            }
+            final Set<ValueAndTimestamp<String>> valueSet = actual.computeIfAbsent(start, k -> new HashSet<>());
+            valueSet.add(valueAndTimestamp);
         }
 
-        final Map<Long, ValueAndTimestamp<String>> expected = new HashMap<>();
-        expected.put(0L, ValueAndTimestamp.make("0+1+2+3+4+5+6+7", 4L));
-        expected.put(1L, ValueAndTimestamp.make("0+2+3+5+6", 4L));
-        expected.put(3L, ValueAndTimestamp.make("0+3", 4L));
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
+        if (emitFinal) {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4+5+6+7", 4L)
+            ));
+            expected.put(1L, mkSet(
+                ValueAndTimestamp.make("0+2+3+5+6", 4L)
+            ));
+        } else {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1", 0L),
+                ValueAndTimestamp.make("0+1+2", 2L),
+                ValueAndTimestamp.make("0+1+2+3+4+5", 4L),
+                ValueAndTimestamp.make("0+1+2+3+4+5+6", 4L),
+                ValueAndTimestamp.make("0+1+2+3", 4L),
+                ValueAndTimestamp.make("0+1+2+3+4", 4L),
+                ValueAndTimestamp.make("0+1+2+3+4+5+6+7", 4L)
+            ));
+            expected.put(1L, mkSet(
+                ValueAndTimestamp.make("0+2+3+5+6", 4L),
+                ValueAndTimestamp.make("0+2", 2L),
+                ValueAndTimestamp.make("0+2+3", 4L),
+                ValueAndTimestamp.make("0+2+3+5", 4L)
+            ));
+            expected.put(2L, mkSet(
+                ValueAndTimestamp.make("0+2+3+5+6+8", 7)
+            ));
+            expected.put(3L, mkSet(
+                ValueAndTimestamp.make("0+3", 4L),
+                ValueAndTimestamp.make("0+3+8", 7L)
+            ));
+            expected.put(5L, mkSet(
+                ValueAndTimestamp.make("0+8", 7)
+            ));
+        }
         assertEquals(expected, actual);
     }
 
@@ -590,19 +866,19 @@ public void testEarlyRecordsRepeatedInput() {
     public void testEarlyRecordsLargeInput() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
-        final WindowBytesStoreSupplier storeSupplier =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
+        final WindowBytesStoreSupplier storeSupplier = setupWindowBytesStoreSupplier(1);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
 
+        final long grace = emitFinal ? 1L : 50L;
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(50)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(grace)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier)
+                materialized
             );
 
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
@@ -628,74 +904,545 @@ public void testEarlyRecordsLargeInput() {
 
         final ArrayList<KeyValueTimestamp<Windowed<String>, String>> actual = supplier.theCapturedProcessor().processed();
         actual.sort(comparator);
-        assertEquals(
-            asList(
-                // E@0
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1", 0),
-                // E@5
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3", 5),
-                // E@6
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4", 6),
-                // E@3
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2", 6),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2+5", 10),
-                //E@4
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2+5+7", 10),
-                //E@2
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2+5+7+8", 10),
-                // E@5
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3", 5),
-                // E@6
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4", 6),
-                // E@3
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2", 6),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2+5", 10),
-                //E@4
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2+5+7", 10),
-                //E@2
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2+5+7+8", 10),
-                //E@13
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6", 13),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6+5", 13),
-                //E@4
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6+5+7", 13),
-                // E@3
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4", 6),
-                //E@13
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6", 13),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5", 13),
-                //E@4
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5+7", 13),
-                //E@4
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)), "0+3+4+6+5", 13),
-                //E@15
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)), "0+3+4+6+5+9", 15),
-                // E@6
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4", 6),
-                //E@13
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6", 13),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5", 13),
-                //E@15
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5+9", 15),
-                //E@13
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6", 13),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5", 13),
-                //E@15
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5+9", 15),
-                //E@10
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6", 13),
-                //E@15
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6+9", 15),
-                //E@15
-                new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(14, 24)), "0+9", 15)),
-            actual
-        );
+
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2",
+                        6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2",
+                        6),
+                    // E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)),
+                        "0+3+4+2+6+5+7", 13)),
+                actual
+            );
+        } else {
+            assertEquals(
+                asList(
+                    // E@0
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1", 0),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3", 5),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4",
+                        6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2",
+                        6),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)),
+                        "0+1+3+4+2+5", 10),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)),
+                        "0+1+3+4+2+5+7", 10),
+                    //E@2
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)),
+                        "0+1+3+4+2+5+7+8", 10),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3", 5),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4", 6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2",
+                        6),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2+5",
+                        10),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)),
+                        "0+3+4+2+5+7", 10),
+                    //E@2
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)),
+                        "0+3+4+2+5+7+8", 10),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6",
+                        13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)),
+                        "0+3+4+2+6+5", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)),
+                        "0+3+4+2+6+5+7", 13),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4", 6),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6",
+                        13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5",
+                        13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)),
+                        "0+3+4+6+5+7", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)), "0+3+4+6+5",
+                        13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)),
+                        "0+3+4+6+5+9", 15),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4", 6),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6",
+                        13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5",
+                        13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5+9",
+                        15),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6", 13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5",
+                        13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5+9",
+                        15),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6", 13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6+9",
+                        15),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(14, 24)), "0+9",
+                        15)),
+                actual
+            );
+        }
+    }
+
+    @Test
+    public void testEarlyNoGracePeriodSmallInput() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final String topic = "topic";
+
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(
+            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String()));
+
+        final KTable<Windowed<String>, String> table2 = builder
+            .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(50)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                materialized
+            );
+        final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        table2.toStream().process(supplier);
+
+        // all events are considered as early events since record timestamp is less than time difference of the window
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
+
+            inputTopic.pipeInput("A", "1", 0L);
+            inputTopic.pipeInput("A", "2", 5L);
+            inputTopic.pipeInput("A", "3", 6L);
+            inputTopic.pipeInput("A", "4", 3L);
+            inputTopic.pipeInput("A", "5", 13L);
+            inputTopic.pipeInput("A", "6", 10L);
+            inputTopic.pipeInput("A", "6", 70L);
+        }
+
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = gatherOutput(supplier);
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
+
+        if (emitFinal) {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4+5+6", 13L)
+            ));
+            expected.put(1L, mkSet(
+                ValueAndTimestamp.make("0+2+3+4+5+6", 13L)
+            ));
+            expected.put(4L, mkSet(
+                ValueAndTimestamp.make("0+2+3+5+6", 13L)
+            ));
+            expected.put(6L, mkSet(
+                ValueAndTimestamp.make("0+3+5+6", 13L)
+            ));
+            expected.put(7L, mkSet(
+                ValueAndTimestamp.make("0+5+6", 13L)
+            ));
+            expected.put(11L, mkSet(
+                ValueAndTimestamp.make("0+5", 13L)
+            ));
+        } else {
+            expected.put(0L, mkSet(
+                ValueAndTimestamp.make("0+1", 0L),
+                ValueAndTimestamp.make("0+1+2", 5L),
+                ValueAndTimestamp.make("0+1+2+3", 6L),
+                ValueAndTimestamp.make("0+1+2+3+4", 6L),
+                ValueAndTimestamp.make("0+1+2+3+4+5", 13L),
+                ValueAndTimestamp.make("0+1+2+3+4+5+6", 13L)
+            ));
+            expected.put(1L, mkSet(
+                ValueAndTimestamp.make("0+2", 5L),
+                ValueAndTimestamp.make("0+2+3", 6L),
+                ValueAndTimestamp.make("0+2+3+4", 6L),
+                ValueAndTimestamp.make("0+2+3+4+5", 13L),
+                ValueAndTimestamp.make("0+2+3+4+5+6", 13L)
+            ));
+            expected.put(4L, mkSet(
+                ValueAndTimestamp.make("0+2+3", 6L),
+                ValueAndTimestamp.make("0+2+3+5", 13L),
+                ValueAndTimestamp.make("0+2+3+5+6", 13L)
+            ));
+            expected.put(6L, mkSet(
+                ValueAndTimestamp.make("0+3", 6L),
+                ValueAndTimestamp.make("0+3+5", 13L),
+                ValueAndTimestamp.make("0+3+5+6", 13L)
+            ));
+            expected.put(7L, mkSet(
+                ValueAndTimestamp.make("0+5", 13L),
+                ValueAndTimestamp.make("0+5+6", 13L)
+            ));
+            expected.put(11L, mkSet(
+                ValueAndTimestamp.make("0+5", 13L)
+            ));
+            expected.put(20L, mkSet(
+                ValueAndTimestamp.make("0+6", 70L)
+            ));
+        }
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testNoGracePeriodSmallInput() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final String topic = "topic";
+
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(
+            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic-Canonized").withValueSerde(Serdes.String()));
+
+        final KTable<Windowed<String>, String> table2 = builder
+            .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(50)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                materialized
+            );
+        final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        table2.toStream().process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic =
+                    driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
+
+            inputTopic.pipeInput("A", "1", 100L);
+            inputTopic.pipeInput("A", "2", 105L);
+            inputTopic.pipeInput("A", "3", 106L);
+            inputTopic.pipeInput("A", "4", 103L);
+            inputTopic.pipeInput("A", "5", 113L);
+            inputTopic.pipeInput("A", "6", 110L);
+        }
+
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = gatherOutput(supplier);
+        final Map<Long, Set<ValueAndTimestamp<String>>> expected = new HashMap<>();
+
+        if (emitFinal) {
+            expected.put(50L, mkSet(
+                ValueAndTimestamp.make("0+1", 100L)
+            ));
+            expected.put(55L, mkSet(
+                ValueAndTimestamp.make("0+1+2", 105L)
+            ));
+            expected.put(56L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4", 106L)
+            ));
+        } else {
+            expected.put(50L, mkSet(
+                ValueAndTimestamp.make("0+1", 100L)
+            ));
+            expected.put(55L, mkSet(
+                ValueAndTimestamp.make("0+1+2", 105L)
+            ));
+            expected.put(56L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3", 106L),
+                ValueAndTimestamp.make("0+1+2+3+4", 106L)
+            ));
+            expected.put(63L, mkSet(
+                ValueAndTimestamp.make("0+1+2+3+4+5", 113L),
+                ValueAndTimestamp.make("0+1+2+3+4+5+6", 113L)
+            ));
+            expected.put(101L, mkSet(
+                ValueAndTimestamp.make("0+2", 105L),
+                ValueAndTimestamp.make("0+2+3", 106L),
+                ValueAndTimestamp.make("0+2+3+4", 106L),
+                ValueAndTimestamp.make("0+2+3+4+5", 113L),
+                ValueAndTimestamp.make("0+2+3+4+5+6", 113L)
+            ));
+            expected.put(104L, mkSet(
+                ValueAndTimestamp.make("0+2+3", 106L),
+                ValueAndTimestamp.make("0+2+3+5", 113L),
+                ValueAndTimestamp.make("0+2+3+5+6", 113L)
+            ));
+            expected.put(106L, mkSet(
+                ValueAndTimestamp.make("0+3", 106L),
+                ValueAndTimestamp.make("0+3+5", 113L),
+                ValueAndTimestamp.make("0+3+5+6", 113L)
+            ));
+            expected.put(107L, mkSet(
+                ValueAndTimestamp.make("0+5", 113L),
+                ValueAndTimestamp.make("0+5+6", 113L)
+            ));
+            expected.put(111L, mkSet(
+                ValueAndTimestamp.make("0+5", 113L)
+            ));
+        }
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testEarlyNoGracePeriodLargeInput() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final String topic = "topic";
+        final WindowBytesStoreSupplier storeSupplier =
+                inOrderIterator
+                        ? new InOrderMemoryWindowStoreSupplier("InOrder", 500L, 10L, false)
+                        : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(500), Duration.ofMillis(10), false);
+
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
+
+        final KTable<Windowed<String>, String> table2 = builder
+            .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(10)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                materialized
+            );
+
+        final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        table2.toStream().process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic1 =
+                    driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
+
+            inputTopic1.pipeInput("E", "1", 0L);
+            inputTopic1.pipeInput("E", "3", 5L);
+            inputTopic1.pipeInput("E", "4", 6L);
+            inputTopic1.pipeInput("E", "2", 3L);
+            inputTopic1.pipeInput("E", "6", 13L);
+            inputTopic1.pipeInput("E", "5", 10L);
+            inputTopic1.pipeInput("E", "7", 4L);
+            inputTopic1.pipeInput("E", "8", 2L);
+            inputTopic1.pipeInput("E", "9", 15L);
+        }
+        final Comparator<KeyValueTimestamp<Windowed<String>, String>> comparator =
+                Comparator.comparing((KeyValueTimestamp<Windowed<String>, String> o) -> o.key().key())
+                        .thenComparing((KeyValueTimestamp<Windowed<String>, String> o) -> o.key().window().start());
+
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> actual = supplier.theCapturedProcessor().processed();
+        actual.sort(comparator);
+
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2", 6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2", 6),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6+5+7", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5+7", 13)),
+                actual
+            );
+        } else {
+            assertEquals(
+                asList(
+                    // E@0
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1", 0),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3", 5),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4", 6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(0, 10)), "0+1+3+4+2", 6),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3", 5),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4", 6),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(1, 11)), "0+3+4+2", 6),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6", 13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6+5", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(3, 13)), "0+3+4+2+6+5+7", 13),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4", 6),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6", 13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(4, 14)), "0+3+4+6+5+7", 13),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)), "0+3+4+6+5", 13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(5, 15)), "0+3+4+6+5+9", 15),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4", 6),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6", 13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5", 13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(6, 16)), "0+4+6+5+9", 15),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6", 13),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5", 13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(7, 17)), "0+6+5+9", 15),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6", 13),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(11, 21)), "0+6+9", 15),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(14, 24)), "0+9", 15)),
+                actual
+            );
+        }
+    }
+
+    @Test
+    public void testNoGracePeriodLargeInput() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final String topic = "topic";
+        final WindowBytesStoreSupplier storeSupplier =
+                inOrderIterator
+                        ? new InOrderMemoryWindowStoreSupplier("InOrder", 500L, 10L, false)
+                        : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(500), Duration.ofMillis(10), false);
+
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
+
+        final KTable<Windowed<String>, String> table2 = builder
+            .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(SlidingWindows.ofTimeDifferenceWithNoGrace(ofMillis(10)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                materialized
+            );
+
+        final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
+        table2.toStream().process(supplier);
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TestInputTopic<String, String> inputTopic1 =
+                    driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
+
+            inputTopic1.pipeInput("E", "1", 100L);
+            inputTopic1.pipeInput("E", "3", 105L);
+            inputTopic1.pipeInput("E", "4", 106L);
+            inputTopic1.pipeInput("E", "2", 103L);
+            inputTopic1.pipeInput("E", "6", 113L);
+            inputTopic1.pipeInput("E", "5", 110L);
+            inputTopic1.pipeInput("E", "7", 104L);
+            inputTopic1.pipeInput("E", "8", 102L);
+            inputTopic1.pipeInput("E", "9", 115L);
+        }
+        final Comparator<KeyValueTimestamp<Windowed<String>, String>> comparator =
+                Comparator.comparing((KeyValueTimestamp<Windowed<String>, String> o) -> o.key().key())
+                        .thenComparing((KeyValueTimestamp<Windowed<String>, String> o) -> o.key().window().start());
+
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> actual = supplier.theCapturedProcessor().processed();
+        actual.sort(comparator);
+
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    // E@0
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(90, 100)), "0+1", 100),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(95, 105)), "0+1+3", 105),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(96, 106)), "0+1+3+4+2", 106),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(101, 111)), "0+3+4+2", 106),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(103, 113)), "0+3+4+2+6+5+7", 113),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(104, 114)), "0+3+4+6+5+7", 113)),
+                actual
+            );
+        } else {
+            assertEquals(
+                asList(
+                    // E@0
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(90, 100)), "0+1", 100),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(95, 105)), "0+1+3", 105),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(96, 106)), "0+1+3+4", 106),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(96, 106)), "0+1+3+4+2", 106),
+                    // E@5
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(101, 111)), "0+3", 105),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(101, 111)), "0+3+4", 106),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(101, 111)), "0+3+4+2", 106),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(103, 113)), "0+3+4+2+6", 113),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(103, 113)), "0+3+4+2+6+5", 113),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(103, 113)), "0+3+4+2+6+5+7", 113),
+                    // E@3
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(104, 114)), "0+3+4", 106),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(104, 114)), "0+3+4+6", 113),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(104, 114)), "0+3+4+6+5", 113),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(104, 114)), "0+3+4+6+5+7", 113),
+                    //E@4
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(105, 115)), "0+3+4+6+5", 113),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(105, 115)), "0+3+4+6+5+9", 115),
+                    // E@6
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(106, 116)), "0+4", 106),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(106, 116)), "0+4+6", 113),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(106, 116)), "0+4+6+5", 113),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(106, 116)), "0+4+6+5+9", 115),
+                    //E@13
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(107, 117)), "0+6", 113),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(107, 117)), "0+6+5", 113),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(107, 117)), "0+6+5+9", 115),
+                    //E@10
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(111, 121)), "0+6", 113),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(111, 121)), "0+6+9", 115),
+                    //E@15
+                    new KeyValueTimestamp<>(new Windowed<>("E", new TimeWindow(114, 124)), "0+9", 115)),
+                actual
+            );
+        }
     }
 
     @Test
@@ -703,11 +1450,14 @@ public void shouldLogAndMeterWhenSkippingNullKey() {
         final String builtInMetricsVersion = StreamsConfig.METRICS_LATEST;
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
-        builder
-                .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
-                .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-                .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(100)))
-                .aggregate(MockInitializer.STRING_INIT, MockAggregator.toStringInstance("+"), Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized").withValueSerde(Serdes.String()));
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(
+            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonized").withValueSerde(Serdes.String()));
+
+        builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(100)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.toStringInstance("+"), materialized);
 
         props.setProperty(StreamsConfig.BUILT_IN_METRICS_VERSION_CONFIG, builtInMetricsVersion);
 
@@ -731,18 +1481,17 @@ public void shouldLogAndMeterWhenSkippingExpiredWindowByGrace() {
         final String builtInMetricsVersion = StreamsConfig.METRICS_LATEST;
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
-        final WindowBytesStoreSupplier storeSupplier =
-            inOrderIterator
-                ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
-                : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
+        final WindowBytesStoreSupplier storeSupplier = setupWindowBytesStoreSupplier(1);
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
 
         final KStream<String, String> stream1 = builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()));
         stream1.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
             .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(90)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.as(storeSupplier)
+                materialized
             )
             .toStream()
             .to("output");
@@ -762,8 +1511,10 @@ public void shouldLogAndMeterWhenSkippingExpiredWindowByGrace() {
             inputTopic.pipeInput("k", "4", 104L);
             inputTopic.pipeInput("k", "5", 105L);
             inputTopic.pipeInput("k", "6", 15L);
+            inputTopic.pipeInput("k", "101", 300L);
+            inputTopic.pipeInput("k", "102", 400L);
 
-            assertLatenessMetrics(driver, is(7.0), is(185.0), is(96.25));
+            assertLatenessMetrics(driver, is(7.0), is(185.0), is(77.0));
 
             assertThat(appender.getMessages(), hasItems(
                     // left window for k@100
@@ -779,12 +1530,26 @@ public void shouldLogAndMeterWhenSkippingExpiredWindowByGrace() {
                     // left window for k@105
                     "Skipping record for expired window. topic=[topic] partition=[0] offset=[6] timestamp=[105] window=[95,105] expiration=[110] streamTime=[200]",
                     // left window for k@15
-                    "Skipping record for expired window. topic=[topic] partition=[0] offset=[7] timestamp=[15] window=[5,15] expiration=[110] streamTime=[200]"
+                    "Skipping record for expired window. topic=[topic] partition=[0] offset=[7] timestamp=[15] window=[15,25] expiration=[110] streamTime=[200]"
             ));
             final TestOutputTopic<Windowed<String>, String> outputTopic =
                     driver.createOutputTopic("output", new TimeWindowedDeserializer<>(new StringDeserializer(), 10L), new StringDeserializer());
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>(new Windowed<>("k", new TimeWindow(190, 200)), "0+100", null, 200L)));
-            assertTrue(outputTopic.isEmpty());
+
+            if (emitFinal) {
+                assertThat(outputTopic.readRecord(), equalTo(
+                    new TestRecord<>(new Windowed<>("k", new TimeWindow(190, 200)), "0+100", null, 200L)));
+                assertThat(outputTopic.readRecord(), equalTo(
+                    new TestRecord<>(new Windowed<>("k", new TimeWindow(290, 300)), "0+101", null, 300L)));
+                assertTrue(outputTopic.isEmpty());
+            } else {
+                assertThat(outputTopic.readRecord(), equalTo(
+                    new TestRecord<>(new Windowed<>("k", new TimeWindow(190, 200)), "0+100", null, 200L)));
+                assertThat(outputTopic.readRecord(), equalTo(
+                    new TestRecord<>(new Windowed<>("k", new TimeWindow(290, 300)), "0+101", null, 300L)));
+                assertThat(outputTopic.readRecord(), equalTo(
+                    new TestRecord<>(new Windowed<>("k", new TimeWindow(390, 400)), "0+102", null, 400L)));
+                assertTrue(outputTopic.isEmpty());
+            }
         }
     }
 
@@ -798,10 +1563,13 @@ public void testAggregateRandomInput() {
                 ? new InOrderMemoryWindowStoreSupplier("InOrder", 50000L, 10L, false)
                 : Stores.inMemoryWindowStore("Reverse", Duration.ofMillis(50000), Duration.ofMillis(10), false);
 
+        final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized = setupMaterialized(emitFinal ? Materialized.as("store-name") : Materialized.as(storeSupplier));
+
         final KTable<Windowed<String>, String> table = builder
             .stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(10000)))
+            .windowedBy(SlidingWindows.ofTimeDifferenceAndGrace(ofMillis(10), ofMillis(100)))
+            .emitStrategy(emitStrategy)
             // The aggregator needs to sort the strings so the window value is the same for the final windows even when
             // records are processed in a different order. Here, we sort alphabetically.
             .aggregate(
@@ -813,7 +1581,7 @@ public void testAggregateRandomInput() {
                     aggregate = String.valueOf(ch);
                     return aggregate;
                 },
-                Materialized.as(storeSupplier)
+                materialized
             );
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table.toStream().process(supplier);
@@ -853,6 +1621,7 @@ public void testAggregateRandomInput() {
                 for (final ValueAndTimestamp<String> i : input) {
                     inputTopic1.pipeInput("A", i.value(), i.timestamp());
                 }
+                inputTopic1.pipeInput("A", "V", 181);
             }
 
             final Map<Long, ValueAndTimestamp<String>> results = new HashMap<>();
@@ -879,33 +1648,63 @@ public void testAggregateRandomInput() {
 
     private void verifyRandomTestResults(final Map<Long, ValueAndTimestamp<String>> actual) {
         final Map<Long, ValueAndTimestamp<String>> expected = new HashMap<>();
-        expected.put(0L, ValueAndTimestamp.make("ARSTU", 10L));
-        expected.put(3L, ValueAndTimestamp.make("ASTU", 10L));
-        expected.put(4L, ValueAndTimestamp.make("ATU", 10L));
-        expected.put(5L, ValueAndTimestamp.make("ABTU", 15L));
-        expected.put(6L, ValueAndTimestamp.make("ABCU", 16L));
-        expected.put(8L, ValueAndTimestamp.make("ABCDU", 18L));
-        expected.put(9L, ValueAndTimestamp.make("ABCD", 18L));
-        expected.put(11L, ValueAndTimestamp.make("BCD", 18L));
-        expected.put(16L, ValueAndTimestamp.make("CD", 18L));
-        expected.put(17L, ValueAndTimestamp.make("D", 18L));
-        expected.put(20L, ValueAndTimestamp.make("E", 30L));
-        expected.put(30L, ValueAndTimestamp.make("EF", 40L));
-        expected.put(31L, ValueAndTimestamp.make("F", 40L));
-        expected.put(45L, ValueAndTimestamp.make("G", 55L));
-        expected.put(46L, ValueAndTimestamp.make("GH", 56L));
-        expected.put(48L, ValueAndTimestamp.make("GHIJ", 58L));
-        expected.put(52L, ValueAndTimestamp.make("GHIJK", 62L));
-        expected.put(53L, ValueAndTimestamp.make("GHIJKLMN", 63L));
-        expected.put(56L, ValueAndTimestamp.make("HIJKLMN", 63L));
-        expected.put(57L, ValueAndTimestamp.make("IJKLMN", 63L));
-        expected.put(59L, ValueAndTimestamp.make("KLMN", 63L));
-        expected.put(63L, ValueAndTimestamp.make("LMN", 63L));
-        expected.put(66L, ValueAndTimestamp.make("O", 76L));
-        expected.put(67L, ValueAndTimestamp.make("OP", 77L));
-        expected.put(70L, ValueAndTimestamp.make("OPQ", 80L));
-        expected.put(77L, ValueAndTimestamp.make("PQ", 80L));
-        expected.put(78L, ValueAndTimestamp.make("Q", 80L));
+
+        if (emitFinal) {
+            expected.put(0L, ValueAndTimestamp.make("ARSTU", 10L));
+            expected.put(3L, ValueAndTimestamp.make("ASTU", 10L));
+            expected.put(4L, ValueAndTimestamp.make("ATU", 10L));
+            expected.put(5L, ValueAndTimestamp.make("ABTU", 15L));
+            expected.put(6L, ValueAndTimestamp.make("ABCU", 16L));
+            expected.put(8L, ValueAndTimestamp.make("ABCDU", 18L));
+            expected.put(9L, ValueAndTimestamp.make("ABCD", 18L));
+            expected.put(11L, ValueAndTimestamp.make("BCD", 18L));
+            expected.put(16L, ValueAndTimestamp.make("CD", 18L));
+            expected.put(17L, ValueAndTimestamp.make("D", 18L));
+            expected.put(20L, ValueAndTimestamp.make("E", 30L));
+            expected.put(30L, ValueAndTimestamp.make("EF", 40L));
+            expected.put(31L, ValueAndTimestamp.make("F", 40L));
+            expected.put(45L, ValueAndTimestamp.make("G", 55L));
+            expected.put(46L, ValueAndTimestamp.make("GH", 56L));
+            expected.put(48L, ValueAndTimestamp.make("GHIJ", 58L));
+            expected.put(52L, ValueAndTimestamp.make("GHIJK", 62L));
+            expected.put(53L, ValueAndTimestamp.make("GHIJKLMN", 63L));
+            expected.put(56L, ValueAndTimestamp.make("HIJKLMN", 63L));
+            expected.put(57L, ValueAndTimestamp.make("IJKLMN", 63L));
+            expected.put(59L, ValueAndTimestamp.make("KLMN", 63L));
+            expected.put(63L, ValueAndTimestamp.make("LMN", 63L));
+            expected.put(66L, ValueAndTimestamp.make("O", 76L));
+            expected.put(67L, ValueAndTimestamp.make("OP", 77L));
+            expected.put(70L, ValueAndTimestamp.make("OPQ", 80L));
+        } else {
+            expected.put(0L, ValueAndTimestamp.make("ARSTU", 10L));
+            expected.put(3L, ValueAndTimestamp.make("ASTU", 10L));
+            expected.put(4L, ValueAndTimestamp.make("ATU", 10L));
+            expected.put(5L, ValueAndTimestamp.make("ABTU", 15L));
+            expected.put(6L, ValueAndTimestamp.make("ABCU", 16L));
+            expected.put(8L, ValueAndTimestamp.make("ABCDU", 18L));
+            expected.put(9L, ValueAndTimestamp.make("ABCD", 18L));
+            expected.put(11L, ValueAndTimestamp.make("BCD", 18L));
+            expected.put(16L, ValueAndTimestamp.make("CD", 18L));
+            expected.put(17L, ValueAndTimestamp.make("D", 18L));
+            expected.put(20L, ValueAndTimestamp.make("E", 30L));
+            expected.put(30L, ValueAndTimestamp.make("EF", 40L));
+            expected.put(31L, ValueAndTimestamp.make("F", 40L));
+            expected.put(45L, ValueAndTimestamp.make("G", 55L));
+            expected.put(46L, ValueAndTimestamp.make("GH", 56L));
+            expected.put(48L, ValueAndTimestamp.make("GHIJ", 58L));
+            expected.put(52L, ValueAndTimestamp.make("GHIJK", 62L));
+            expected.put(53L, ValueAndTimestamp.make("GHIJKLMN", 63L));
+            expected.put(56L, ValueAndTimestamp.make("HIJKLMN", 63L));
+            expected.put(57L, ValueAndTimestamp.make("IJKLMN", 63L));
+            expected.put(59L, ValueAndTimestamp.make("KLMN", 63L));
+            expected.put(63L, ValueAndTimestamp.make("LMN", 63L));
+            expected.put(66L, ValueAndTimestamp.make("O", 76L));
+            expected.put(67L, ValueAndTimestamp.make("OP", 77L));
+            expected.put(70L, ValueAndTimestamp.make("OPQ", 80L));
+            expected.put(77L, ValueAndTimestamp.make("PQ", 80L));
+            expected.put(78L, ValueAndTimestamp.make("Q", 80L));
+            expected.put(171L, ValueAndTimestamp.make("V", 181L));
+        }
 
         assertEquals(expected, actual);
     }
@@ -963,6 +1762,38 @@ private void assertLatenessMetrics(final TopologyTestDriver driver,
         assertThat(driver.metrics().get(latenessAvgMetric).metricValue(), avgLateness);
     }
 
+    private WindowBytesStoreSupplier setupWindowBytesStoreSupplier(final int index) {
+        return inOrderIterator
+                ? new InOrderMemoryWindowStoreSupplier("InOrder" + index, 50000L, 10L, false)
+                : Stores.inMemoryWindowStore("Reverse" + index, Duration.ofMillis(50000L), Duration.ofMillis(10L), false);
+    }
+
+    private Materialized<String, String, WindowStore<Bytes, byte[]>> setupMaterialized(final Materialized<String, String, WindowStore<Bytes, byte[]>> materialized) {
+        if (emitFinal) {
+            if (withCache) {
+                return materialized.withCachingEnabled();
+            }
+            return materialized.withCachingDisabled();
+        }
+        return materialized;
+    }
+
+    private static Map<Long, Set<ValueAndTimestamp<String>>> gatherOutput(final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier) {
+        final Map<Long, Set<ValueAndTimestamp<String>>> actual = new HashMap<>();
+
+        for (final KeyValueTimestamp<Windowed<String>, String> entry : supplier.theCapturedProcessor().processed()) {
+            final Windowed<String> window = entry.key();
+            final Long start = window.window().start();
+            final ValueAndTimestamp<String> valueAndTimestamp = ValueAndTimestamp.make(entry.value(), entry.timestamp());
+            final Set<ValueAndTimestamp<String>> valueSet = actual.computeIfAbsent(start, k -> new HashSet<>());
+            valueSet.add(valueAndTimestamp);
+        }
+
+        return actual;
+    }
+
+
+
     private static class InOrderMemoryWindowStore extends InMemoryWindowStore {
         InOrderMemoryWindowStore(final String name,
                         final long retentionPeriod,
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregateTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregateTest.java
index 750f7f508bfea..8af320ae70590 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregateTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KStreamWindowAggregateTest.java
@@ -16,38 +16,62 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
 import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.serialization.StringSerializer;
 import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TestOutputTopic;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Grouped;
-import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.KTable;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.kstream.TimeWindows;
+import org.apache.kafka.streams.kstream.UnlimitedWindows;
 import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.Windows;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.api.MockProcessorContext.CapturedForward;
+import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorNode;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.TimestampedWindowStore;
+import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
 import org.apache.kafka.streams.state.WindowStore;
+import org.apache.kafka.streams.state.internals.RocksDbIndexedTimeOrderedWindowBytesStoreSupplier;
 import org.apache.kafka.streams.TestInputTopic;
 import org.apache.kafka.streams.test.TestRecord;
 import org.apache.kafka.test.MockAggregator;
 import org.apache.kafka.test.MockApiProcessor;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockInitializer;
+import org.apache.kafka.test.MockInternalNewProcessorContext;
 import org.apache.kafka.test.StreamsTestUtils;
+import org.apache.kafka.test.TestUtils;
 import org.hamcrest.Matcher;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.time.Duration;
 import java.util.List;
 import java.util.Properties;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
 import static java.time.Duration.ofMillis;
 import static java.util.Arrays.asList;
@@ -60,22 +84,53 @@
 import static org.hamcrest.CoreMatchers.not;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 
+@RunWith(Parameterized.class)
 public class KStreamWindowAggregateTest {
+    private static final String WINDOW_STORE_NAME = "dummy-store-name";
     private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
     private final String threadId = Thread.currentThread().getName();
 
+    @Parameter
+    public StrategyType type;
+
+    @Parameter(1)
+    public boolean withCache;
+
+    private EmitStrategy emitStrategy;
+
+    private boolean emitFinal;
+
+    @Parameterized.Parameters(name = "{0}_cache:{1}")
+    public static Collection<Object[]> getEmitStrategy() {
+        return asList(new Object[][] {
+            {StrategyType.ON_WINDOW_UPDATE, true},
+            {StrategyType.ON_WINDOW_UPDATE, false},
+            {StrategyType.ON_WINDOW_CLOSE, true},
+            {StrategyType.ON_WINDOW_CLOSE, false}
+        });
+    }
+
+    @Before
+    public void before() {
+        emitFinal = type.equals(StrategyType.ON_WINDOW_CLOSE);
+        emitStrategy = StrategyType.forType(type);
+        // Set interval to 0 so that it always tries to emit
+        props.setProperty(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, "0");
+    }
+
     @Test
     public void testAggBasic() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic1 = "topic1";
 
-        final KTable<Windowed<String>, String> table2 = builder
-            .stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
+        final KTable<Windowed<String>, String> table2 = builder.stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
             .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(100)).advanceBy(ofMillis(5)))
-            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonized").withValueSerde(Serdes.String()));
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonized").withValueSerde(Serdes.String())));
 
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table2.toStream().process(supplier);
@@ -106,41 +161,58 @@ public void testAggBasic() {
             inputTopic1.pipeInput("B", "3", 9L);
         }
 
-        assertEquals(
-            asList(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 0),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2", 1),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3", 2),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4", 3),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1", 4),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1+1+1",  5),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1",  5),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2",  6),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2",  6),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)),  "0+4+4",  7),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)),  "0+4",  7),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2",  8),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2",  8),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)),  "0+3+3",  9),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)),  "0+3",  9),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1+1",  10),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)),  "0+1",  10),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2+2",  11),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10, 20)),  "0+2",  11),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)),  "0+4+4",  12),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(10, 20)),  "0+4",  12),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2+2+2",  13),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10, 20)),  "0+2+2",  13),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)),  "0+3+3",  14),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(10, 20)),  "0+3",  14),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2+1",  8),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2+1+2",  8),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2+1+2+3",  9),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2+2+2+3",  13)
+        if (emitFinal) {
+            // Nothing processed since grace is 100L and no window closes
+            assertTrue(supplier.theCapturedProcessor().processed().isEmpty());
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 0),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2", 1),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3", 2),
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4", 3),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1", 4),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1+1",
+                        5),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1", 5),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2+2", 6),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2", 6),
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4+4", 7),
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4", 7),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2+2+2",
+                        8),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2", 8),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3+3", 9),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+3", 9),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1",
+                        10),
+                    new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(10, 20)), "0+1", 10),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2+2",
+                        11),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10, 20)), "0+2", 11),
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4+4",
+                        12),
+                    new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(10, 20)), "0+4", 12),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2+2+2",
+                        13),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(10, 20)), "0+2+2",
+                        13),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+3+3",
+                        14),
+                    new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(10, 20)), "0+3", 14),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2+2+2+1",
+                        8),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        "0+2+2+2+1+2", 8),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        "0+2+2+2+1+2+3", 9),
+                    new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        "0+2+2+2+2+3", 13)
 
                 ),
-            supplier.theCapturedProcessor().processed()
-        );
+                supplier.theCapturedProcessor().processed()
+            );
+        }
     }
 
     @Test
@@ -148,12 +220,14 @@ public void testJoin() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic1 = "topic1";
         final String topic2 = "topic2";
+        final long grace = emitFinal ? 5L : 100L;
 
         final KTable<Windowed<String>, String> table1 = builder
             .stream(topic1, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(100)).advanceBy(ofMillis(5)))
-            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonized").withValueSerde(Serdes.String()));
+            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(grace)).advanceBy(ofMillis(5)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonized").withValueSerde(Serdes.String())));
 
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         table1.toStream().process(supplier);
@@ -161,8 +235,9 @@ public void testJoin() {
         final KTable<Windowed<String>, String> table2 = builder
             .stream(topic2, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(100)).advanceBy(ofMillis(5)))
-            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic2-Canonized").withValueSerde(Serdes.String()));
+            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(grace)).advanceBy(ofMillis(5)))
+            .emitStrategy(emitStrategy)
+            .aggregate(MockInitializer.STRING_INIT, MockAggregator.TOSTRING_ADDER, setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic2-Canonized").withValueSerde(Serdes.String())));
         table2.toStream().process(supplier);
 
         table1.join(table2, (p1, p2) -> p1 + "%" + p2).toStream().process(supplier);
@@ -172,95 +247,201 @@ public void testJoin() {
                     driver.createInputTopic(topic1, new StringSerializer(), new StringSerializer());
             final TestInputTopic<String, String> inputTopic2 =
                     driver.createInputTopic(topic2, new StringSerializer(), new StringSerializer());
-            inputTopic1.pipeInput("A", "1", 0L);
-            inputTopic1.pipeInput("B", "2", 1L);
-            inputTopic1.pipeInput("C", "3", 2L);
-            inputTopic1.pipeInput("D", "4", 3L);
-            inputTopic1.pipeInput("A", "1", 9L);
 
-            final List<MockApiProcessor<Windowed<String>, String, Void, Void>> processors = supplier.capturedProcessors(3);
+            if (emitFinal) {
+                processEmitFinalJoin(inputTopic1, inputTopic2, supplier);
+            } else {
+                processEmitUpdateJoin(inputTopic1, inputTopic2, supplier);
+            }
+        }
+    }
 
-            processors.get(0).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1",  0),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2",  1),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)),  "0+3",  2),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)),  "0+4",  3),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1+1",  9),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1",  9)
-            );
-            processors.get(1).checkAndClearProcessResult();
-            processors.get(2).checkAndClearProcessResult();
+    private void processEmitFinalJoin(final TestInputTopic<String, String> inputTopic1,
+                                      final TestInputTopic<String, String> inputTopic2,
+                                      final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier) {
+        inputTopic1.pipeInput("A", "1", 0L);
+        inputTopic1.pipeInput("B", "2", 1L);
+        inputTopic1.pipeInput("C", "3", 2L);
+        inputTopic1.pipeInput("D", "4", 3L);
+        inputTopic1.pipeInput("A", "1", 9L);
+        inputTopic1.pipeInput("A", "1", 15L);
 
-            inputTopic1.pipeInput("A", "1", 5L);
-            inputTopic1.pipeInput("B", "2", 6L);
-            inputTopic1.pipeInput("D", "4", 7L);
-            inputTopic1.pipeInput("B", "2", 8L);
-            inputTopic1.pipeInput("C", "3", 9L);
+        final List<MockApiProcessor<Windowed<String>, String, Void, Void>> processors = supplier.capturedProcessors(
+            3);
 
-            processors.get(0).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1+1+1",  9),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1+1",  9),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2",  6),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2",  6),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)),  "0+4+4",  7),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)),  "0+4",  7),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2",  8),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2",  8),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)),  "0+3+3",  9),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)),  "0+3",  9)
-            );
-            processors.get(1).checkAndClearProcessResult();
-            processors.get(2).checkAndClearProcessResult();
-
-            inputTopic2.pipeInput("A", "a", 0L);
-            inputTopic2.pipeInput("B", "b", 1L);
-            inputTopic2.pipeInput("C", "c", 2L);
-            inputTopic2.pipeInput("D", "d", 20L);
-            inputTopic2.pipeInput("A", "a", 20L);
-
-            processors.get(0).checkAndClearProcessResult();
-            processors.get(1).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+a",  0),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+b",  1),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)),  "0+c",  2),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(15, 25)),  "0+d",  20),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(20, 30)),  "0+d",  20),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(15, 25)),  "0+a",  20),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(20, 30)),  "0+a",  20)
-            );
-            processors.get(2).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1+1+1%0+a",  9),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2%0+b",  8),
-                new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)),  "0+3+3%0+c",  9));
-
-            inputTopic2.pipeInput("A", "a", 5L);
-            inputTopic2.pipeInput("B", "b", 6L);
-            inputTopic2.pipeInput("D", "d", 7L);
-            inputTopic2.pipeInput("D", "d", 18L);
-            inputTopic2.pipeInput("A", "a", 21L);
-
-            processors.get(0).checkAndClearProcessResult();
-            processors.get(1).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+a+a",  5),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+a",  5),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+b+b",  6),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+b",  6),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)),  "0+d",  7),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)),  "0+d",  7),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(10, 20)),  "0+d",  18),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(15, 25)),  "0+d+d",  20),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(15, 25)),  "0+a+a",  21),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(20, 30)),  "0+a+a",  21)
-            );
-            processors.get(2).checkAndClearProcessResult(
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),  "0+1+1+1%0+a+a",  9),
-                new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)),  "0+1+1%0+a",  9),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),  "0+2+2+2%0+b+b",  8),
-                new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)),  "0+2+2%0+b",  8),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)),  "0+4+4%0+d",  7),
-                new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)),  "0+4%0+d",  7)
-            );
-        }
+        processors.get(0).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1", 9),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2", 1),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3", 2),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4", 3)
+        );
+        processors.get(1).checkAndClearProcessResult();
+        processors.get(2).checkAndClearProcessResult();
+
+        inputTopic1.pipeInput("A", "1", 10L);
+        inputTopic1.pipeInput("B", "2", 11L);
+        inputTopic1.pipeInput("D", "4", 12L);
+        inputTopic1.pipeInput("B", "2", 13L);
+        inputTopic1.pipeInput("C", "3", 14L);
+        inputTopic1.pipeInput("A", "1", 20L);
+
+        processors.get(0).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1", 10),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2", 13),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+3", 14),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4", 12)
+        );
+        processors.get(1).checkAndClearProcessResult();
+        processors.get(2).checkAndClearProcessResult();
+
+        inputTopic2.pipeInput("A", "a", 0L);
+        inputTopic2.pipeInput("B", "b", 1L);
+        inputTopic2.pipeInput("C", "c", 2L);
+        inputTopic2.pipeInput("D", "d", 10L);
+        inputTopic2.pipeInput("A", "a", 15L);
+
+        processors.get(0).checkAndClearProcessResult();
+        processors.get(1).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+a", 0),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+b", 1),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+c", 2)
+        );
+        processors.get(2).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                "0+1+1%0+a", 9),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                "0+2%0+b", 1),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3%0+c",
+                2));
+
+        inputTopic2.pipeInput("A", "a", 5L);
+        inputTopic2.pipeInput("B", "b", 6L);
+        inputTopic2.pipeInput("D", "d", 7L);
+        inputTopic2.pipeInput("D", "d", 18L);
+        inputTopic2.pipeInput("A", "a", 21L);
+
+        processors.get(0).checkAndClearProcessResult();
+        processors.get(1).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+a", 5),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+b", 6),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+d+d", 10)
+        );
+        processors.get(2).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1%0+a",
+                10),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2%0+b",
+                13),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4%0+d+d",
+                12)
+        );
+    }
+
+    private void processEmitUpdateJoin(final TestInputTopic<String, String> inputTopic1,
+                                      final TestInputTopic<String, String> inputTopic2,
+                                      final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier) {
+        inputTopic1.pipeInput("A", "1", 0L);
+        inputTopic1.pipeInput("B", "2", 1L);
+        inputTopic1.pipeInput("C", "3", 2L);
+        inputTopic1.pipeInput("D", "4", 3L);
+        inputTopic1.pipeInput("A", "1", 9L);
+
+        final List<MockApiProcessor<Windowed<String>, String, Void, Void>> processors = supplier.capturedProcessors(
+            3);
+
+        processors.get(0).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1", 0),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2", 1),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3", 2),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4", 3),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1", 9),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1", 9)
+        );
+        processors.get(1).checkAndClearProcessResult();
+        processors.get(2).checkAndClearProcessResult();
+
+        inputTopic1.pipeInput("A", "1", 5L);
+        inputTopic1.pipeInput("B", "2", 6L);
+        inputTopic1.pipeInput("D", "4", 7L);
+        inputTopic1.pipeInput("B", "2", 8L);
+        inputTopic1.pipeInput("C", "3", 9L);
+
+        processors.get(0).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+1+1+1",
+                9),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1", 9),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2+2", 6),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2", 6),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4+4", 7),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4", 7),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+2+2+2",
+                8),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2", 8),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3+3", 9),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(5, 15)), "0+3", 9)
+        );
+        processors.get(1).checkAndClearProcessResult();
+        processors.get(2).checkAndClearProcessResult();
+
+        inputTopic2.pipeInput("A", "a", 0L);
+        inputTopic2.pipeInput("B", "b", 1L);
+        inputTopic2.pipeInput("C", "c", 2L);
+        inputTopic2.pipeInput("D", "d", 20L);
+        inputTopic2.pipeInput("A", "a", 20L);
+
+        processors.get(0).checkAndClearProcessResult();
+        processors.get(1).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+a", 0),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+b", 1),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+c", 2),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(15, 25)), "0+d", 20),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(20, 30)), "0+d", 20),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(15, 25)), "0+a", 20),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(20, 30)), "0+a", 20)
+        );
+        processors.get(2).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                "0+1+1+1%0+a", 9),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                "0+2+2+2%0+b", 8),
+            new KeyValueTimestamp<>(new Windowed<>("C", new TimeWindow(0, 10)), "0+3+3%0+c",
+                9));
+
+        inputTopic2.pipeInput("A", "a", 5L);
+        inputTopic2.pipeInput("B", "b", 6L);
+        inputTopic2.pipeInput("D", "d", 7L);
+        inputTopic2.pipeInput("D", "d", 18L);
+        inputTopic2.pipeInput("A", "a", 21L);
+
+        processors.get(0).checkAndClearProcessResult();
+        processors.get(1).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)), "0+a+a", 5),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+a", 5),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)), "0+b+b", 6),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+b", 6),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+d", 7),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+d", 7),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(10, 20)), "0+d", 18),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(15, 25)), "0+d+d",
+                20),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(15, 25)), "0+a+a",
+                21),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(20, 30)), "0+a+a",
+                21)
+        );
+        processors.get(2).checkAndClearProcessResult(
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                "0+1+1+1%0+a+a", 9),
+            new KeyValueTimestamp<>(new Windowed<>("A", new TimeWindow(5, 15)), "0+1+1%0+a",
+                9),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                "0+2+2+2%0+b+b", 8),
+            new KeyValueTimestamp<>(new Windowed<>("B", new TimeWindow(5, 15)), "0+2+2%0+b",
+                8),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(0, 10)), "0+4+4%0+d",
+                7),
+            new KeyValueTimestamp<>(new Windowed<>("D", new TimeWindow(5, 15)), "0+4%0+d",
+                7)
+        );
     }
 
     @Test
@@ -268,14 +449,14 @@ public void shouldLogAndMeterWhenSkippingNullKey() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
-        builder
-            .stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+        builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
             .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
             .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(10)).advanceBy(ofMillis(5)))
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.toStringInstance("+"),
-                Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized").withValueSerde(Serdes.String())
+                setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized").withValueSerde(Serdes.String()))
             );
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(KStreamWindowAggregate.class);
@@ -294,24 +475,24 @@ public void shouldLogAndMeterWhenSkippingExpiredWindow() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
-        final KStream<String, String> stream1 = builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()));
-        stream1.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-               .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(90)).advanceBy(ofMillis(5)))
-               .aggregate(
-                   () -> "",
-                   MockAggregator.toStringInstance("+"),
-                   Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized")
-                       .withValueSerde(Serdes.String())
-                       .withCachingDisabled()
-                       .withLoggingDisabled()
-                       .withRetention(Duration.ofMillis(100))
-               )
-               .toStream()
-               .map((key, value) -> new KeyValue<>(key.toString(), value))
-               .to("output");
+        builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(90)).advanceBy(ofMillis(5)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                () -> "",
+                MockAggregator.toStringInstance("+"),
+                setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized")
+                    .withValueSerde(Serdes.String())
+                    .withLoggingDisabled()
+                    .withRetention(Duration.ofMillis(100)))
+            )
+            .toStream()
+            .map((key, value) -> new KeyValue<>(key.toString(), value))
+            .to("output");
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(KStreamWindowAggregate.class);
-             final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
 
             final TestInputTopic<String, String> inputTopic =
                     driver.createInputTopic(topic, new StringSerializer(), new StringSerializer());
@@ -323,12 +504,14 @@ public void shouldLogAndMeterWhenSkippingExpiredWindow() {
             inputTopic.pipeInput("k", "4", 4L);
             inputTopic.pipeInput("k", "5", 5L);
             inputTopic.pipeInput("k", "6", 6L);
+            inputTopic.pipeInput("k", "105", 105L);
+            inputTopic.pipeInput("k", "106", 106L);
 
             assertLatenessMetrics(
                 driver,
                 is(7.0), // how many events get dropped
                 is(100.0), // k:0 is 100ms late, since its time is 0, but it arrives at stream time 100.
-                is(84.875) // (0 + 100 + 99 + 98 + 97 + 96 + 95 + 94) / 8
+                is(67.9) // (0 + 100 + 99 + 98 + 97 + 96 + 95 + 94 + 0) / 10
             );
 
             assertThat(appender.getMessages(), hasItems(
@@ -344,10 +527,29 @@ public void shouldLogAndMeterWhenSkippingExpiredWindow() {
             final TestOutputTopic<String, String> outputTopic =
                     driver.createOutputTopic("output", new StringDeserializer(), new StringDeserializer());
 
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>("[k@95/105]", "+100", null, 100L)));
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>("[k@100/110]", "+100", null, 100L)));
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>("[k@5/15]", "+5", null, 5L)));
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>("[k@5/15]", "+5+6", null, 6L)));
+            if (emitFinal) {
+                // Window close time is 15 when timestamp is 105
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@5/15]", "+5+6", null, 6L)));
+                assertEmittedMetrics(driver, is(1.0));
+            } else {
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@95/105]", "+100", null, 100L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@100/110]", "+100", null, 100L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@5/15]", "+5", null, 5L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@5/15]", "+5+6", null, 6L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@100/110]", "+100+105", null, 105L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@105/115]", "+105", null, 105L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@100/110]", "+100+105+106", null, 106L)));
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@105/115]", "+105+106", null, 106L)));
+            }
             assertTrue(outputTopic.isEmpty());
         }
     }
@@ -357,17 +559,18 @@ public void shouldLogAndMeterWhenSkippingExpiredWindowByGrace() {
         final StreamsBuilder builder = new StreamsBuilder();
         final String topic = "topic";
 
-        final KStream<String, String> stream1 = builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()));
-        stream1.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-               .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(90L)).advanceBy(ofMillis(10)))
-               .aggregate(
-                   () -> "",
-                   MockAggregator.toStringInstance("+"),
-                   Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized").withValueSerde(Serdes.String()).withCachingDisabled().withLoggingDisabled()
-               )
-               .toStream()
-               .map((key, value) -> new KeyValue<>(key.toString(), value))
-               .to("output");
+        builder.stream(topic, Consumed.with(Serdes.String(), Serdes.String()))
+            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+            .windowedBy(TimeWindows.ofSizeAndGrace(ofMillis(10), ofMillis(90)).advanceBy(ofMillis(10)))
+            .emitStrategy(emitStrategy)
+            .aggregate(
+                () -> "",
+                MockAggregator.toStringInstance("+"),
+                setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("topic1-Canonicalized").withValueSerde(Serdes.String()).withLoggingDisabled())
+            )
+            .toStream()
+            .map((key, value) -> new KeyValue<>(key.toString(), value))
+            .to("output");
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(KStreamWindowAggregate.class);
              final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
@@ -395,13 +598,417 @@ public void shouldLogAndMeterWhenSkippingExpiredWindowByGrace() {
                 "Skipping record for expired window. topic=[topic] partition=[0] offset=[7] timestamp=[6] window=[0,10) expiration=[110] streamTime=[200]"
             ));
 
-            final TestOutputTopic<String, String> outputTopic =
-                    driver.createOutputTopic("output", new StringDeserializer(), new StringDeserializer());
-            assertThat(outputTopic.readRecord(), equalTo(new TestRecord<>("[k@200/210]", "+100", null, 200L)));
-            assertTrue(outputTopic.isEmpty());
+            if (!emitFinal) {
+                final TestOutputTopic<String, String> outputTopic =
+                    driver.createOutputTopic("output", new StringDeserializer(),
+                        new StringDeserializer());
+                assertThat(outputTopic.readRecord(),
+                    equalTo(new TestRecord<>("[k@200/210]", "+100", null, 200L)));
+                assertTrue(outputTopic.isEmpty());
+            }
         }
     }
 
+    @Test
+    public void shouldNotEmitFinalIfNotProgressEnough() throws IOException {
+        final File stateDir = TestUtils.tempDirectory();
+        final long windowSize = 10L;
+        final Windows<TimeWindow> windows = TimeWindows.ofSizeAndGrace(ofMillis(windowSize), ofMillis(5)).advanceBy(ofMillis(5));
+
+        try {
+            // Always process
+            props.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0);
+            final MockInternalNewProcessorContext<Windowed<String>, Change<String>> context = makeContext(stateDir, windowSize);
+            final KStreamWindowAggregate<String, String, String, TimeWindow> processorSupplier = new KStreamWindowAggregate<>(
+                windows,
+                WINDOW_STORE_NAME,
+                emitStrategy,
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER
+            );
+            final Processor<String, String, Windowed<String>, Change<String>> processor = processorSupplier.get();
+            processor.init(context);
+
+            context.setSystemTimeMs(0L);
+            processor.process(new Record<>("A", "1", 0));
+            processor.process(new Record<>("B", "2", 5));
+            processor.process(new Record<>("C", "3", 15));
+
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> forwarded = context.forwarded();
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> expected;
+            if (emitFinal) {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        new Change<>("0+2", null), 5))
+                );
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        new Change<>("0+2", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(10, 20)),
+                        new Change<>("0+3", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(15, 25)),
+                        new Change<>("0+3", null), 15))
+                );
+            }
+            assertThat(forwarded, is(expected));
+            context.resetForwards();
+
+            processor.process(new Record<>("D", "4", 15));
+            forwarded = context.forwarded();
+            if (emitFinal) {
+                // None emitted because observedTime doesn't progress
+                assertTrue(forwarded.isEmpty());
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(10, 20)),
+                        new Change<>("0+4", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(15, 25)),
+                        new Change<>("0+4", null), 15))
+                );
+                assertThat(forwarded, is(expected));
+            }
+            context.resetForwards();
+
+            processor.process(new Record<>("E", "5", 19));
+            forwarded = context.forwarded();
+            if (emitFinal) {
+                // None emitted because observedTime doesn't progress enough to cover new windows
+                assertTrue(forwarded.isEmpty());
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("E", new TimeWindow(10, 20)),
+                        new Change<>("0+5", null), 19)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("E", new TimeWindow(15, 25)),
+                        new Change<>("0+5", null), 19))
+                );
+                assertThat(forwarded, is(expected));
+            }
+
+            context.getStateStore(WINDOW_STORE_NAME).close();
+        } finally {
+            Utils.delete(stateDir);
+        }
+    }
+
+    @Test
+    public void shouldEmitWithInterval0() throws IOException {
+        final File stateDir = TestUtils.tempDirectory();
+        final long windowSize = 10L;
+        final Windows<TimeWindow> windows = TimeWindows.ofSizeAndGrace(ofMillis(windowSize), ofMillis(5)).advanceBy(ofMillis(5));
+
+        try {
+            // Always process
+            props.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0);
+            final MockInternalNewProcessorContext<Windowed<String>, Change<String>> context = makeContext(stateDir, windowSize);
+            final KStreamWindowAggregate<String, String, String, TimeWindow> processorSupplier = new KStreamWindowAggregate<>(
+                windows,
+                WINDOW_STORE_NAME,
+                emitStrategy,
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER
+            );
+            final Processor<String, String, Windowed<String>, Change<String>> processor = processorSupplier.get();
+            processor.init(context);
+
+            context.setSystemTimeMs(0L);
+            processor.process(new Record<>("A", "1", 0));
+            processor.process(new Record<>("A", "1", 5));
+            processor.process(new Record<>("B", "2", 10));
+            processor.process(new Record<>("C", "3", 15));
+            processor.process(new Record<>("D", "4", 20));
+
+            final List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> forwarded = context.forwarded();
+            final List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> expected;
+            if (emitFinal) {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(5, 15)),
+                        new Change<>("0+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 10))
+                );
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(5, 15)),
+                        new Change<>("0+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 10)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(10, 20)),
+                        new Change<>("0+2", null), 10)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(10, 20)),
+                        new Change<>("0+3", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(15, 25)),
+                        new Change<>("0+3", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(15, 25)),
+                        new Change<>("0+4", null), 20)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(20, 30)),
+                        new Change<>("0+4", null), 20))
+                );
+            }
+            assertThat(forwarded, is(expected));
+            context.getStateStore(WINDOW_STORE_NAME).close();
+        } finally {
+            Utils.delete(stateDir);
+        }
+    }
+
+    @Test
+    public void shouldEmitWithLargeInterval() throws IOException {
+        final File stateDir = TestUtils.tempDirectory();
+        final long windowSize = 10L;
+        final Windows<TimeWindow> windows = TimeWindows.ofSizeAndGrace(ofMillis(windowSize), ofMillis(5)).advanceBy(ofMillis(5));
+
+        try {
+            // Emit final every second
+            props.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 1000L);
+            final MockInternalNewProcessorContext<Windowed<String>, Change<String>> context = makeContext(stateDir, windowSize);
+            final KStreamWindowAggregate<String, String, String, TimeWindow> processorSupplier = new KStreamWindowAggregate<>(
+                windows,
+                WINDOW_STORE_NAME,
+                emitStrategy,
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER
+            );
+            final Processor<String, String, Windowed<String>, Change<String>> processor = processorSupplier.get();
+            processor.init(context);
+
+            context.setSystemTimeMs(0L);
+            processor.process(new Record<>("A", "1", 0));
+            processor.process(new Record<>("A", "1", 5));
+            processor.process(new Record<>("B", "2", 10));
+            processor.process(new Record<>("C", "3", 15));
+
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> forwarded = context.forwarded();
+            if (emitFinal) {
+                assertTrue(forwarded.isEmpty());
+            } else {
+                final List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(5, 15)),
+                        new Change<>("0+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 10)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(10, 20)),
+                        new Change<>("0+2", null), 10)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(10, 20)),
+                        new Change<>("0+3", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(15, 25)),
+                        new Change<>("0+3", null), 15))
+                );
+                assertThat(forwarded, is(expected));
+            }
+            context.resetForwards();
+
+            // Progress
+            context.setSystemTimeMs(10000L);
+            processor.process(new Record<>("D", "4", 20));
+
+            forwarded = context.forwarded();
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> expected;
+            if (emitFinal) {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(5, 15)),
+                        new Change<>("0+1", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 10))
+                );
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(15, 25)),
+                        new Change<>("0+4", null), 20)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(20, 30)),
+                        new Change<>("0+4", null), 20))
+                );
+            }
+            assertThat(forwarded, is(expected));
+            context.resetForwards();
+
+            // Progress
+            context.setSystemTimeMs(10100L);
+            processor.process(new Record<>("E", "5", 40));
+
+            forwarded = context.forwarded();
+            if (emitFinal) {
+                assertTrue(forwarded.isEmpty());
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("E", new TimeWindow(35, 45)),
+                        new Change<>("0+5", null), 40)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("E", new TimeWindow(40, 50)),
+                        new Change<>("0+5", null), 40))
+                );
+                assertThat(forwarded, is(expected));
+            }
+
+            context.getStateStore(WINDOW_STORE_NAME).close();
+        } finally {
+            Utils.delete(stateDir);
+        }
+    }
+
+    @Test
+    public void shouldEmitFromLastEmitTime() throws IOException {
+        final File stateDir = TestUtils.tempDirectory();
+        final long windowSize = 10L;
+        final Windows<TimeWindow> windows = TimeWindows.ofSizeAndGrace(ofMillis(windowSize), ofMillis(5)).advanceBy(ofMillis(5));
+
+        try {
+            // Always process
+            props.put(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, 0);
+            final MockInternalNewProcessorContext<Windowed<String>, Change<String>> context = makeContext(stateDir, windowSize);
+            final KStreamWindowAggregate<String, String, String, TimeWindow> processorSupplier = new KStreamWindowAggregate<>(
+                windows,
+                WINDOW_STORE_NAME,
+                emitStrategy,
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER
+            );
+            final Processor<String, String, Windowed<String>, Change<String>> processor = processorSupplier.get();
+            processor.init(context);
+
+            context.setSystemTimeMs(0L);
+            processor.process(new Record<>("A", "1", 0));
+            processor.process(new Record<>("B", "2", 5));
+            processor.process(new Record<>("C", "3", 15));
+
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> forwarded = context.forwarded();
+            List<CapturedForward<? extends Windowed<String>, ? extends Change<String>>> expected;
+            if (emitFinal) {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        new Change<>("0+2", null), 5))
+                );
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("A", new TimeWindow(0, 10)),
+                        new Change<>("0+1", null), 0)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(0, 10)),
+                        new Change<>("0+2", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(10, 20)),
+                        new Change<>("0+3", null), 15)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(15, 25)),
+                        new Change<>("0+3", null), 15))
+                );
+            }
+            assertThat(forwarded, is(expected));
+            context.resetForwards();
+
+            final Processor<String, String, Windowed<String>, Change<String>> newProcessor = processorSupplier.get();
+            newProcessor.init(context);
+            newProcessor.process(new Record<>("D", "4", 25));
+            forwarded = context.forwarded();
+            if (emitFinal) {
+                // Don't output old windows for new processor
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("B", new TimeWindow(5, 15)),
+                        new Change<>("0+2", null), 5)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("C", new TimeWindow(10, 20)),
+                        new Change<>("0+3", null), 15))
+                );
+            } else {
+                expected = asList(
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(20, 30)),
+                        new Change<>("0+4", null), 25)),
+                    new CapturedForward<>(new Record<>(new Windowed<>("D", new TimeWindow(25, 35)),
+                        new Change<>("0+4", null), 25))
+                );
+            }
+            assertThat(forwarded, is(expected));
+            context.resetForwards();
+
+            context.getStateStore(WINDOW_STORE_NAME).close();
+        } finally {
+            Utils.delete(stateDir);
+        }
+    }
+
+    @Test
+    public void showThrowIfEmitFinalUsedWithUnlimitedWindow() {
+        if (emitFinal) {
+            final IllegalArgumentException e = assertThrows(
+                IllegalArgumentException.class, () -> new KStreamWindowAggregate<>(
+                    UnlimitedWindows.of(),
+                    WINDOW_STORE_NAME,
+                    emitStrategy,
+                    MockInitializer.STRING_INIT,
+                    MockAggregator.TOSTRING_ADDER)
+            );
+            assertThat(e.getMessage(), is("ON_WINDOW_CLOSE strategy is only supported for "
+                + "TimeWindows and SlidingWindows for TimeWindowedKStream"));
+        } else {
+            new KStreamWindowAggregate<>(
+                UnlimitedWindows.of(),
+                WINDOW_STORE_NAME,
+                emitStrategy,
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER
+            );
+        }
+    }
+
+    private TimestampedWindowStore<String, String> getWindowStore(final long windowSize) {
+        final WindowBytesStoreSupplier supplier;
+        if (emitFinal) {
+            supplier = RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(
+                WINDOW_STORE_NAME,
+                Duration.ofDays(1),
+                Duration.ofMillis(windowSize),
+                false,
+                false
+            );
+        } else {
+            supplier = Stores.persistentTimestampedWindowStore(
+                WINDOW_STORE_NAME,
+                Duration.ofDays(1),
+                Duration.ofMillis(windowSize),
+                false
+            );
+        }
+
+        return Stores.timestampedWindowStoreBuilder(supplier, Serdes.String(), Serdes.String())
+            .withLoggingDisabled() // Changelog is not supported by MockProcessorContext.
+            .withCachingDisabled() // Caching is not supported by MockProcessorContext.
+            .build();
+    }
+
+    private MockInternalNewProcessorContext<Windowed<String>, Change<String>> makeContext(final File stateDir, final long windowSize) {
+        final MockInternalNewProcessorContext<Windowed<String>, Change<String>> context = new MockInternalNewProcessorContext<>(
+            props,
+            new TaskId(0, 0),
+            stateDir
+        );
+
+        context.setCurrentNode(new ProcessorNode("testNode"));
+
+        // Create, initialize, and register the state store.
+        final TimestampedWindowStore<String, String> store = getWindowStore(windowSize);
+        store.init(context.getStateStoreContext(), store);
+        context.getStateStoreContext().register(store, null);
+
+        return context;
+    }
+
     private void assertLatenessMetrics(final TopologyTestDriver driver,
                                        final Matcher<Object> dropTotal,
                                        final Matcher<Object> maxLateness,
@@ -456,4 +1063,40 @@ private void assertLatenessMetrics(final TopologyTestDriver driver,
         assertThat(driver.metrics().get(latenessAvgMetric).metricValue(), avgLateness);
     }
 
+    private void assertEmittedMetrics(final TopologyTestDriver driver,
+                                      final Matcher<Object> emittedTotal) {
+
+        final MetricName emittedTotalMetric;
+        final MetricName emittedRateMetric;
+        emittedTotalMetric = new MetricName(
+            "window-aggregate-final-emit-total",
+            "stream-processor-node-metrics",
+            "The total number of emit final records",
+            mkMap(
+                mkEntry("thread-id", threadId),
+                mkEntry("task-id", "0_0"),
+                mkEntry("processor-node-id", "KSTREAM-AGGREGATE-0000000001")
+            )
+        );
+        emittedRateMetric = new MetricName(
+            "window-aggregate-final-emit-rate",
+            "stream-processor-node-metrics",
+            "The average number of emit final records per second",
+            mkMap(
+                mkEntry("thread-id", threadId),
+                mkEntry("task-id", "0_0"),
+                mkEntry("processor-node-id", "KSTREAM-AGGREGATE-0000000001")
+            )
+        );
+
+        assertThat(driver.metrics().get(emittedTotalMetric).metricValue(), emittedTotal);
+        assertThat(driver.metrics().get(emittedRateMetric).metricValue(), not(0.0));
+    }
+
+    private <K, V, S extends StateStore> Materialized<K, V, S> setMaterializedCache(final Materialized<K, V, S> materialized) {
+        if (withCache) {
+            return materialized.withCachingEnabled();
+        }
+        return materialized.withCachingDisabled();
+    }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableSourceTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableSourceTest.java
index 68c005a8c604f..70e1bccdbe65e 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableSourceTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/KTableSourceTest.java
@@ -184,7 +184,7 @@ public void kTableShouldLogOnOutOfOrder() {
                     .filter(e -> e.getLevel().equals("WARN"))
                     .map(Event::getMessage)
                     .collect(Collectors.toList()),
-                hasItem("Detected out-of-order KTable update for store, old timestamp=[10] new timestamp=[5]. topic=[topic] partition=[1] offset=[0].")
+                hasItem("Detected out-of-order KTable update for store, old timestamp=[10] new timestamp=[5]. topic=[topic] partition=[0] offset=[1].")
             );
         }
     }
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/MaterializedInternalTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/MaterializedInternalTest.java
index 5d5e8884d397b..302845a973996 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/MaterializedInternalTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/MaterializedInternalTest.java
@@ -18,10 +18,14 @@
 package org.apache.kafka.streams.kstream.internals;
 
 import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.KeyValueBytesStoreSupplier;
 import org.apache.kafka.streams.state.KeyValueStore;
+import org.apache.kafka.test.StreamsTestUtils;
 import org.easymock.EasyMock;
 import org.easymock.EasyMockRunner;
 import org.easymock.Mock;
@@ -29,6 +33,8 @@
 import org.junit.Test;
 import org.junit.runner.RunWith;
 
+import java.util.Properties;
+
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 
@@ -73,4 +79,20 @@ public void shouldUseStoreNameOfSupplierWhenProvided() {
             new MaterializedInternal<>(Materialized.as(supplier), nameProvider, prefix);
         assertThat(materialized.storeName(), equalTo(storeName));
     }
+
+    @Test
+    public void shouldUseStoreTypeWhenProvidedViaTopologyConfig() {
+        final Properties topologyOverrides = new Properties();
+        topologyOverrides.put(StreamsConfig.DEFAULT_DSL_STORE_CONFIG, StreamsConfig.IN_MEMORY);
+        final StreamsConfig config = new StreamsConfig(StreamsTestUtils.getStreamsConfig());
+
+        final InternalTopologyBuilder topologyBuilder = new InternalTopologyBuilder(
+            new TopologyConfig("my-topology", config, topologyOverrides));
+
+        final InternalStreamsBuilder internalStreamsBuilder = new InternalStreamsBuilder(topologyBuilder);
+
+        final MaterializedInternal<Object, Object, KeyValueStore<Bytes, byte[]>> materialized =
+            new MaterializedInternal<>(Materialized.as(supplier), internalStreamsBuilder, prefix);
+        assertThat(materialized.storeType(), equalTo(Materialized.StoreType.IN_MEMORY));
+    }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarderTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarderTest.java
deleted file mode 100644
index 60b37bb0523bc..0000000000000
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionTupleForwarderTest.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.kafka.streams.kstream.internals;
-
-import org.apache.kafka.streams.kstream.Windowed;
-import org.apache.kafka.streams.processor.StateStore;
-import org.apache.kafka.streams.processor.api.ProcessorContext;
-import org.apache.kafka.streams.processor.api.Record;
-import org.apache.kafka.streams.state.internals.WrappedStateStore;
-import org.junit.Test;
-
-import static org.easymock.EasyMock.expect;
-import static org.easymock.EasyMock.expectLastCall;
-import static org.easymock.EasyMock.mock;
-import static org.easymock.EasyMock.replay;
-import static org.easymock.EasyMock.verify;
-
-public class SessionTupleForwarderTest {
-
-    @Test
-    public void shouldSetFlushListenerOnWrappedStateStore() {
-        setFlushListener(true);
-        setFlushListener(false);
-    }
-
-    private void setFlushListener(final boolean sendOldValues) {
-        final WrappedStateStore<StateStore, Windowed<Object>, Object> store = mock(WrappedStateStore.class);
-        final SessionCacheFlushListener<Object, Object> flushListener = mock(SessionCacheFlushListener.class);
-
-        expect(store.setFlushListener(flushListener, sendOldValues)).andReturn(false);
-        replay(store);
-
-        new SessionTupleForwarder<>(store, null, flushListener, sendOldValues);
-
-        verify(store);
-    }
-
-    @Test
-    public void shouldForwardRecordsIfWrappedStateStoreDoesNotCache() {
-        shouldForwardRecordsIfWrappedStateStoreDoesNotCache(false);
-        shouldForwardRecordsIfWrappedStateStoreDoesNotCache(true);
-    }
-
-    private void shouldForwardRecordsIfWrappedStateStoreDoesNotCache(final boolean sendOldValued) {
-        final WrappedStateStore<StateStore, String, String> store = mock(WrappedStateStore.class);
-        final ProcessorContext<Windowed<String>, Change<String>> context = mock(
-            ProcessorContext.class);
-
-        expect(store.setFlushListener(null, sendOldValued)).andReturn(false);
-        if (sendOldValued) {
-            context.forward(
-                new Record<>(
-                    new Windowed<>("key", new SessionWindow(21L, 42L)),
-                    new Change<>("value", "oldValue"),
-                    42L));
-        } else {
-            context.forward(
-                new Record<>(
-                    new Windowed<>("key", new SessionWindow(21L, 42L)),
-                    new Change<>("value", null),
-                    42L));
-        }
-        expectLastCall();
-        replay(store, context);
-
-        new SessionTupleForwarder<>(store, context, null, sendOldValued)
-            .maybeForward(
-                new Record<>(
-                    new Windowed<>("key", new SessionWindow(21L, 42L)),
-                    new Change<>("value", "oldValue"),
-                    42L));
-
-        verify(store, context);
-    }
-
-    @Test
-    public void shouldNotForwardRecordsIfWrappedStateStoreDoesCache() {
-        final WrappedStateStore<StateStore, String, String> store = mock(WrappedStateStore.class);
-        final ProcessorContext<Windowed<String>, Change<String>> context = mock(ProcessorContext.class);
-
-        expect(store.setFlushListener(null, false)).andReturn(true);
-        replay(store, context);
-
-        new SessionTupleForwarder<>(store, context, null, false)
-            .maybeForward(
-                new Record<>(
-                    new Windowed<>("key", new SessionWindow(21L, 42L)),
-                    new Change<>("value", "oldValue"),
-                    42L));
-
-        verify(store, context);
-    }
-
-}
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImplTest.java
index 8c7d179b84dde..3356b37c4089a 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/SessionWindowedKStreamImplTest.java
@@ -21,10 +21,12 @@
 import org.apache.kafka.common.serialization.StringSerializer;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.Grouped;
 import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.Materialized;
@@ -34,8 +36,11 @@
 import org.apache.kafka.streams.kstream.SessionWindows;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.state.SessionStore;
-import org.apache.kafka.streams.state.ValueAndTimestamp;
 import org.apache.kafka.streams.TestInputTopic;
+import org.apache.kafka.streams.state.internals.ChangeLoggingSessionBytesStore;
+import org.apache.kafka.streams.state.internals.MeteredSessionStore;
+import org.apache.kafka.streams.state.internals.RocksDBTimeOrderedSessionStore;
+import org.apache.kafka.streams.state.internals.WrappedStateStore;
 import org.apache.kafka.test.MockAggregator;
 import org.apache.kafka.test.MockApiProcessorSupplier;
 import org.apache.kafka.test.MockInitializer;
@@ -43,29 +48,58 @@
 import org.apache.kafka.test.StreamsTestUtils;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 import java.util.Properties;
 
 import static java.time.Duration.ofMillis;
+import static java.util.Arrays.asList;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThrows;
 
+@RunWith(Parameterized.class)
 public class SessionWindowedKStreamImplTest {
     private static final String TOPIC = "input";
     private final StreamsBuilder builder = new StreamsBuilder();
     private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
     private final Merger<String, String> sessionMerger = (aggKey, aggOne, aggTwo) -> aggOne + "+" + aggTwo;
+
     private SessionWindowedKStream<String, String> stream;
 
+    @Parameterized.Parameter
+    public EmitStrategy.StrategyType type;
+
+    private boolean emitFinal;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {EmitStrategy.StrategyType.ON_WINDOW_UPDATE},
+            {EmitStrategy.StrategyType.ON_WINDOW_CLOSE}
+        });
+    }
+
     @Before
     public void before() {
+        final EmitStrategy emitStrategy = EmitStrategy.StrategyType.forType(type);
+        emitFinal = type.equals(EmitStrategy.StrategyType.ON_WINDOW_CLOSE);
+
+        // Set interval to 0 so that it always tries to emit
+        props.setProperty(StreamsConfig.InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, "0");
+
         final KStream<String, String> stream = builder.stream(TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
         this.stream = stream.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
-                .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(500)));
+            .windowedBy(SessionWindows.ofInactivityGapWithNoGrace(ofMillis(500)))
+            .emitStrategy(emitStrategy);
     }
 
     @Test
@@ -89,19 +123,30 @@ private void shouldCountSessionWindowed() {
             processData(driver);
         }
 
-        final Map<Windowed<String>, ValueAndTimestamp<Long>> result =
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey();
-
-        assertThat(result.size(), equalTo(3));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(10L, 15L))),
-            equalTo(ValueAndTimestamp.make(2L, 15L)));
-        assertThat(
-            result.get(new Windowed<>("2", new SessionWindow(599L, 600L))),
-            equalTo(ValueAndTimestamp.make(2L, 600L)));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(600L, 600L))),
-            equalTo(ValueAndTimestamp.make(1L, 600L)));
+        final ArrayList<KeyValueTimestamp<Windowed<String>, Long>> processed =
+            supplier.theCapturedProcessor().processed();
+
+        if (emitFinal) {
+            assertEquals(
+                Collections.singletonList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), 2L, 15L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), 1L, 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), null, 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), 2L, 15L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(600L, 600L)), 1L, 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), 1L, 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), null, 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(599L, 600L)), 2L, 600L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
@@ -115,19 +160,30 @@ public void shouldReduceWindowed() {
             processData(driver);
         }
 
-        final Map<Windowed<String>, ValueAndTimestamp<String>> result =
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey();
-
-        assertThat(result.size(), equalTo(3));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(10, 15))),
-            equalTo(ValueAndTimestamp.make("1+2", 15L)));
-        assertThat(
-            result.get(new Windowed<>("2", new SessionWindow(599L, 600))),
-            equalTo(ValueAndTimestamp.make("1+2", 600L)));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(600, 600))),
-            equalTo(ValueAndTimestamp.make("3", 600L)));
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> processed =
+                supplier.theCapturedProcessor().processed();
+
+        if (emitFinal) {
+            assertEquals(
+                Collections.singletonList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), "1+2", 15L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), "1", 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), null, 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), "1+2", 15L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(600L, 600L)), "3", 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), "1", 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), null, 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(599L, 600L)), "1+2", 600L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
@@ -143,19 +199,30 @@ public void shouldAggregateSessionWindowed() {
             processData(driver);
         }
 
-        final Map<Windowed<String>, ValueAndTimestamp<String>> result =
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey();
-
-        assertThat(result.size(), equalTo(3));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(10, 15))),
-            equalTo(ValueAndTimestamp.make("0+0+1+2", 15L)));
-        assertThat(
-            result.get(new Windowed<>("2", new SessionWindow(599, 600))),
-            equalTo(ValueAndTimestamp.make("0+0+1+2", 600L)));
-        assertThat(
-            result.get(new Windowed<>("1", new SessionWindow(600, 600))),
-            equalTo(ValueAndTimestamp.make("0+3", 600L)));
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> processed =
+                supplier.theCapturedProcessor().processed();
+
+        if (emitFinal) {
+            assertEquals(
+                Collections.singletonList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), "0+0+1+2", 15L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), "0+1", 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 10L)), null, 10L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(10L, 15L)), "0+0+1+2", 15L),
+                    new KeyValueTimestamp<>(new Windowed<>("1", new SessionWindow(600L, 600L)), "0+3", 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), "0+1", 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(600L, 600L)), null, 600L),
+                    new KeyValueTimestamp<>(new Windowed<>("2", new SessionWindow(599L, 600L)), "0+0+1+2", 600L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
@@ -292,6 +359,26 @@ public void shouldThrowNullPointerOnCountIfMaterializedIsNull() {
         assertThrows(NullPointerException.class, () -> stream.count((Materialized<String, Long, SessionStore<Bytes, byte[]>>) null));
     }
 
+    @Test
+    public void shouldNotEnableCachingWithEmitFinal() {
+        if (!emitFinal)
+            return;
+
+        stream.aggregate(
+                MockInitializer.STRING_INIT,
+                MockAggregator.TOSTRING_ADDER,
+                sessionMerger,
+                Materialized.<String, String, SessionStore<Bytes, byte[]>>as("aggregated").withValueSerde(Serdes.String()));
+
+        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
+            final SessionStore<String, String> store = driver.getSessionStore("aggregated");
+            final WrappedStateStore changeLogging = (WrappedStateStore) ((WrappedStateStore) store).wrapped();
+            assertThat(store, instanceOf(MeteredSessionStore.class));
+            assertThat(changeLogging, instanceOf(ChangeLoggingSessionBytesStore.class));
+            assertThat(changeLogging.wrapped(), instanceOf(RocksDBTimeOrderedSessionStore.class));
+        }
+    }
+
     private void processData(final TopologyTestDriver driver) {
         final TestInputTopic<String, String> inputTopic =
                 driver.createInputTopic(TOPIC, new StringSerializer(), new StringSerializer());
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImplTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImplTest.java
index f5f3ff88e5c9a..5ac43ac80825b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/TimeWindowedKStreamImplTest.java
@@ -17,20 +17,27 @@
 
 package org.apache.kafka.streams.kstream.internals;
 
+import java.util.ArrayList;
+import java.util.Collection;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.serialization.StringSerializer;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.KeyValueTimestamp;
 import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.EmitStrategy;
+import org.apache.kafka.streams.kstream.EmitStrategy.StrategyType;
 import org.apache.kafka.streams.kstream.Grouped;
 import org.apache.kafka.streams.kstream.KStream;
 import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.kstream.Named;
-import org.apache.kafka.streams.kstream.TimeWindowedKStream;
 import org.apache.kafka.streams.kstream.TimeWindows;
+import org.apache.kafka.streams.kstream.TimeWindowedKStream;
 import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
 import org.apache.kafka.streams.state.WindowStore;
 import org.apache.kafka.streams.TestInputTopic;
@@ -42,27 +49,59 @@
 import org.junit.Before;
 import org.junit.Test;
 
-import java.util.Arrays;
 import java.util.List;
 import java.util.Properties;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
 import static java.time.Duration.ofMillis;
 import static java.time.Instant.ofEpochMilli;
+import static java.util.Arrays.asList;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThrows;
 
+@RunWith(Parameterized.class)
 public class TimeWindowedKStreamImplTest {
     private static final String TOPIC = "input";
+    private static final Windowed<String> KEY_1_WINDOW_0 = new Windowed<>("1", new TimeWindow(0L, 500L));
+    private static final Windowed<String> KEY_1_WINDOW_1 = new Windowed<>("1", new TimeWindow(500L, 1000L));
+    private static final Windowed<String> KEY_2_WINDOW_1 = new Windowed<>("2", new TimeWindow(500L, 1000L));
+    private static final Windowed<String> KEY_2_WINDOW_2 = new Windowed<>("2", new TimeWindow(1000L, 1500L));
+
     private final StreamsBuilder builder = new StreamsBuilder();
     private final Properties props = StreamsTestUtils.getStreamsConfig(Serdes.String(), Serdes.String());
     private TimeWindowedKStream<String, String> windowedStream;
 
+    @Parameter
+    public StrategyType type;
+
+    @Parameter(1)
+    public boolean withCache;
+
+    private EmitStrategy emitStrategy;
+    private boolean emitFinal;
+
+    @Parameterized.Parameters(name = "{0}_cache:{1}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {StrategyType.ON_WINDOW_UPDATE, true},
+            {StrategyType.ON_WINDOW_UPDATE, false},
+            {StrategyType.ON_WINDOW_CLOSE, true},
+            {StrategyType.ON_WINDOW_CLOSE, false}
+        });
+    }
+
     @Before
     public void before() {
+        emitFinal = type.equals(StrategyType.ON_WINDOW_CLOSE);
+        emitStrategy = StrategyType.forType(type);
+        // Set interval to 0 so that it always tries to emit
+        props.setProperty(InternalConfig.EMIT_INTERVAL_MS_KSTREAMS_WINDOWED_AGGREGATION, "0");
         final KStream<String, String> stream = builder.stream(TOPIC, Consumed.with(Serdes.String(), Serdes.String()));
-        windowedStream = stream.
-            groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
+        windowedStream = stream.groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
             .windowedBy(TimeWindows.ofSizeWithNoGrace(ofMillis(500L)));
     }
 
@@ -70,6 +109,7 @@ public void before() {
     public void shouldCountWindowed() {
         final MockApiProcessorSupplier<Windowed<String>, Long, Void, Void> supplier = new MockApiProcessorSupplier<>();
         windowedStream
+            .emitStrategy(emitStrategy)
             .count()
             .toStream()
             .process(supplier);
@@ -77,24 +117,37 @@ public void shouldCountWindowed() {
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
         }
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(0L, 500L))),
-            equalTo(ValueAndTimestamp.make(2L, 15L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("2", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make(2L, 550L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make(1L, 500L)));
+        final ArrayList<KeyValueTimestamp<Windowed<String>, Long>> processed = supplier.theCapturedProcessor().processed();
+
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, 2L, 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, 1L, 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, 2L, 550L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, 1L, 10L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, 2L, 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, 1L, 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, 1L, 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, 2L, 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_2, 1L, 1000L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
     public void shouldReduceWindowed() {
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         windowedStream
+            .emitStrategy(emitStrategy)
             .reduce(MockReducer.STRING_ADDER)
             .toStream()
             .process(supplier);
@@ -102,54 +155,81 @@ public void shouldReduceWindowed() {
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
         }
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(0L, 500L))),
-            equalTo(ValueAndTimestamp.make("1+2", 15L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("2", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make("10+20", 550L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make("3", 500L)));
+
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> processed = supplier.theCapturedProcessor().processed();
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "1+2", 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, "3", 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "10+20", 550L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "1", 10L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "1+2", 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, "3", 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "10", 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "10+20", 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_2, "30", 1000L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
     public void shouldAggregateWindowed() {
         final MockApiProcessorSupplier<Windowed<String>, String, Void, Void> supplier = new MockApiProcessorSupplier<>();
         windowedStream
+            .emitStrategy(emitStrategy)
             .aggregate(
                 MockInitializer.STRING_INIT,
                 MockAggregator.TOSTRING_ADDER,
-                Materialized.with(Serdes.String(), Serdes.String()))
+                setMaterializedCache(Materialized.with(Serdes.String(), Serdes.String())))
             .toStream()
             .process(supplier);
 
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
         }
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(0L, 500L))),
-            equalTo(ValueAndTimestamp.make("0+1+2", 15L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("2", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make("0+10+20", 550L)));
-        assertThat(
-            supplier.theCapturedProcessor().lastValueAndTimestampPerKey()
-                .get(new Windowed<>("1", new TimeWindow(500L, 1000L))),
-            equalTo(ValueAndTimestamp.make("0+3", 500L)));
+
+        final ArrayList<KeyValueTimestamp<Windowed<String>, String>> processed = supplier.theCapturedProcessor().processed();
+        if (emitFinal) {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "0+1+2", 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, "0+3", 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "0+10+20", 550L)
+                ),
+                processed
+            );
+        } else {
+            assertEquals(
+                asList(
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "0+1", 10L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_0, "0+1+2", 15L),
+                    new KeyValueTimestamp<>(KEY_1_WINDOW_1, "0+3", 500L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "0+10", 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_1, "0+10+20", 550L),
+                    new KeyValueTimestamp<>(KEY_2_WINDOW_2, "0+30", 1000L)
+                ),
+                processed
+            );
+        }
     }
 
     @Test
     public void shouldMaterializeCount() {
-        windowedStream.count(
-            Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("count-store")
-                .withKeySerde(Serdes.String())
-                .withValueSerde(Serdes.Long()));
+        windowedStream
+            .emitStrategy(emitStrategy)
+            .count(
+                setMaterializedCache(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("count-store")
+                    .withKeySerde(Serdes.String())
+                    .withValueSerde(Serdes.Long())));
 
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
@@ -158,10 +238,11 @@ public void shouldMaterializeCount() {
                 final List<KeyValue<Windowed<String>, Long>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), 2L),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), 1L),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), 2L))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), 2L),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), 1L))));
             }
             {
                 final WindowStore<String, ValueAndTimestamp<Long>> windowStore =
@@ -169,10 +250,11 @@ public void shouldMaterializeCount() {
                 final List<KeyValue<Windowed<String>, ValueAndTimestamp<Long>>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), ValueAndTimestamp.make(2L, 15L)),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), ValueAndTimestamp.make(1L, 500L)),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make(2L, 550L)))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make(2L, 550L)),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), ValueAndTimestamp.make(1L, 1000L)))));
             }
         }
     }
@@ -181,9 +263,9 @@ public void shouldMaterializeCount() {
     public void shouldMaterializeReduced() {
         windowedStream.reduce(
             MockReducer.STRING_ADDER,
-            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("reduced")
+            setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("reduced")
                 .withKeySerde(Serdes.String())
-                .withValueSerde(Serdes.String()));
+                .withValueSerde(Serdes.String())));
 
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
@@ -192,20 +274,22 @@ public void shouldMaterializeReduced() {
                 final List<KeyValue<Windowed<String>, String>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), "1+2"),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), "3"),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), "10+20"))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), "10+20"),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), "30"))));
             }
             {
                 final WindowStore<String, ValueAndTimestamp<String>> windowStore = driver.getTimestampedWindowStore("reduced");
                 final List<KeyValue<Windowed<String>, ValueAndTimestamp<String>>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), ValueAndTimestamp.make("1+2", 15L)),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), ValueAndTimestamp.make("3", 500L)),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make("10+20", 550L)))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make("10+20", 550L)),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), ValueAndTimestamp.make("30", 1000L)))));
             }
         }
     }
@@ -215,9 +299,9 @@ public void shouldMaterializeAggregated() {
         windowedStream.aggregate(
             MockInitializer.STRING_INIT,
             MockAggregator.TOSTRING_ADDER,
-            Materialized.<String, String, WindowStore<Bytes, byte[]>>as("aggregated")
+            setMaterializedCache(Materialized.<String, String, WindowStore<Bytes, byte[]>>as("aggregated")
                 .withKeySerde(Serdes.String())
-                .withValueSerde(Serdes.String()));
+                .withValueSerde(Serdes.String())));
 
         try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
             processData(driver);
@@ -226,20 +310,22 @@ public void shouldMaterializeAggregated() {
                 final List<KeyValue<Windowed<String>, String>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), "0+1+2"),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), "0+3"),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), "0+10+20"))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), "0+10+20"),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), "0+30"))));
             }
             {
                 final WindowStore<String, ValueAndTimestamp<String>> windowStore = driver.getTimestampedWindowStore("aggregated");
                 final List<KeyValue<Windowed<String>, ValueAndTimestamp<String>>> data =
                     StreamsTestUtils.toList(windowStore.fetch("1", "2", ofEpochMilli(0), ofEpochMilli(1000L)));
 
-                assertThat(data, equalTo(Arrays.asList(
+                assertThat(data, equalTo(asList(
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(0, 500)), ValueAndTimestamp.make("0+1+2", 15L)),
                     KeyValue.pair(new Windowed<>("1", new TimeWindow(500, 1000)), ValueAndTimestamp.make("0+3", 500L)),
-                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make("0+10+20", 550L)))));
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(500, 1000)), ValueAndTimestamp.make("0+10+20", 550L)),
+                    KeyValue.pair(new Windowed<>("2", new TimeWindow(1000, 1500)), ValueAndTimestamp.make("0+30", 1000L)))));
             }
         }
     }
@@ -264,7 +350,7 @@ public void shouldThrowNullPointerOnMaterializedAggregateIfInitializerIsNull() {
         assertThrows(NullPointerException.class, () -> windowedStream.aggregate(
             null,
             MockAggregator.TOSTRING_ADDER,
-            Materialized.as("store")));
+            setMaterializedCache(Materialized.as("store"))));
     }
 
     @Test
@@ -272,7 +358,7 @@ public void shouldThrowNullPointerOnMaterializedAggregateIfAggregatorIsNull() {
         assertThrows(NullPointerException.class, () -> windowedStream.aggregate(
             MockInitializer.STRING_INIT,
             null,
-            Materialized.as("store")));
+            setMaterializedCache(Materialized.as("store"))));
     }
 
     @SuppressWarnings("unchecked")
@@ -288,7 +374,7 @@ public void shouldThrowNullPointerOnMaterializedAggregateIfMaterializedIsNull()
     public void shouldThrowNullPointerOnMaterializedReduceIfReducerIsNull() {
         assertThrows(NullPointerException.class, () -> windowedStream.reduce(
             null,
-            Materialized.as("store")));
+            setMaterializedCache(Materialized.as("store"))));
     }
 
     @Test
@@ -319,6 +405,13 @@ private void processData(final TopologyTestDriver driver) {
         inputTopic.pipeInput("1", "3", 500L);
         inputTopic.pipeInput("2", "10", 550L);
         inputTopic.pipeInput("2", "20", 500L);
+        inputTopic.pipeInput("2", "30", 1000L);
     }
 
+    private <K, V, S extends StateStore> Materialized<K, V, S> setMaterializedCache(final Materialized<K, V, S> materialized) {
+        if (withCache) {
+            return materialized.withCachingEnabled();
+        }
+        return materialized.withCachingDisabled();
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitionerTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitionerTest.java
index adebf167de4ad..a659525727747 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitionerTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/WindowedStreamPartitionerTest.java
@@ -16,7 +16,6 @@
  */
 package org.apache.kafka.streams.kstream.internals;
 
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.Node;
 import org.apache.kafka.common.PartitionInfo;
@@ -55,7 +54,8 @@ public class WindowedStreamPartitionerTest {
     @Test
     public void testCopartitioning() {
         final Random rand = new Random();
-        final DefaultPartitioner defaultPartitioner = new DefaultPartitioner();
+        @SuppressWarnings("deprecation")
+        final org.apache.kafka.clients.producer.internals.DefaultPartitioner defaultPartitioner = new org.apache.kafka.clients.producer.internals.DefaultPartitioner();
         final WindowedSerializer<Integer> timeWindowedSerializer = new TimeWindowedSerializer<>(intSerializer);
         final WindowedStreamPartitioner<Integer, String> streamPartitioner = new WindowedStreamPartitioner<>(timeWindowedSerializer);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplierTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplierTest.java
new file mode 100644
index 0000000000000..1bf708b8ab5d7
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/ForeignJoinSubscriptionProcessorSupplierTest.java
@@ -0,0 +1,377 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.KTableValueGetter;
+import org.apache.kafka.streams.kstream.internals.KTableValueGetterSupplier;
+import org.apache.kafka.streams.kstream.internals.foreignkeyjoin.SubscriptionWrapper.Instruction;
+import org.apache.kafka.streams.processor.api.MockProcessorContext;
+import org.apache.kafka.streams.processor.api.MockProcessorContext.CapturedForward;
+import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ForeignJoinSubscriptionProcessorSupplierTest {
+    final Map<String, ValueAndTimestamp<String>> fks = Collections.singletonMap(
+        "fk1", ValueAndTimestamp.make("foo", 1L)
+    );
+    final KTableValueGetterSupplier<String, String> valueGetterSupplier = valueGetterSupplier(fks);
+    final Processor<CombinedKey<String, String>,
+                    Change<ValueAndTimestamp<SubscriptionWrapper<String>>>,
+                    String,
+                    SubscriptionResponseWrapper<String>>
+        processor = processor(valueGetterSupplier);
+
+    @Test
+    public void shouldDetectVersionChange() {
+        // This test serves as a reminder to add new tests once we bump SubscriptionWrapper version.
+        Assert.assertEquals(SubscriptionWrapper.VERSION_1, SubscriptionWrapper.CURRENT_VERSION);
+    }
+
+    @Test
+    public void shouldDeleteKeyAndPropagateFKV0() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.DELETE_KEY_AND_PROPAGATE,
+            "pk1",
+            SubscriptionWrapper.VERSION_0,
+            null
+        );
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    null,
+                    null),
+                1L
+            ),
+            forwarded.get(0).record()
+        );
+    }
+
+    @Test
+    public void shouldDeleteKeyAndPropagateFKV1() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.DELETE_KEY_AND_PROPAGATE,
+            "pk1",
+            SubscriptionWrapper.VERSION_1,
+            12
+        );
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    null,
+                    12
+                ),
+                1L
+            ),
+            forwarded.get(0).record()
+        );
+    }
+
+    @Test
+    public void shouldPropagateOnlyIfFKAvailableV0() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            "pk1",
+            SubscriptionWrapper.VERSION_0,
+            null
+        );
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+        );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    "foo",
+                    null
+                ),
+                1L
+            ),
+            forwarded.get(0).record()
+        );
+    }
+
+    @Test
+    public void shouldPropagateOnlyIfFKAvailableV1() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            "pk1",
+            SubscriptionWrapper.VERSION_1,
+            12
+        );
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    "foo",
+                     12
+                ),
+                1L
+            ),
+            forwarded.get(0).record());
+    }
+
+    @Test
+    public void shouldPropagateNullIfNoFKAvailableV0() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE,
+            "pk1",
+            SubscriptionWrapper.VERSION_0,
+            null
+        );
+        Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        // propagate matched FK
+        List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    "foo",
+                    null
+                ),
+                1L
+            ),
+            forwarded.get(0).record());
+
+        record = new Record<>(
+                new CombinedKey<>("fk9000", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        // propagate null if there is no match
+        forwarded = context.forwarded();
+        Assert.assertEquals(2, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    null,
+                    null
+                ),
+                1L
+            ),
+            forwarded.get(1).record());
+    }
+
+    @Test
+    public void shouldPropagateNullIfNoFKAvailableV1() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.PROPAGATE_NULL_IF_NO_FK_VAL_AVAILABLE,
+            "pk1",
+            SubscriptionWrapper.VERSION_1,
+            12);
+        Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(1, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    "foo",
+                    12
+                ),
+                1L
+            ),
+            forwarded.get(0).record());
+
+        record = new Record<>(
+            new CombinedKey<>("fk9000", "pk1"),
+            new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+            1L
+        );
+        processor.process(record);
+        // propagate null if there is no match
+        forwarded = context.forwarded();
+        Assert.assertEquals(2, forwarded.size());
+        Assert.assertEquals(
+            new Record<>(
+                "pk1",
+                new SubscriptionResponseWrapper<>(
+                    newValue.getHash(),
+                    null,
+                    12
+                ),
+                1L
+            ),
+            forwarded.get(1).record());
+    }
+
+    @Test
+    public void shouldDeleteKeyNoPropagateV0() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.DELETE_KEY_NO_PROPAGATE,
+            "pk1",
+            SubscriptionWrapper.VERSION_0,
+            null);
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(
+                new CombinedKey<>("fk1", "pk1"),
+                new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                1L
+            );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(0, forwarded.size());
+    }
+
+    @Test
+    public void shouldDeleteKeyNoPropagateV1() {
+        final MockProcessorContext<String, SubscriptionResponseWrapper<String>> context = new MockProcessorContext<>();
+        processor.init(context);
+
+        final SubscriptionWrapper<String> newValue = new SubscriptionWrapper<>(
+            new long[]{1L},
+            Instruction.DELETE_KEY_NO_PROPAGATE,
+            "pk1",
+            SubscriptionWrapper.VERSION_1,
+            12);
+        final Record<CombinedKey<String, String>, Change<ValueAndTimestamp<SubscriptionWrapper<String>>>> record =
+            new Record<>(new CombinedKey<>("fk1", "pk1"),
+                         new Change<>(ValueAndTimestamp.make(newValue, 1L), null),
+                        1L
+            );
+        processor.process(record);
+        final List<CapturedForward<? extends String, ? extends SubscriptionResponseWrapper<String>>> forwarded = context.forwarded();
+        Assert.assertEquals(0, forwarded.size());
+    }
+
+    private KTableValueGetterSupplier<String, String> valueGetterSupplier(final Map<String, ValueAndTimestamp<String>> map) {
+        final KTableValueGetter<String, String> valueGetter = new KTableValueGetter<String, String>() {
+
+            @Override
+            public ValueAndTimestamp<String> get(final String key) {
+                return map.get(key);
+            }
+
+            @Override
+            public void init(final ProcessorContext context) {
+
+            }
+        };
+        return new KTableValueGetterSupplier<String, String>() {
+            @Override
+            public KTableValueGetter<String, String> get() {
+                return valueGetter;
+            }
+
+            @Override
+            public String[] storeNames() {
+                return new String[0];
+            }
+        };
+    }
+
+    private Processor<CombinedKey<String, String>,
+                      Change<ValueAndTimestamp<SubscriptionWrapper<String>>>,
+                      String,
+                      SubscriptionResponseWrapper<String>> processor(final KTableValueGetterSupplier<String, String> valueGetterSupplier) {
+        final SubscriptionJoinForeignProcessorSupplier<String, String, String> supplier =
+            new SubscriptionJoinForeignProcessorSupplier<>(valueGetterSupplier);
+        return supplier.get();
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplierTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplierTest.java
index 6c7c0972b0f56..dd794b0107d5b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplierTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResolverJoinProcessorSupplierTest.java
@@ -89,7 +89,7 @@ public void shouldNotForwardWhenHashDoesNotMatch() {
 
         valueGetterSupplier.put("lhs1", "lhsValue");
         final long[] oldHash = Murmur3.hash128(STRING_SERIALIZER.serialize("topic-join-resolver", "oldLhsValue"));
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(oldHash, "rhsValue"), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(oldHash, "rhsValue", 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded, empty());
     }
@@ -114,7 +114,7 @@ public void shouldIgnoreUpdateWhenLeftHasBecomeNull() {
 
         valueGetterSupplier.put("lhs1", null);
         final long[] hash = Murmur3.hash128(STRING_SERIALIZER.serialize("topic-join-resolver", "lhsValue"));
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, "rhsValue"), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, "rhsValue", 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded, empty());
     }
@@ -139,7 +139,7 @@ public void shouldForwardWhenHashMatches() {
 
         valueGetterSupplier.put("lhs1", "lhsValue");
         final long[] hash = Murmur3.hash128(STRING_SERIALIZER.serialize("topic-join-resolver", "lhsValue"));
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, "rhsValue"), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, "rhsValue", 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded.size(), is(1));
         assertThat(forwarded.get(0).record(), is(new Record<>("lhs1", "(lhsValue,rhsValue)", 0)));
@@ -165,7 +165,7 @@ public void shouldEmitTombstoneForInnerJoinWhenRightIsNull() {
 
         valueGetterSupplier.put("lhs1", "lhsValue");
         final long[] hash = Murmur3.hash128(STRING_SERIALIZER.serialize("topic-join-resolver", "lhsValue"));
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null, 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded.size(), is(1));
         assertThat(forwarded.get(0).record(), is(new Record<>("lhs1", null, 0)));
@@ -191,7 +191,7 @@ public void shouldEmitResultForLeftJoinWhenRightIsNull() {
 
         valueGetterSupplier.put("lhs1", "lhsValue");
         final long[] hash = Murmur3.hash128(STRING_SERIALIZER.serialize("topic-join-resolver", "lhsValue"));
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null, 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded.size(), is(1));
         assertThat(forwarded.get(0).record(), is(new Record<>("lhs1", "(lhsValue,null)", 0)));
@@ -217,7 +217,7 @@ public void shouldEmitTombstoneForLeftJoinWhenRightIsNullAndLeftIsNull() {
 
         valueGetterSupplier.put("lhs1", null);
         final long[] hash = null;
-        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null), 0));
+        processor.process(new Record<>("lhs1", new SubscriptionResponseWrapper<>(hash, null, 0), 0));
         final List<MockProcessorContext.CapturedForward<? extends String, ? extends String>> forwarded = context.forwarded();
         assertThat(forwarded.size(), is(1));
         assertThat(forwarded.get(0).record(), is(new Record<>("lhs1", null, 0)));
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java
index 30fc0c318519c..167c1f990f672 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionResponseWrapperSerdeTest.java
@@ -76,26 +76,28 @@ public T deserialize(final String topic, final byte[] data) {
     public void ShouldSerdeWithNonNullsTest() {
         final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0x01, (byte) 0x9A, (byte) 0xFF, (byte) 0x00});
         final String foreignValue = "foreignValue";
-        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue, 1);
         final SubscriptionResponseWrapperSerde<String> srwSerde = new SubscriptionResponseWrapperSerde(new NonNullableSerde(Serdes.String()));
         final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
         final SubscriptionResponseWrapper<String> result = srwSerde.deserializer().deserialize(null, serResponse);
 
         assertArrayEquals(hashedValue, result.getOriginalValueHash());
         assertEquals(foreignValue, result.getForeignValue());
+        assertNull(result.getPrimaryPartition());
     }
 
     @Test
     @SuppressWarnings("unchecked")
     public void shouldSerdeWithNullForeignValueTest() {
         final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0x01, (byte) 0x9A, (byte) 0xFF, (byte) 0x00});
-        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, null);
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, null, 1);
         final SubscriptionResponseWrapperSerde<String> srwSerde = new SubscriptionResponseWrapperSerde(new NonNullableSerde(Serdes.String()));
         final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
         final SubscriptionResponseWrapper<String> result = srwSerde.deserializer().deserialize(null, serResponse);
 
         assertArrayEquals(hashedValue, result.getOriginalValueHash());
         assertNull(result.getForeignValue());
+        assertNull(result.getPrimaryPartition());
     }
 
     @Test
@@ -103,13 +105,14 @@ public void shouldSerdeWithNullForeignValueTest() {
     public void shouldSerdeWithNullHashTest() {
         final long[] hashedValue = null;
         final String foreignValue = "foreignValue";
-        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue, 1);
         final SubscriptionResponseWrapperSerde<String> srwSerde = new SubscriptionResponseWrapperSerde(new NonNullableSerde(Serdes.String()));
         final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
         final SubscriptionResponseWrapper<String> result = srwSerde.deserializer().deserialize(null, serResponse);
 
         assertArrayEquals(hashedValue, result.getOriginalValueHash());
         assertEquals(foreignValue, result.getForeignValue());
+        assertNull(result.getPrimaryPartition());
     }
 
     @Test
@@ -117,19 +120,20 @@ public void shouldSerdeWithNullHashTest() {
     public void shouldSerdeWithNullsTest() {
         final long[] hashedValue = null;
         final String foreignValue = null;
-        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue);
+        final SubscriptionResponseWrapper<String> srw = new SubscriptionResponseWrapper<>(hashedValue, foreignValue, 1);
         final SubscriptionResponseWrapperSerde<String> srwSerde = new SubscriptionResponseWrapperSerde(new NonNullableSerde(Serdes.String()));
         final byte[] serResponse = srwSerde.serializer().serialize(null, srw);
         final SubscriptionResponseWrapper<String> result = srwSerde.deserializer().deserialize(null, serResponse);
 
         assertArrayEquals(hashedValue, result.getOriginalValueHash());
         assertEquals(foreignValue, result.getForeignValue());
+        assertNull(result.getPrimaryPartition());
     }
 
     @Test
     public void shouldThrowExceptionWithBadVersionTest() {
         final long[] hashedValue = null;
         assertThrows(UnsupportedVersionException.class,
-            () -> new SubscriptionResponseWrapper<>(hashedValue, "foreignValue", (byte) 0xFF));
+            () -> new SubscriptionResponseWrapper<>(hashedValue, "foreignValue", (byte) 0xFF, 1));
     }
-}
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java
index e937efe2bc092..709a94bc6dbeb 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/foreignkeyjoin/SubscriptionWrapperSerdeTest.java
@@ -16,13 +16,16 @@
  */
 package org.apache.kafka.streams.kstream.internals.foreignkeyjoin;
 
+import java.util.Collections;
 import org.apache.kafka.common.errors.UnsupportedVersionException;
 import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.state.internals.Murmur3;
 import org.junit.Test;
 
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
 
 @SuppressWarnings({"unchecked", "rawtypes"})
@@ -30,53 +33,229 @@ public class SubscriptionWrapperSerdeTest {
 
     @Test
     @SuppressWarnings("unchecked")
-    public void shouldSerdeTest() {
+    public void shouldSerdeV0Test() {
+        final byte version = SubscriptionWrapper.VERSION_0;
         final String originalKey = "originalKey";
         final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
         final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
-        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, originalKey);
+        final Integer primaryPartition = null;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE,
+            originalKey,
+            version,
+            primaryPartition);
         final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
-        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer().deserialize(null, serialized);
+        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer()
+            .deserialize(null, serialized);
 
         assertEquals(SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, deserialized.getInstruction());
         assertArrayEquals(hashedValue, deserialized.getHash());
         assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(primaryPartition, deserialized.getPrimaryPartition());
+        assertEquals(version, deserialized.getVersion());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeV1Test() {
+        final byte version = SubscriptionWrapper.VERSION_1;
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE,
+            originalKey,
+            version,
+            primaryPartition);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer()
+            .deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(primaryPartition, deserialized.getPrimaryPartition());
+        assertEquals(version, deserialized.getVersion());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeWithV0IfUpgradeTest() {
+        final byte version = SubscriptionWrapper.VERSION_1;
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
+        swSerde.configure(
+            Collections.singletonMap(StreamsConfig.UPGRADE_FROM_CONFIG, StreamsConfig.UPGRADE_FROM_32),
+            true);
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE,
+            originalKey,
+            version,
+            primaryPartition);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer()
+            .deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.DELETE_KEY_AND_PROPAGATE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(0, deserialized.getVersion());
+        assertNull(deserialized.getPrimaryPartition());
+    }
+
+    @Test
+    @SuppressWarnings("unchecked")
+    public void shouldSerdeNullHashV0Test() {
+        final byte version = SubscriptionWrapper.VERSION_0;
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
+        final long[] hashedValue = null;
+        final Integer primaryPartition = null;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            version,
+            primaryPartition);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer().deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(primaryPartition, deserialized.getPrimaryPartition());
+        assertEquals(version, deserialized.getVersion());
     }
 
     @Test
     @SuppressWarnings("unchecked")
-    public void shouldSerdeNullHashTest() {
+    public void shouldSerdeNullHashV1Test() {
+        final byte version = SubscriptionWrapper.VERSION_1;
         final String originalKey = "originalKey";
         final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
         final long[] hashedValue = null;
-        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey);
+        final Integer primaryPartition = 10;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            version,
+            primaryPartition);
+        final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
+        final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer()
+            .deserialize(null, serialized);
+
+        assertEquals(SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, deserialized.getInstruction());
+        assertArrayEquals(hashedValue, deserialized.getHash());
+        assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(primaryPartition, deserialized.getPrimaryPartition());
+        assertEquals(version, deserialized.getVersion());
+    }
+
+    @Test
+    public void shouldSerdeNullPrimaryPartitionOnV0Test() {
+        final String originalKey = "originalKey";
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = null;
+        final byte version = SubscriptionWrapper.VERSION_0;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            version,
+            primaryPartition);
         final byte[] serialized = swSerde.serializer().serialize(null, wrapper);
         final SubscriptionWrapper deserialized = (SubscriptionWrapper) swSerde.deserializer().deserialize(null, serialized);
 
         assertEquals(SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, deserialized.getInstruction());
         assertArrayEquals(hashedValue, deserialized.getHash());
         assertEquals(originalKey, deserialized.getPrimaryKey());
+        assertEquals(primaryPartition, deserialized.getPrimaryPartition());
+        assertEquals(version, deserialized.getVersion());
     }
 
     @Test
-    public void shouldThrowExceptionOnNullKeyTest() {
+    public void shouldThrowExceptionOnNullKeyV0Test() {
         final String originalKey = null;
         final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
         assertThrows(NullPointerException.class, () -> new SubscriptionWrapper<>(hashedValue,
-            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey));
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            SubscriptionWrapper.VERSION_0,
+            primaryPartition));
     }
 
     @Test
-    public void shouldThrowExceptionOnNullInstructionTest() {
+    public void shouldThrowExceptionOnNullKeyV1Test() {
+        final String originalKey = null;
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
+        assertThrows(NullPointerException.class, () -> new SubscriptionWrapper<>(hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            SubscriptionWrapper.VERSION_1,
+            primaryPartition));
+    }
+
+    @Test
+    public void shouldThrowExceptionOnNullInstructionV0Test() {
+        final String originalKey = "originalKey";
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
+        assertThrows(NullPointerException.class, () -> new SubscriptionWrapper<>(
+            hashedValue,
+            null,
+            originalKey,
+            SubscriptionWrapper.VERSION_0,
+            primaryPartition));
+    }
+
+    @Test
+    public void shouldThrowExceptionOnNullInstructionV1Test() {
+        final String originalKey = "originalKey";
+        final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
+        final Integer primaryPartition = 10;
+        assertThrows(NullPointerException.class, () -> new SubscriptionWrapper<>(
+            hashedValue,
+            null,
+            originalKey,
+            SubscriptionWrapper.VERSION_0,
+            primaryPartition));
+    }
+
+    @Test
+    public void shouldThrowExceptionOnNullPrimaryPartitionV1Test() {
+        final SubscriptionWrapperSerde swSerde = new SubscriptionWrapperSerde<>(() -> "pkTopic", Serdes.String());
         final String originalKey = "originalKey";
         final long[] hashedValue = Murmur3.hash128(new byte[] {(byte) 0xFF, (byte) 0xAA, (byte) 0x00, (byte) 0x19});
-        assertThrows(NullPointerException.class, () -> new SubscriptionWrapper<>(hashedValue, null, originalKey));
+        final Integer primaryPartition = null;
+        final SubscriptionWrapper wrapper = new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            SubscriptionWrapper.VERSION_1,
+            primaryPartition);
+        assertThrows(NullPointerException.class, () -> swSerde.serializer().serialize(null, wrapper));
     }
 
     @Test (expected = UnsupportedVersionException.class)
     public void shouldThrowExceptionOnUnsupportedVersionTest() {
         final String originalKey = "originalKey";
         final long[] hashedValue = null;
-        new SubscriptionWrapper<>(hashedValue, SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE, originalKey, (byte) 0x80);
+        final Integer primaryPartition = 10;
+        new SubscriptionWrapper<>(
+            hashedValue,
+            SubscriptionWrapper.Instruction.PROPAGATE_ONLY_IF_FK_VAL_AVAILABLE,
+            originalKey,
+            (byte) 0x80,
+            primaryPartition);
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/graph/GraphGraceSearchUtilTest.java b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/graph/GraphGraceSearchUtilTest.java
index f8a7073dabcbb..1de78a8b85b13 100644
--- a/streams/src/test/java/org/apache/kafka/streams/kstream/internals/graph/GraphGraceSearchUtilTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/kstream/internals/graph/GraphGraceSearchUtilTest.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.streams.kstream.internals.graph;
 
 import org.apache.kafka.streams.errors.TopologyException;
+import org.apache.kafka.streams.kstream.EmitStrategy;
 import org.apache.kafka.streams.kstream.SessionWindows;
 import org.apache.kafka.streams.kstream.TimeWindows;
 import org.apache.kafka.streams.kstream.internals.KStreamSessionWindowAggregate;
@@ -86,6 +87,7 @@ public void shouldExtractGraceFromKStreamWindowAggregateNode() {
                 new KStreamWindowAggregate<String, Long, Integer, TimeWindow>(
                     windows,
                     "asdf",
+                    EmitStrategy.onWindowUpdate(),
                     null,
                     null
                 ),
@@ -108,6 +110,7 @@ public void shouldExtractGraceFromKStreamSessionWindowAggregateNode() {
                 new KStreamSessionWindowAggregate<String, Long, Integer>(
                     windows,
                     "asdf",
+                    EmitStrategy.onWindowUpdate(),
                     null,
                     null,
                     null
@@ -127,7 +130,7 @@ public void shouldExtractGraceFromSessionAncestorThroughStatefulParent() {
         final StatefulProcessorNode<String, Long> graceGrandparent = new StatefulProcessorNode<>(
             "asdf",
             new ProcessorParameters<>(new KStreamSessionWindowAggregate<String, Long, Integer>(
-                windows, "asdf", null, null, null
+                windows, "asdf", EmitStrategy.onWindowUpdate(), null, null, null
             ), "asdf"),
             (StoreBuilder<?>) null
         );
@@ -167,6 +170,7 @@ public void shouldExtractGraceFromSessionAncestorThroughStatelessParent() {
                 new KStreamSessionWindowAggregate<String, Long, Integer>(
                     windows,
                     "asdf",
+                    EmitStrategy.onWindowUpdate(),
                     null,
                     null,
                     null
@@ -194,6 +198,7 @@ public void shouldUseMaxIfMultiParentsDoNotAgreeOnGrace() {
                 new KStreamSessionWindowAggregate<String, Long, Integer>(
                     SessionWindows.ofInactivityGapAndGrace(ofMillis(10L), ofMillis(1234L)),
                     "asdf",
+                    EmitStrategy.onWindowUpdate(),
                     null,
                     null,
                     null
@@ -209,6 +214,7 @@ public void shouldUseMaxIfMultiParentsDoNotAgreeOnGrace() {
                 new KStreamWindowAggregate<String, Long, Integer, TimeWindow>(
                     TimeWindows.ofSizeAndGrace(ofMillis(10L), ofMillis(4321L)),
                     "asdf",
+                    EmitStrategy.onWindowUpdate(),
                     null,
                     null
                 ),
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContextTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContextTest.java
index f427227365163..90f3e61368f9d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContextTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/AbstractProcessorContextTest.java
@@ -32,6 +32,7 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.RocksDBConfigSetter;
@@ -263,5 +264,18 @@ public void registerCacheFlushListener(final String namespace, final DirtyEntryF
         public String changelogFor(final String storeName) {
             return ProcessorStateManager.storeChangelogTopic(applicationId(), storeName, taskId().topologyName());
         }
+
+        @Override
+        public <K, V> void forward(final FixedKeyRecord<K, V> record) {
+            forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+        }
+
+        @Override
+        public <K, V> void forward(final FixedKeyRecord<K, V> record, final String childName) {
+            forward(
+                new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+                childName
+            );
+        }
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreatorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreatorTest.java
index 4b4ff571673f6..538360bd63139 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreatorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ActiveTaskCreatorTest.java
@@ -30,7 +30,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.TimestampExtractor;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 import org.apache.kafka.test.MockClientSupplier;
 import org.easymock.EasyMockRunner;
@@ -60,6 +60,7 @@
 import static org.hamcrest.Matchers.closeTo;
 import static org.hamcrest.core.IsNot.not;
 import static org.junit.Assert.assertThrows;
+import static java.util.Collections.emptySet;
 
 @RunWith(EasyMockRunner.class)
 public class ActiveTaskCreatorTest {
@@ -478,6 +479,7 @@ private void createTasks() {
         reset(builder, stateDirectory);
         expect(builder.topologyConfigs()).andStubReturn(new TopologyConfig(new StreamsConfig(properties)));
         expect(builder.buildSubtopology(0)).andReturn(topology).anyTimes();
+        expect(topology.sinkTopics()).andStubReturn(emptySet());
         expect(stateDirectory.getOrCreateDirectoryForTask(task00)).andReturn(mock(File.class));
         expect(stateDirectory.checkpointFileFor(task00)).andReturn(mock(File.class));
         expect(stateDirectory.getOrCreateDirectoryForTask(task01)).andReturn(mock(File.class));
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ClientUtilsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ClientUtilsTest.java
index a6c5e3d0b4d31..d715a3f975bc2 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ClientUtilsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ClientUtilsTest.java
@@ -17,6 +17,7 @@
 package org.apache.kafka.streams.processor.internals;
 
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import org.apache.kafka.clients.admin.Admin;
@@ -24,29 +25,85 @@
 import org.apache.kafka.clients.admin.ListOffsetsResult;
 import org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo;
 import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.KafkaFuture;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.TimeoutException;
+import org.apache.kafka.common.header.Headers;
+import org.apache.kafka.common.header.internals.RecordHeader;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.easymock.EasyMock;
 import org.junit.Test;
 
+import static java.util.Arrays.asList;
 import static java.util.Collections.emptySet;
 import static org.apache.kafka.common.utils.Utils.mkSet;
+import static org.apache.kafka.streams.processor.internals.ClientUtils.consumerRecordSizeInBytes;
 import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchCommittedOffsets;
 import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsets;
+import static org.apache.kafka.streams.processor.internals.ClientUtils.producerRecordSizeInBytes;
+
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.replay;
 import static org.easymock.EasyMock.verify;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 
 public class ClientUtilsTest {
 
+    // consumer and producer records use utf8 encoding for topic name, header keys, etc
+    private static final String TOPIC = "topic";
+    private static final int TOPIC_BYTES = 5;
+
+    private static final byte[] KEY = "key".getBytes();
+    private static final int KEY_BYTES = 3;
+
+    private static final byte[] VALUE = "value".getBytes();
+    private static final int VALUE_BYTES = 5;
+
+    private static final Headers HEADERS = new RecordHeaders(asList(
+        new RecordHeader("h1", "headerVal1".getBytes()),   // 2 + 10 --> 12 bytes
+        new RecordHeader("h2", "headerVal2".getBytes())    // 2 + 10 --> 12 bytes
+    ));
+    private static final int HEADERS_BYTES = 24;
+
+    // 20 bytes
+    private static final int RECORD_METADATA_BYTES =
+        8 + // timestamp
+        8 + // offset
+        4;  // partition
+
+    // 57 bytes
+    private static final long SIZE_IN_BYTES =
+        KEY_BYTES +
+        VALUE_BYTES +
+        TOPIC_BYTES +
+        HEADERS_BYTES +
+        RECORD_METADATA_BYTES;
+
+    // 54 bytes
+    private static final long NULL_KEY_SIZE_IN_BYTES =
+        VALUE_BYTES +
+        TOPIC_BYTES +
+        HEADERS_BYTES +
+        RECORD_METADATA_BYTES;
+
+    // 52 bytes
+    private static final long TOMBSTONE_SIZE_IN_BYTES =
+        KEY_BYTES +
+        TOPIC_BYTES +
+        HEADERS_BYTES +
+        RECORD_METADATA_BYTES;
+
     private static final Set<TopicPartition> PARTITIONS = mkSet(
-        new TopicPartition("topic", 1),
-        new TopicPartition("topic", 2)
+        new TopicPartition(TOPIC, 1),
+        new TopicPartition(TOPIC, 2)
     );
 
     @Test
@@ -121,5 +178,98 @@ public void fetchEndOffsetsShouldRethrowExecutionExceptionAsStreamsException() t
         assertThrows(StreamsException.class, () -> fetchEndOffsets(PARTITIONS, adminClient));
         verify(adminClient);
     }
+    
+    @Test
+    public void shouldComputeSizeInBytesForConsumerRecord() {
+        final ConsumerRecord<byte[], byte[]> record = new ConsumerRecord<>(
+            TOPIC,
+            1,
+            0L,
+            0L,
+            TimestampType.CREATE_TIME,
+            KEY_BYTES,
+            VALUE_BYTES,
+            KEY,
+            VALUE,
+            HEADERS,
+            Optional.empty()
+        );
+
+        assertThat(consumerRecordSizeInBytes(record), equalTo(SIZE_IN_BYTES));
+    }
+
+    @Test
+    public void shouldComputeSizeInBytesForProducerRecord() {
+        final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(
+            TOPIC,
+            1,
+            0L,
+            KEY,
+            VALUE,
+            HEADERS
+        );
+        assertThat(producerRecordSizeInBytes(record), equalTo(SIZE_IN_BYTES));
+    }
 
+    @Test
+    public void shouldComputeSizeInBytesForConsumerRecordWithNullKey() {
+        final ConsumerRecord<byte[], byte[]> record = new ConsumerRecord<>(
+            TOPIC,
+            1,
+            0,
+            0L,
+            TimestampType.CREATE_TIME,
+            0,
+            5,
+            null,
+            VALUE,
+            HEADERS,
+            Optional.empty()
+        );
+        assertThat(consumerRecordSizeInBytes(record), equalTo(NULL_KEY_SIZE_IN_BYTES));
+    }
+
+    @Test
+    public void shouldComputeSizeInBytesForProducerRecordWithNullKey() {
+        final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(
+            TOPIC,
+            1,
+            0L,
+            null,
+            VALUE,
+            HEADERS
+        );
+        assertThat(producerRecordSizeInBytes(record), equalTo(NULL_KEY_SIZE_IN_BYTES));
+    }
+
+    @Test
+    public void shouldComputeSizeInBytesForConsumerRecordWithNullValue() {
+        final ConsumerRecord<byte[], byte[]> record = new ConsumerRecord<>(
+            TOPIC,
+            1,
+            0,
+            0L,
+            TimestampType.CREATE_TIME,
+            KEY_BYTES,
+            0,
+            KEY,
+            null,
+            HEADERS,
+            Optional.empty()
+        );
+        assertThat(consumerRecordSizeInBytes(record), equalTo(TOMBSTONE_SIZE_IN_BYTES));
+    }
+
+    @Test
+    public void shouldComputeSizeInBytesForProducerRecordWithNullValue() {
+        final ProducerRecord<byte[], byte[]> record = new ProducerRecord<>(
+            TOPIC,
+            1,
+            0L,
+            KEY,
+            null,
+            HEADERS
+        );
+        assertThat(producerRecordSizeInBytes(record), equalTo(TOMBSTONE_SIZE_IN_BYTES));
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdaterTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdaterTest.java
new file mode 100644
index 0000000000000..e5718f53077a0
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/DefaultStateUpdaterTest.java
@@ -0,0 +1,1437 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.common.utils.Time;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.errors.StreamsException;
+import org.apache.kafka.streams.errors.TaskCorruptedException;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.internals.StateUpdater.ExceptionAndTasks;
+import org.apache.kafka.streams.processor.internals.Task.State;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.InOrder;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.common.utils.Utils.mkObjectProperties;
+import static org.apache.kafka.common.utils.Utils.mkSet;
+import static org.apache.kafka.streams.StreamsConfig.producerPrefix;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.standbyTask;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statefulTask;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statelessTask;
+import static org.apache.kafka.test.TestUtils.waitForCondition;
+import static org.easymock.EasyMock.anyBoolean;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.Mockito.atLeast;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.timeout;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+class DefaultStateUpdaterTest {
+
+    private final static int COMMIT_INTERVAL = 100;
+    private final static long CALL_TIMEOUT = 1000;
+    private final static long VERIFICATION_TIMEOUT = 15000;
+    private final static TopicPartition TOPIC_PARTITION_A_0 = new TopicPartition("topicA", 0);
+    private final static TopicPartition TOPIC_PARTITION_A_1 = new TopicPartition("topicA", 1);
+    private final static TopicPartition TOPIC_PARTITION_B_0 = new TopicPartition("topicB", 0);
+    private final static TopicPartition TOPIC_PARTITION_C_0 = new TopicPartition("topicC", 0);
+    private final static TopicPartition TOPIC_PARTITION_D_0 = new TopicPartition("topicD", 0);
+    private final static TaskId TASK_0_0 = new TaskId(0, 0);
+    private final static TaskId TASK_0_1 = new TaskId(0, 1);
+    private final static TaskId TASK_0_2 = new TaskId(0, 2);
+    private final static TaskId TASK_1_0 = new TaskId(1, 0);
+    private final static TaskId TASK_1_1 = new TaskId(1, 1);
+
+    // need an auto-tick timer to work for draining with timeout
+    private final Time time = new MockTime(1L);
+    private final StreamsConfig config = new StreamsConfig(configProps());
+    private final ChangelogReader changelogReader = mock(ChangelogReader.class);
+    private final DefaultStateUpdater stateUpdater = new DefaultStateUpdater(config, changelogReader, time);
+
+    @AfterEach
+    public void tearDown() {
+        stateUpdater.shutdown(Duration.ofMinutes(1));
+    }
+
+    private Properties configProps() {
+        return mkObjectProperties(mkMap(
+            mkEntry(StreamsConfig.APPLICATION_ID_CONFIG, "appId"),
+            mkEntry(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:2171"),
+            mkEntry(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE_V2),
+            mkEntry(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, COMMIT_INTERVAL),
+            mkEntry(producerPrefix(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG), COMMIT_INTERVAL)
+        ));
+    }
+
+    @Test
+    public void shouldShutdownStateUpdater() {
+        stateUpdater.start();
+
+        stateUpdater.shutdown(Duration.ofMinutes(1));
+
+        verify(changelogReader).clear();
+    }
+
+    @Test
+    public void shouldShutdownStateUpdaterAndRestart() {
+        stateUpdater.start();
+
+        stateUpdater.shutdown(Duration.ofMinutes(1));
+
+        stateUpdater.start();
+
+        stateUpdater.shutdown(Duration.ofMinutes(1));
+
+        verify(changelogReader, times(2)).clear();
+    }
+
+    @Test
+    public void shouldThrowIfStatelessTaskNotInStateRestoring() {
+        shouldThrowIfActiveTaskNotInStateRestoring(statelessTask(TASK_0_0).build());
+    }
+
+    @Test
+    public void shouldThrowIfStatefulTaskNotInStateRestoring() {
+        shouldThrowIfActiveTaskNotInStateRestoring(statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).build());
+    }
+
+    private void shouldThrowIfActiveTaskNotInStateRestoring(final StreamTask task) {
+        shouldThrowIfTaskNotInGivenState(task, State.RESTORING);
+    }
+
+    @Test
+    public void shouldThrowIfStandbyTaskNotInStateRunning() {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).build();
+        shouldThrowIfTaskNotInGivenState(task, State.RUNNING);
+    }
+
+    private void shouldThrowIfTaskNotInGivenState(final Task task, final State correctState) {
+        for (final State state : State.values()) {
+            if (state != correctState) {
+                when(task.state()).thenReturn(state);
+                assertThrows(IllegalStateException.class, () -> stateUpdater.add(task));
+            }
+        }
+    }
+
+    @Test
+    public void shouldThrowIfAddingActiveTasksWithSameId() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldThrowIfAddingTasksWithSameId(task1, task2);
+    }
+
+    @Test
+    public void shouldThrowIfAddingStandbyTasksWithSameId() throws Exception {
+        final StandbyTask task1 = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        shouldThrowIfAddingTasksWithSameId(task1, task2);
+    }
+
+    @Test
+    public void shouldThrowIfAddingActiveAndStandbyTaskWithSameId() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        shouldThrowIfAddingTasksWithSameId(task1, task2);
+    }
+
+    @Test
+    public void shouldThrowIfAddingStandbyAndActiveTaskWithSameId() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        shouldThrowIfAddingTasksWithSameId(task2, task1);
+    }
+
+    private void shouldThrowIfAddingTasksWithSameId(final Task task1, final Task task2) throws Exception {
+        stateUpdater.start();
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        verifyFailedTasks(IllegalStateException.class, task1);
+    }
+
+    @Test
+    public void shouldImmediatelyAddSingleStatelessTaskToRestoredTasks() throws Exception {
+        final StreamTask task1 = statelessTask(TASK_0_0).inState(State.RESTORING).build();
+        shouldImmediatelyAddStatelessTasksToRestoredTasks(task1);
+    }
+
+    @Test
+    public void shouldImmediatelyAddMultipleStatelessTasksToRestoredTasks() throws Exception {
+        final StreamTask task1 = statelessTask(TASK_0_0).inState(State.RESTORING).build();
+        final StreamTask task2 = statelessTask(TASK_0_2).inState(State.RESTORING).build();
+        final StreamTask task3 = statelessTask(TASK_1_0).inState(State.RESTORING).build();
+        shouldImmediatelyAddStatelessTasksToRestoredTasks(task1, task2, task3);
+    }
+
+    private void shouldImmediatelyAddStatelessTasksToRestoredTasks(final StreamTask... tasks) throws Exception {
+        stateUpdater.start();
+        for (final StreamTask task : tasks) {
+            stateUpdater.add(task);
+        }
+
+        verifyRestoredActiveTasks(tasks);
+        verifyNeverCheckpointTasks(tasks);
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+    }
+
+    @Test
+    public void shouldRestoreSingleActiveStatefulTask() throws Exception {
+        final StreamTask task =
+            statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs())
+            .thenReturn(Collections.emptySet())
+            .thenReturn(mkSet(TOPIC_PARTITION_A_0))
+            .thenReturn(mkSet(TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0));
+        when(changelogReader.allChangelogsCompleted())
+            .thenReturn(false)
+            .thenReturn(false)
+            .thenReturn(true);
+        stateUpdater.start();
+
+        stateUpdater.add(task);
+
+        verifyRestoredActiveTasks(task);
+        verifyCheckpointTasks(true, task);
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+        verify(changelogReader, times(1)).enforceRestoreActive();
+        verify(changelogReader, atLeast(3)).restore(anyMap());
+        verify(changelogReader, never()).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldRestoreMultipleActiveStatefulTasks() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StreamTask task3 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs())
+            .thenReturn(Collections.emptySet())
+            .thenReturn(mkSet(TOPIC_PARTITION_C_0))
+            .thenReturn(mkSet(TOPIC_PARTITION_C_0, TOPIC_PARTITION_A_0))
+            .thenReturn(mkSet(TOPIC_PARTITION_C_0, TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0));
+        when(changelogReader.allChangelogsCompleted())
+            .thenReturn(false)
+            .thenReturn(false)
+            .thenReturn(false)
+            .thenReturn(true);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+
+        verifyRestoredActiveTasks(task3, task1, task2);
+        verifyCheckpointTasks(true, task3, task1, task2);
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+        verify(changelogReader, times(3)).enforceRestoreActive();
+        verify(changelogReader, atLeast(4)).restore(anyMap());
+        verify(changelogReader, never()).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldDrainRestoredActiveTasks() throws Exception {
+        assertTrue(stateUpdater.drainRestoredActiveTasks(Duration.ZERO).isEmpty());
+
+        final StreamTask task1 = statelessTask(TASK_0_0).inState(State.RESTORING).build();
+        stateUpdater.start();
+        stateUpdater.add(task1);
+
+        verifyDrainingRestoredActiveTasks(task1);
+
+        final StreamTask task2 = statelessTask(TASK_1_1).inState(State.RESTORING).build();
+        final StreamTask task3 = statelessTask(TASK_1_0).inState(State.RESTORING).build();
+        final StreamTask task4 = statelessTask(TASK_0_2).inState(State.RESTORING).build();
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+        stateUpdater.add(task4);
+
+        verifyDrainingRestoredActiveTasks(task2, task3, task4);
+    }
+
+    @Test
+    public void shouldUpdateSingleStandbyTask() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0))
+            .inState(State.RUNNING).build();
+        shouldUpdateStandbyTasks(task);
+    }
+
+    @Test
+    public void shouldUpdateMultipleStandbyTasks() throws Exception {
+        final StandbyTask task1 = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        final StandbyTask task3 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        shouldUpdateStandbyTasks(task1, task2, task3);
+    }
+
+    private void shouldUpdateStandbyTasks(final StandbyTask... tasks) throws Exception {
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+
+        for (final StandbyTask task : tasks) {
+            stateUpdater.add(task);
+        }
+
+        verifyUpdatingStandbyTasks(tasks);
+        verifyRestoredActiveTasks();
+        verifyExceptionsAndFailedTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+        verify(changelogReader, times(1)).transitToUpdateStandby();
+        verify(changelogReader, timeout(VERIFICATION_TIMEOUT).atLeast(1)).restore(anyMap());
+        verify(changelogReader, never()).enforceRestoreActive();
+    }
+
+    @Test
+    public void shouldRestoreActiveStatefulTasksAndUpdateStandbyTasks() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask task3 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final StandbyTask task4 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        when(changelogReader.completedChangelogs())
+            .thenReturn(Collections.emptySet())
+            .thenReturn(mkSet(TOPIC_PARTITION_A_0))
+            .thenReturn(mkSet(TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+        stateUpdater.add(task4);
+
+        verifyRestoredActiveTasks(task2, task1);
+        verifyCheckpointTasks(true, task2, task1);
+        verifyUpdatingStandbyTasks(task4, task3);
+        verifyExceptionsAndFailedTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+        verify(changelogReader, atLeast(3)).restore(anyMap());
+        final InOrder orderVerifier = inOrder(changelogReader, task1, task2);
+        orderVerifier.verify(changelogReader, times(2)).enforceRestoreActive();
+        orderVerifier.verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldRestoreActiveStatefulTaskThenUpdateStandbyTaskAndAgainRestoreActiveStatefulTask() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final StreamTask task3 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs())
+            .thenReturn(Collections.emptySet())
+            .thenReturn(mkSet(TOPIC_PARTITION_A_0))
+            .thenReturn(mkSet(TOPIC_PARTITION_B_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        verifyRestoredActiveTasks(task1);
+        verifyCheckpointTasks(true, task1);
+        verifyUpdatingStandbyTasks(task2);
+        final InOrder orderVerifier = inOrder(changelogReader);
+        orderVerifier.verify(changelogReader, times(1)).enforceRestoreActive();
+        orderVerifier.verify(changelogReader, times(1)).transitToUpdateStandby();
+
+        stateUpdater.add(task3);
+
+        verifyRestoredActiveTasks(task1, task3);
+        verifyCheckpointTasks(true, task3);
+        orderVerifier.verify(changelogReader, times(1)).enforceRestoreActive();
+        orderVerifier.verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldUpdateStandbyTaskAfterAllActiveStatefulTasksFailed() throws Exception {
+        final StreamTask activeTask1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask activeTask2 = statefulTask(TASK_0_1, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final TaskCorruptedException taskCorruptedException =
+            new TaskCorruptedException(mkSet(activeTask1.id(), activeTask2.id()));
+        final Map<TaskId, Task> updatingTasks1 = mkMap(
+            mkEntry(activeTask1.id(), activeTask1),
+            mkEntry(activeTask2.id(), activeTask2),
+            mkEntry(standbyTask.id(), standbyTask)
+        );
+        doThrow(taskCorruptedException).doNothing().when(changelogReader).restore(updatingTasks1);
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(activeTask2);
+        stateUpdater.add(standbyTask);
+
+        final ExceptionAndTasks expectedExceptionAndTasks =
+            new ExceptionAndTasks(mkSet(activeTask1, activeTask2), taskCorruptedException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        final InOrder orderVerifier = inOrder(changelogReader);
+        orderVerifier.verify(changelogReader, atLeast(1)).enforceRestoreActive();
+        orderVerifier.verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldUpdateStandbyTaskAfterAllActiveStatefulTasksRemoved() throws Exception {
+        final StreamTask activeTask1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask activeTask2 = statefulTask(TASK_0_1, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(activeTask2);
+        stateUpdater.add(standbyTask);
+        verifyUpdatingTasks(activeTask1, activeTask2, standbyTask);
+
+        stateUpdater.remove(activeTask1.id());
+        stateUpdater.remove(activeTask2.id());
+
+        verifyRemovedTasks(activeTask1, activeTask2);
+        final InOrder orderVerifier = inOrder(changelogReader);
+        orderVerifier.verify(changelogReader, atLeast(1)).enforceRestoreActive();
+        orderVerifier.verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldRemoveActiveStatefulTask() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldRemoveStatefulTask(task);
+    }
+
+    @Test
+    public void shouldRemoveStandbyTask() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldRemoveStatefulTask(task);
+    }
+
+    private void shouldRemoveStatefulTask(final Task task) throws Exception {
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+
+        stateUpdater.remove(task.id());
+
+        verifyRemovedTasks(task);
+        verifyCheckpointTasks(true, task);
+        verifyRestoredActiveTasks();
+        verifyUpdatingTasks();
+        verifyPausedTasks();
+        verifyExceptionsAndFailedTasks();
+        verify(changelogReader).unregister(task.changelogPartitions());
+    }
+
+    @Test
+    public void shouldRemovePausedTask() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+
+        stateUpdater.start();
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        stateUpdater.pause(task1.id());
+        stateUpdater.pause(task2.id());
+
+        verifyPausedTasks(task1, task2);
+        verifyRemovedTasks();
+        verifyUpdatingTasks();
+
+        stateUpdater.remove(task1.id());
+        stateUpdater.remove(task2.id());
+
+        verifyRemovedTasks(task1, task2);
+        verifyPausedTasks();
+        verifyCheckpointTasks(true, task1, task2);
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotRemoveActiveStatefulTaskFromRestoredActiveTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotRemoveTaskFromRestoredActiveTasks(task);
+    }
+
+    @Test
+    public void shouldNotRemoveStatelessTaskFromRestoredActiveTasks() throws Exception {
+        final StreamTask task = statelessTask(TASK_0_0).inState(State.RESTORING).build();
+        shouldNotRemoveTaskFromRestoredActiveTasks(task);
+    }
+
+    private void shouldNotRemoveTaskFromRestoredActiveTasks(final StreamTask task) throws Exception {
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.singleton(TOPIC_PARTITION_A_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+        verifyRestoredActiveTasks(task);
+
+        stateUpdater.remove(task.id());
+        stateUpdater.remove(controlTask.id());
+
+        verifyRemovedTasks(controlTask);
+        verifyRestoredActiveTasks(task);
+        verifyUpdatingTasks();
+        verifyPausedTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotRemoveActiveStatefulTaskFromFailedTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotRemoveTaskFromFailedTasks(task);
+    }
+
+    @Test
+    public void shouldNotRemoveStandbyTaskFromFailedTasks() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldNotRemoveTaskFromFailedTasks(task);
+    }
+
+    private void shouldNotRemoveTaskFromFailedTasks(final Task task) throws Exception {
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StreamsException streamsException = new StreamsException("Something happened", task.id());
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        final Map<TaskId, Task> updatingTasks = mkMap(
+            mkEntry(task.id(), task),
+            mkEntry(controlTask.id(), controlTask)
+        );
+        doThrow(streamsException)
+            .doNothing()
+            .when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task), streamsException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+
+        stateUpdater.remove(task.id());
+        stateUpdater.remove(controlTask.id());
+
+        verifyRemovedTasks(controlTask);
+        verifyPausedTasks();
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks();
+        verifyRestoredActiveTasks();
+    }
+
+    @Test
+    public void shouldPauseActiveStatefulTask() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldPauseStatefulTask(task);
+        verify(changelogReader, never()).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldPauseStandbyTask() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldPauseStatefulTask(task);
+        verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    @Test
+    public void shouldPauseActiveTaskAndTransitToUpdateStandby() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+
+        stateUpdater.start();
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        stateUpdater.pause(task1.id());
+
+        verifyPausedTasks(task1);
+        verifyCheckpointTasks(true, task1);
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyUpdatingTasks(task2);
+        verifyExceptionsAndFailedTasks();
+        verify(changelogReader, times(1)).enforceRestoreActive();
+        verify(changelogReader, times(1)).transitToUpdateStandby();
+    }
+
+    private void shouldPauseStatefulTask(final Task task) throws Exception {
+        stateUpdater.start();
+        stateUpdater.add(task);
+
+        stateUpdater.pause(task.id());
+
+        verifyPausedTasks(task);
+        verifyCheckpointTasks(true, task);
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotPausingNonExistTasks() throws Exception {
+        stateUpdater.start();
+        stateUpdater.pause(TASK_0_0);
+
+        verifyPausedTasks();
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotPauseActiveStatefulTaskInRestoredActiveTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.singleton(TOPIC_PARTITION_A_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+        verifyRestoredActiveTasks(task);
+
+        stateUpdater.pause(task.id());
+        stateUpdater.pause(controlTask.id());
+
+        verifyPausedTasks(controlTask);
+        verifyRestoredActiveTasks(task);
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotPauseActiveStatefulTaskInFailedTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotPauseTaskInFailedTasks(task);
+    }
+
+    @Test
+    public void shouldNotPauseStandbyTaskInFailedTasks() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldNotPauseTaskInFailedTasks(task);
+    }
+
+    private void shouldNotPauseTaskInFailedTasks(final Task task) throws Exception {
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StreamsException streamsException = new StreamsException("Something happened", task.id());
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        final Map<TaskId, Task> updatingTasks = mkMap(
+            mkEntry(task.id(), task),
+            mkEntry(controlTask.id(), controlTask)
+        );
+        doThrow(streamsException)
+            .doNothing()
+            .when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task), streamsException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+
+        stateUpdater.pause(task.id());
+        stateUpdater.pause(controlTask.id());
+
+        verifyPausedTasks(controlTask);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks();
+        verifyRestoredActiveTasks();
+    }
+
+    @Test
+    public void shouldNotPauseActiveStatefulTaskInRemovedTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotPauseTaskInRemovedTasks(task);
+    }
+
+    @Test
+    public void shouldNotPauseStandbyTaskInRemovedTasks() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldNotPauseTaskInRemovedTasks(task);
+    }
+
+    private void shouldNotPauseTaskInRemovedTasks(final Task task) throws Exception {
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+
+        stateUpdater.remove(task.id());
+
+        verifyRemovedTasks(task);
+        verifyCheckpointTasks(true, task);
+        verifyRestoredActiveTasks();
+        verifyUpdatingTasks();
+        verifyPausedTasks();
+        verifyExceptionsAndFailedTasks();
+        verify(changelogReader).unregister(task.changelogPartitions());
+
+        stateUpdater.pause(task.id());
+
+        verifyRemovedTasks(task);
+        verifyUpdatingTasks();
+        verifyPausedTasks();
+    }
+
+    @Test
+    public void shouldResumeActiveStatefulTask() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldResumeStatefulTask(task);
+        verify(changelogReader, times(2)).enforceRestoreActive();
+    }
+
+    @Test
+    public void shouldResumeStandbyTask() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldResumeStatefulTask(task);
+        verify(changelogReader, times(2)).transitToUpdateStandby();
+    }
+
+    private void shouldResumeStatefulTask(final Task task) throws Exception {
+        stateUpdater.start();
+        stateUpdater.add(task);
+
+        stateUpdater.pause(task.id());
+
+        verifyPausedTasks(task);
+        verifyUpdatingTasks();
+
+        stateUpdater.resume(task.id());
+
+        verifyPausedTasks();
+        verifyUpdatingTasks(task);
+    }
+
+    @Test
+    public void shouldNotResumeNonExistingTasks() throws Exception {
+        stateUpdater.start();
+        stateUpdater.resume(TASK_0_0);
+
+        verifyPausedTasks();
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyUpdatingTasks();
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotResumeActiveStatefulTaskInRestoredActiveTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.singleton(TOPIC_PARTITION_A_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+
+        verifyRestoredActiveTasks(task);
+
+        stateUpdater.resume(task.id());
+        stateUpdater.resume(controlTask.id());
+
+        verifyPausedTasks();
+        verifyRestoredActiveTasks(task);
+        verifyUpdatingTasks(controlTask);
+        verifyExceptionsAndFailedTasks();
+    }
+
+    @Test
+    public void shouldNotResumeActiveStatefulTaskInRemovedTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotPauseTaskInRemovedTasks(task);
+    }
+
+    @Test
+    public void shouldNotResumeStandbyTaskInRemovedTasks() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldNotResumeTaskInRemovedTasks(task);
+    }
+
+    private void shouldNotResumeTaskInRemovedTasks(final Task task) throws Exception {
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task);
+
+        verifyUpdatingTasks(task);
+        verifyExceptionsAndFailedTasks();
+
+        stateUpdater.remove(task.id());
+
+        verifyRemovedTasks(task);
+        verifyUpdatingTasks();
+
+        stateUpdater.resume(task.id());
+
+        verifyUpdatingTasks();
+    }
+
+    @Test
+    public void shouldNotResumeActiveStatefulTaskInFailedTasks() throws Exception {
+        final StreamTask task = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        shouldNotPauseTaskInFailedTasks(task);
+    }
+
+    @Test
+    public void shouldNotResumeStandbyTaskInFailedTasks() throws Exception {
+        final StandbyTask task = standbyTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        shouldNotResumeTaskInFailedTasks(task);
+    }
+
+    private void shouldNotResumeTaskInFailedTasks(final Task task) throws Exception {
+        final StreamTask controlTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StreamsException streamsException = new StreamsException("Something happened", task.id());
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        final Map<TaskId, Task> updatingTasks = mkMap(
+                mkEntry(task.id(), task),
+                mkEntry(controlTask.id(), controlTask)
+        );
+        doThrow(streamsException)
+                .doNothing()
+                .when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task);
+        stateUpdater.add(controlTask);
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task), streamsException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks(controlTask);
+
+        stateUpdater.resume(task.id());
+        stateUpdater.resume(controlTask.id());
+
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks(controlTask);
+    }
+
+    @Test
+    public void shouldDrainRemovedTasks() throws Exception {
+        assertTrue(stateUpdater.drainRemovedTasks().isEmpty());
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        stateUpdater.add(task1);
+        stateUpdater.remove(task1.id());
+
+        verifyDrainingRemovedTasks(task1);
+
+        final StreamTask task2 = statefulTask(TASK_1_1, mkSet(TOPIC_PARTITION_C_0)).inState(State.RESTORING).build();
+        final StreamTask task3 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task4 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_D_0)).inState(State.RESTORING).build();
+        stateUpdater.add(task2);
+        stateUpdater.remove(task2.id());
+        stateUpdater.add(task3);
+        stateUpdater.remove(task3.id());
+        stateUpdater.add(task4);
+        stateUpdater.remove(task4.id());
+
+        verifyDrainingRemovedTasks(task2, task3, task4);
+    }
+
+    @Test
+    public void shouldAddFailedTasksToQueueWhenRestoreThrowsStreamsExceptionWithoutTask() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        final String exceptionMessage = "The Streams were crossed!";
+        final StreamsException streamsException = new StreamsException(exceptionMessage);
+        final Map<TaskId, Task> updatingTasks = mkMap(
+            mkEntry(task1.id(), task1),
+            mkEntry(task2.id(), task2)
+        );
+        doNothing().doThrow(streamsException).when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task1, task2), streamsException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyRemovedTasks();
+        verifyPausedTasks();
+        verifyUpdatingTasks();
+        verifyRestoredActiveTasks();
+    }
+
+    @Test
+    public void shouldAddFailedTasksToQueueWhenRestoreThrowsStreamsExceptionWithTask() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask task3 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final String exceptionMessage = "The Streams were crossed!";
+        final StreamsException streamsException1 = new StreamsException(exceptionMessage, task1.id());
+        final StreamsException streamsException2 = new StreamsException(exceptionMessage, task3.id());
+        final Map<TaskId, Task> updatingTasksBeforeFirstThrow = mkMap(
+            mkEntry(task1.id(), task1),
+            mkEntry(task2.id(), task2),
+            mkEntry(task3.id(), task3)
+        );
+        final Map<TaskId, Task> updatingTasksBeforeSecondThrow = mkMap(
+            mkEntry(task2.id(), task2),
+            mkEntry(task3.id(), task3)
+        );
+        doNothing()
+            .doThrow(streamsException1)
+            .when(changelogReader).restore(updatingTasksBeforeFirstThrow);
+        doNothing()
+            .doThrow(streamsException2)
+            .when(changelogReader).restore(updatingTasksBeforeSecondThrow);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+
+        final ExceptionAndTasks expectedExceptionAndTasks1 = new ExceptionAndTasks(mkSet(task1), streamsException1);
+        final ExceptionAndTasks expectedExceptionAndTasks2 = new ExceptionAndTasks(mkSet(task3), streamsException2);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks1, expectedExceptionAndTasks2);
+        verifyUpdatingTasks(task2);
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+    }
+
+    @Test
+    public void shouldAddFailedTasksToQueueWhenRestoreThrowsTaskCorruptedException() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        final StreamTask task3 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RESTORING).build();
+        final Set<TaskId> expectedTaskIds = mkSet(task1.id(), task2.id());
+        final TaskCorruptedException taskCorruptedException = new TaskCorruptedException(expectedTaskIds);
+        final Map<TaskId, Task> updatingTasks = mkMap(
+            mkEntry(task1.id(), task1),
+            mkEntry(task2.id(), task2),
+            mkEntry(task3.id(), task3)
+        );
+        doNothing().doThrow(taskCorruptedException).doNothing().when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task1, task2), taskCorruptedException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks(task3);
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+    }
+
+    @Test
+    public void shouldAddFailedTasksToQueueWhenUncaughtExceptionIsThrown() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask task2 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RUNNING).build();
+        final IllegalStateException illegalStateException = new IllegalStateException("Nobody expects the Spanish inquisition!");
+        final Map<TaskId, Task> updatingTasks = mkMap(
+            mkEntry(task1.id(), task1),
+            mkEntry(task2.id(), task2)
+        );
+        doThrow(illegalStateException).when(changelogReader).restore(updatingTasks);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+
+        final ExceptionAndTasks expectedExceptionAndTasks = new ExceptionAndTasks(mkSet(task1, task2), illegalStateException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks);
+        verifyUpdatingTasks();
+        verifyRestoredActiveTasks();
+        verifyRemovedTasks();
+        verifyPausedTasks();
+    }
+
+    @Test
+    public void shouldDrainFailedTasksAndExceptions() throws Exception {
+        assertTrue(stateUpdater.drainExceptionsAndFailedTasks().isEmpty());
+
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_1_1, mkSet(TOPIC_PARTITION_C_0)).inState(State.RESTORING).build();
+        final StreamTask task3 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task4 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_D_0)).inState(State.RESTORING).build();
+        final String exceptionMessage = "The Streams were crossed!";
+        final StreamsException streamsException1 = new StreamsException(exceptionMessage, task1.id());
+        final Map<TaskId, Task> updatingTasks1 = mkMap(
+            mkEntry(task1.id(), task1)
+        );
+        doThrow(streamsException1)
+            .when(changelogReader).restore(updatingTasks1);
+        final StreamsException streamsException2 = new StreamsException(exceptionMessage, task2.id());
+        final StreamsException streamsException3 = new StreamsException(exceptionMessage, task3.id());
+        final StreamsException streamsException4 = new StreamsException(exceptionMessage, task4.id());
+        final Map<TaskId, Task> updatingTasks2 = mkMap(
+            mkEntry(task2.id(), task2),
+            mkEntry(task3.id(), task3),
+            mkEntry(task4.id(), task4)
+        );
+        doThrow(streamsException2).when(changelogReader).restore(updatingTasks2);
+        final Map<TaskId, Task> updatingTasks3 = mkMap(
+            mkEntry(task3.id(), task3),
+            mkEntry(task4.id(), task4)
+        );
+        doThrow(streamsException3).when(changelogReader).restore(updatingTasks3);
+        final Map<TaskId, Task> updatingTasks4 = mkMap(
+            mkEntry(task4.id(), task4)
+        );
+        doThrow(streamsException4).when(changelogReader).restore(updatingTasks4);
+        stateUpdater.start();
+
+        stateUpdater.add(task1);
+
+        final ExceptionAndTasks expectedExceptionAndTasks1 = new ExceptionAndTasks(mkSet(task1), streamsException1);
+        verifyDrainingExceptionsAndFailedTasks(expectedExceptionAndTasks1);
+
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+        stateUpdater.add(task4);
+
+        final ExceptionAndTasks expectedExceptionAndTasks2 = new ExceptionAndTasks(mkSet(task2), streamsException2);
+        final ExceptionAndTasks expectedExceptionAndTasks3 = new ExceptionAndTasks(mkSet(task3), streamsException3);
+        final ExceptionAndTasks expectedExceptionAndTasks4 = new ExceptionAndTasks(mkSet(task4), streamsException4);
+        verifyDrainingExceptionsAndFailedTasks(expectedExceptionAndTasks2, expectedExceptionAndTasks3, expectedExceptionAndTasks4);
+    }
+
+    @Test
+    public void shouldAutoCheckpointTasksOnInterval() throws Exception {
+        final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask task2 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask task3 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final StandbyTask task4 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(task1);
+        stateUpdater.add(task2);
+        stateUpdater.add(task3);
+        stateUpdater.add(task4);
+        // wait for all tasks added to the thread before advance timer
+        verifyUpdatingTasks(task1, task2, task3, task4);
+
+        time.sleep(COMMIT_INTERVAL + 1);
+
+        verifyExceptionsAndFailedTasks();
+        verifyCheckpointTasks(false, task1, task2, task3, task4);
+    }
+
+    @Test
+    public void shouldNotAutoCheckpointTasksIfIntervalNotElapsed() {
+        // we need to use a non auto-ticking timer here to control how much time elapsed exactly
+        final Time time = new MockTime();
+        final DefaultStateUpdater stateUpdater = new DefaultStateUpdater(config, changelogReader, time);
+        try {
+            final StreamTask task1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+            final StreamTask task2 = statefulTask(TASK_0_2, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+            final StandbyTask task3 = standbyTask(TASK_1_0, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+            final StandbyTask task4 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+            when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+            when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+            stateUpdater.start();
+            stateUpdater.add(task1);
+            stateUpdater.add(task2);
+            stateUpdater.add(task3);
+            stateUpdater.add(task4);
+
+            time.sleep(COMMIT_INTERVAL);
+
+            verifyNeverCheckpointTasks(task1, task2, task3, task4);
+        } finally {
+            stateUpdater.shutdown(Duration.ofMinutes(1));
+        }
+    }
+
+    private void verifyCheckpointTasks(final boolean enforceCheckpoint, final Task... tasks) {
+        for (final Task task : tasks) {
+            verify(task, timeout(VERIFICATION_TIMEOUT).atLeast(1)).maybeCheckpoint(enforceCheckpoint);
+        }
+    }
+
+    private void verifyNeverCheckpointTasks(final Task... tasks) {
+        for (final Task task : tasks) {
+            verify(task, never()).maybeCheckpoint(anyBoolean());
+        }
+    }
+
+    @Test
+    public void shouldGetTasksFromInputQueue() {
+        stateUpdater.shutdown(Duration.ofMillis(Long.MAX_VALUE));
+
+        final StreamTask activeTask1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask activeTask2 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask1 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask2 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask3 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_1)).inState(State.RUNNING).build();
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(standbyTask1);
+        stateUpdater.add(standbyTask2);
+        stateUpdater.remove(TASK_0_0);
+        stateUpdater.add(activeTask2);
+        stateUpdater.add(standbyTask3);
+
+        verifyGetTasks(mkSet(activeTask1, activeTask2), mkSet(standbyTask1, standbyTask2, standbyTask3));
+    }
+
+    @Test
+    public void shouldGetTasksFromUpdatingTasks() throws Exception {
+        final StreamTask activeTask1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask activeTask2 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask1 = standbyTask(TASK_0_2, mkSet(TOPIC_PARTITION_C_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask2 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask3 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_1)).inState(State.RUNNING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(standbyTask1);
+        stateUpdater.add(standbyTask2);
+        stateUpdater.add(activeTask2);
+        stateUpdater.add(standbyTask3);
+        verifyUpdatingTasks(activeTask1, activeTask2, standbyTask1, standbyTask2, standbyTask3);
+
+        final Set<Task> tasks = stateUpdater.getTasks();
+
+        verifyGetTasks(mkSet(activeTask1, activeTask2), mkSet(standbyTask1, standbyTask2, standbyTask3));
+    }
+
+    @Test
+    public void shouldGetTasksFromRestoredActiveTasks() throws Exception {
+        final StreamTask activeTask1 = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StreamTask activeTask2 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(mkSet(TOPIC_PARTITION_A_0, TOPIC_PARTITION_B_0));
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(activeTask2);
+        verifyRestoredActiveTasks(activeTask1, activeTask2);
+
+        verifyGetTasks(mkSet(activeTask1, activeTask2), mkSet());
+
+        stateUpdater.drainRestoredActiveTasks(Duration.ofMinutes(1));
+
+        verifyGetTasks(mkSet(), mkSet());
+    }
+
+    @Test
+    public void shouldGetTasksFromExceptionsAndFailedTasks() throws Exception {
+        final StreamTask activeTask1 = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask2 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask1 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_1)).inState(State.RUNNING).build();
+        final TaskCorruptedException taskCorruptedException =
+            new TaskCorruptedException(mkSet(standbyTask1.id(), standbyTask2.id()));
+        final StreamsException streamsException = new StreamsException("The Streams were crossed!", activeTask1.id());
+        final Map<TaskId, Task> updatingTasks1 = mkMap(
+            mkEntry(activeTask1.id(), activeTask1),
+            mkEntry(standbyTask1.id(), standbyTask1),
+            mkEntry(standbyTask2.id(), standbyTask2)
+        );
+        doNothing().doThrow(taskCorruptedException).doNothing().when(changelogReader).restore(updatingTasks1);
+        final Map<TaskId, Task> updatingTasks2 = mkMap(
+            mkEntry(activeTask1.id(), activeTask1)
+        );
+        doNothing().doThrow(streamsException).doNothing().when(changelogReader).restore(updatingTasks2);
+        stateUpdater.start();
+        stateUpdater.add(standbyTask1);
+        stateUpdater.add(activeTask1);
+        stateUpdater.add(standbyTask2);
+        final ExceptionAndTasks expectedExceptionAndTasks1 =
+            new ExceptionAndTasks(mkSet(standbyTask1, standbyTask2), taskCorruptedException);
+        final ExceptionAndTasks expectedExceptionAndTasks2 = new ExceptionAndTasks(mkSet(activeTask1), streamsException);
+        verifyExceptionsAndFailedTasks(expectedExceptionAndTasks1, expectedExceptionAndTasks2);
+
+        verifyGetTasks(mkSet(activeTask1), mkSet(standbyTask1, standbyTask2));
+
+        stateUpdater.drainExceptionsAndFailedTasks();
+
+        verifyGetTasks(mkSet(), mkSet());
+    }
+
+    @Test
+    public void shouldGetTasksFromRemovedTasks() throws Exception {
+        final StreamTask activeTask = statefulTask(TASK_1_0, mkSet(TOPIC_PARTITION_B_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask2 = standbyTask(TASK_1_1, mkSet(TOPIC_PARTITION_D_0)).inState(State.RUNNING).build();
+        final StandbyTask standbyTask1 = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_1)).inState(State.RUNNING).build();
+        when(changelogReader.completedChangelogs()).thenReturn(Collections.emptySet());
+        when(changelogReader.allChangelogsCompleted()).thenReturn(false);
+        stateUpdater.start();
+        stateUpdater.add(standbyTask1);
+        stateUpdater.add(activeTask);
+        stateUpdater.add(standbyTask2);
+        stateUpdater.remove(standbyTask1.id());
+        stateUpdater.remove(standbyTask2.id());
+        stateUpdater.remove(activeTask.id());
+        verifyRemovedTasks(activeTask, standbyTask1, standbyTask2);
+
+        verifyGetTasks(mkSet(activeTask), mkSet(standbyTask1, standbyTask2));
+
+        stateUpdater.drainRemovedTasks();
+
+        verifyGetTasks(mkSet(), mkSet());
+    }
+
+    @Test
+    public void shouldGetTasksFromPausedTasks() throws Exception {
+        final StreamTask activeTask = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).inState(State.RESTORING).build();
+        final StandbyTask standbyTask = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_0)).inState(State.RUNNING).build();
+        stateUpdater.start();
+        stateUpdater.add(activeTask);
+        stateUpdater.add(standbyTask);
+
+        stateUpdater.pause(activeTask.id());
+        stateUpdater.pause(standbyTask.id());
+
+        verifyPausedTasks(activeTask, standbyTask);
+
+        verifyGetTasks(mkSet(activeTask), mkSet(standbyTask));
+    }
+
+    private void verifyGetTasks(final Set<StreamTask> expectedActiveTasks,
+                                final Set<StandbyTask> expectedStandbyTasks) {
+        final Set<Task> tasks = stateUpdater.getTasks();
+
+        assertEquals(expectedActiveTasks.size() + expectedStandbyTasks.size(), tasks.size());
+        tasks.forEach(task -> assertTrue(task instanceof ReadOnlyTask));
+        final Set<TaskId> actualTaskIds = tasks.stream().map(Task::id).collect(Collectors.toSet());
+        final Set<Task> expectedTasks = new HashSet<>(expectedActiveTasks);
+        expectedTasks.addAll(expectedStandbyTasks);
+        final Set<TaskId> expectedTaskIds = expectedTasks.stream().map(Task::id).collect(Collectors.toSet());
+        assertTrue(actualTaskIds.containsAll(expectedTaskIds));
+
+        final Set<StreamTask> activeTasks = stateUpdater.getActiveTasks();
+        assertEquals(expectedActiveTasks.size(), activeTasks.size());
+        assertTrue(activeTasks.containsAll(expectedActiveTasks));
+
+        final Set<StandbyTask> standbyTasks = stateUpdater.getStandbyTasks();
+        assertEquals(expectedStandbyTasks.size(), standbyTasks.size());
+        assertTrue(standbyTasks.containsAll(expectedStandbyTasks));
+    }
+
+    private void verifyRestoredActiveTasks(final StreamTask... tasks) throws Exception {
+        if (tasks.length == 0) {
+            waitForCondition(
+                () -> stateUpdater.getRestoredActiveTasks().isEmpty(),
+                VERIFICATION_TIMEOUT,
+                "Did not get empty restored active task within the given timeout!"
+            );
+        } else {
+            final Set<StreamTask> expectedRestoredTasks = mkSet(tasks);
+            final Set<StreamTask> restoredTasks = new HashSet<>();
+            waitForCondition(
+                () -> {
+                    restoredTasks.addAll(stateUpdater.getRestoredActiveTasks());
+                    return restoredTasks.containsAll(expectedRestoredTasks)
+                        && restoredTasks.size() == expectedRestoredTasks.size();
+                },
+                VERIFICATION_TIMEOUT,
+                "Did not get all restored active task within the given timeout!"
+            );
+        }
+    }
+
+    private void verifyDrainingRestoredActiveTasks(final StreamTask... tasks) throws Exception {
+        final Set<StreamTask> expectedRestoredTasks = mkSet(tasks);
+        final Set<StreamTask> restoredTasks = new HashSet<>();
+        waitForCondition(
+            () -> {
+                restoredTasks.addAll(stateUpdater.drainRestoredActiveTasks(Duration.ofMillis(CALL_TIMEOUT)));
+                return restoredTasks.containsAll(expectedRestoredTasks)
+                    && restoredTasks.size() == expectedRestoredTasks.size();
+            },
+            VERIFICATION_TIMEOUT,
+            "Did not get all restored active task within the given timeout!"
+        );
+        assertTrue(stateUpdater.drainRestoredActiveTasks(Duration.ZERO).isEmpty());
+    }
+
+    private void verifyUpdatingTasks(final Task... tasks) throws Exception {
+        if (tasks.length == 0) {
+            waitForCondition(
+                () -> stateUpdater.getUpdatingTasks().isEmpty(),
+                VERIFICATION_TIMEOUT,
+                "Did not get empty updating task within the given timeout!"
+            );
+        } else {
+            final Set<Task> expectedUpdatingTasks = mkSet(tasks);
+            final Set<Task> updatingTasks = new HashSet<>();
+            waitForCondition(
+                () -> {
+                    updatingTasks.addAll(stateUpdater.getUpdatingTasks());
+                    return updatingTasks.containsAll(expectedUpdatingTasks)
+                        && updatingTasks.size() == expectedUpdatingTasks.size();
+                },
+                VERIFICATION_TIMEOUT,
+                "Did not get all updating task within the given timeout!"
+            );
+        }
+    }
+
+    private void verifyUpdatingStandbyTasks(final StandbyTask... tasks) throws Exception {
+        final Set<StandbyTask> expectedStandbyTasks = mkSet(tasks);
+        final Set<StandbyTask> standbyTasks = new HashSet<>();
+        waitForCondition(
+            () -> {
+                standbyTasks.addAll(stateUpdater.getUpdatingStandbyTasks());
+                return standbyTasks.containsAll(expectedStandbyTasks)
+                    && standbyTasks.size() == expectedStandbyTasks.size();
+            },
+            VERIFICATION_TIMEOUT,
+            "Did not see all standby task within the given timeout!"
+        );
+    }
+
+    private void verifyRemovedTasks(final Task... tasks) throws Exception {
+        if (tasks.length == 0) {
+            waitForCondition(
+                () -> stateUpdater.getRemovedTasks().isEmpty(),
+                VERIFICATION_TIMEOUT,
+                "Did not get empty removed task within the given timeout!"
+            );
+        } else {
+            final Set<Task> expectedRemovedTasks = mkSet(tasks);
+            final Set<Task> removedTasks = new HashSet<>();
+            waitForCondition(
+                () -> {
+                    removedTasks.addAll(stateUpdater.getRemovedTasks());
+                    return removedTasks.containsAll(expectedRemovedTasks)
+                        && removedTasks.size() == expectedRemovedTasks.size();
+                },
+                VERIFICATION_TIMEOUT,
+                "Did not get all removed task within the given timeout!"
+            );
+        }
+    }
+
+    private void verifyPausedTasks(final Task... tasks) throws Exception {
+        if (tasks.length == 0) {
+            waitForCondition(
+                () -> stateUpdater.getPausedTasks().isEmpty(),
+                VERIFICATION_TIMEOUT,
+                "Did not get empty paused task within the given timeout!"
+            );
+        } else {
+            final Set<Task> expectedPausedTasks = mkSet(tasks);
+            final Set<Task> pausedTasks = new HashSet<>();
+            waitForCondition(
+                () -> {
+                    pausedTasks.addAll(stateUpdater.getPausedTasks());
+                    return pausedTasks.containsAll(expectedPausedTasks)
+                        && pausedTasks.size() == expectedPausedTasks.size();
+                },
+                VERIFICATION_TIMEOUT,
+                "Did not get all paused task within the given timeout!"
+            );
+        }
+    }
+
+    private void verifyDrainingRemovedTasks(final Task... tasks) throws Exception {
+        final Set<Task> expectedRemovedTasks = mkSet(tasks);
+        final Set<Task> removedTasks = new HashSet<>();
+        waitForCondition(
+            () -> {
+                removedTasks.addAll(stateUpdater.drainRemovedTasks());
+                return removedTasks.containsAll(mkSet(tasks))
+                    && removedTasks.size() == expectedRemovedTasks.size();
+            },
+            VERIFICATION_TIMEOUT,
+            "Did not get all restored active task within the given timeout!"
+        );
+        assertTrue(stateUpdater.drainRemovedTasks().isEmpty());
+    }
+
+    private void verifyExceptionsAndFailedTasks(final ExceptionAndTasks... exceptionsAndTasks) throws Exception {
+        final List<ExceptionAndTasks> expectedExceptionAndTasks = Arrays.asList(exceptionsAndTasks);
+        final Set<ExceptionAndTasks> failedTasks = new HashSet<>();
+        waitForCondition(
+            () -> {
+                failedTasks.addAll(stateUpdater.getExceptionsAndFailedTasks());
+                return failedTasks.containsAll(expectedExceptionAndTasks)
+                    && failedTasks.size() == expectedExceptionAndTasks.size();
+            },
+            VERIFICATION_TIMEOUT,
+            "Did not get all exceptions and failed tasks within the given timeout!"
+        );
+    }
+
+    private void verifyFailedTasks(final Class<? extends RuntimeException> clazz, final Task... tasks) throws Exception {
+        final List<Task> expectedFailedTasks = Arrays.asList(tasks);
+        final Set<Task> failedTasks = new HashSet<>();
+        waitForCondition(
+                () -> {
+                    for (final ExceptionAndTasks exceptionsAndTasks : stateUpdater.getExceptionsAndFailedTasks()) {
+                        if (clazz.isInstance(exceptionsAndTasks.exception())) {
+                            failedTasks.addAll(exceptionsAndTasks.getTasks());
+                        }
+                    }
+                    return failedTasks.containsAll(expectedFailedTasks)
+                            && failedTasks.size() == expectedFailedTasks.size();
+                },
+                VERIFICATION_TIMEOUT,
+                "Did not get all exceptions and failed tasks within the given timeout!"
+        );
+    }
+
+    private void verifyDrainingExceptionsAndFailedTasks(final ExceptionAndTasks... exceptionsAndTasks) throws Exception {
+        final List<ExceptionAndTasks> expectedExceptionAndTasks = Arrays.asList(exceptionsAndTasks);
+        final List<ExceptionAndTasks> failedTasks = new ArrayList<>();
+        waitForCondition(
+            () -> {
+                failedTasks.addAll(stateUpdater.drainExceptionsAndFailedTasks());
+                return failedTasks.containsAll(expectedExceptionAndTasks)
+                    && failedTasks.size() == expectedExceptionAndTasks.size();
+            },
+            VERIFICATION_TIMEOUT,
+            "Did not get all exceptions and failed tasks within the given timeout!"
+        );
+        assertTrue(stateUpdater.drainExceptionsAndFailedTasks().isEmpty());
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImplTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImplTest.java
index 0670fedb0aa63..4ba88b73e668c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/GlobalStateManagerImplTest.java
@@ -167,7 +167,7 @@ public void shouldLogWarningMessageWhenIOExceptionInCheckPoint() throws IOExcept
         // set readonly to the CHECKPOINT_FILE_NAME.tmp file because we will write data to the .tmp file first
         // and then swap to CHECKPOINT_FILE_NAME by replacing it
         final File file = new File(stateDirectory.globalStateDir(), StateManagerUtil.CHECKPOINT_FILE_NAME + ".tmp");
-        file.createNewFile();
+        Files.createFile(file.toPath());
         file.setWritable(false);
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(GlobalStateManagerImpl.class)) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilderTest.java
index 419a7ae15c4d5..f620cd22c1872 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/InternalTopologyBuilderTest.java
@@ -25,12 +25,14 @@
 import org.apache.kafka.streams.TopologyDescription;
 import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;
 import org.apache.kafka.streams.errors.TopologyException;
+import org.apache.kafka.streams.kstream.Materialized;
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TopicNameExtractor;
 import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.ProcessorSupplier;
 import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder.SubtopologyDescription;
 import org.apache.kafka.streams.processor.internals.TopologyMetadata.Subtopology;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.KeyValueStore;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
@@ -733,32 +735,56 @@ public void shouldAllowIncrementalBuilds() {
 
     @Test
     public void shouldNotAllowNullNameWhenAddingSink() {
-        assertThrows(NullPointerException.class, () -> builder.addSink(null, "topic", null, null, null));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addSink(null, "topic", null, null, null)
+        );
     }
 
     @Test
     public void shouldNotAllowNullTopicWhenAddingSink() {
-        assertThrows(NullPointerException.class, () -> builder.addSink("name", (String) null, null, null, null));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addSink("name", (String) null, null, null, null)
+        );
     }
 
     @Test
     public void shouldNotAllowNullTopicChooserWhenAddingSink() {
-        assertThrows(NullPointerException.class, () -> builder.addSink("name", (TopicNameExtractor<Object, Object>) null, null, null, null));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addSink("name", (TopicNameExtractor<Object, Object>) null, null, null, null)
+        );
     }
 
     @Test
     public void shouldNotAllowNullNameWhenAddingProcessor() {
-        assertThrows(NullPointerException.class, () -> builder.addProcessor(null, () -> null));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addProcessor(
+                null,
+                (ProcessorSupplier<Object, Object, Object, Object>) () -> null
+            )
+        );
     }
 
     @Test
     public void shouldNotAllowNullProcessorSupplier() {
-        assertThrows(NullPointerException.class, () -> builder.addProcessor("name", null));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addProcessor(
+                "name",
+                (ProcessorSupplier<Object, Object, Object, Object>) null
+            )
+        );
     }
 
     @Test
     public void shouldNotAllowNullNameWhenAddingSource() {
-        assertThrows(NullPointerException.class, () -> builder.addSource(null, null, null, null, null, Pattern.compile(".*")));
+        assertThrows(
+            NullPointerException.class,
+            () -> builder.addSource(null, null, null, null, null, Pattern.compile(".*"))
+        );
     }
 
     @Test
@@ -948,6 +974,7 @@ public void shouldOverrideGlobalStreamsConfigWhenGivenNamedTopologyProps() {
         topologyOverrides.put(StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG, 15);
         topologyOverrides.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, MockTimestampExtractor.class);
         topologyOverrides.put(StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG, LogAndContinueExceptionHandler.class);
+        topologyOverrides.put(StreamsConfig.DEFAULT_DSL_STORE_CONFIG, StreamsConfig.IN_MEMORY);
 
         final StreamsConfig config = new StreamsConfig(StreamsTestUtils.getStreamsConfig());
         final InternalTopologyBuilder topologyBuilder = new InternalTopologyBuilder(
@@ -963,6 +990,7 @@ public void shouldOverrideGlobalStreamsConfigWhenGivenNamedTopologyProps() {
         assertThat(topologyBuilder.topologyConfigs().getTaskConfig().maxBufferedSize, equalTo(15));
         assertThat(topologyBuilder.topologyConfigs().getTaskConfig().timestampExtractor.getClass(), equalTo(MockTimestampExtractor.class));
         assertThat(topologyBuilder.topologyConfigs().getTaskConfig().deserializationExceptionHandler.getClass(), equalTo(LogAndContinueExceptionHandler.class));
+        assertThat(topologyBuilder.topologyConfigs().parseStoreType(), equalTo(Materialized.StoreType.IN_MEMORY));
     }
 
     @Test
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/MockChangelogReader.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/MockChangelogReader.java
index 6ea7fc3101561..d86728891cd94 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/MockChangelogReader.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/MockChangelogReader.java
@@ -59,6 +59,11 @@ public Set<TopicPartition> completedChangelogs() {
         return restoringPartitions;
     }
 
+    @Override
+    public boolean allChangelogsCompleted() {
+        return false;
+    }
+
     @Override
     public void clear() {
         restoringPartitions.clear();
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/PartitionGroupTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/PartitionGroupTest.java
index 40602b5edf1c7..632b0efc23c5b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/PartitionGroupTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/PartitionGroupTest.java
@@ -48,6 +48,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.common.utils.Utils.mkSet;
+
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.equalTo;
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorContextImplTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorContextImplTest.java
index f5e4f6364aaa7..3e1832048d917 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorContextImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorContextImplTest.java
@@ -58,6 +58,8 @@
 import java.util.function.Consumer;
 
 import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.streams.processor.internals.ProcessorContextImpl.BYTEARRAY_VALUE_SERIALIZER;
 import static org.apache.kafka.streams.processor.internals.ProcessorContextImpl.BYTES_KEY_SERIALIZER;
 import static org.easymock.EasyMock.anyLong;
@@ -69,6 +71,7 @@
 import static org.easymock.EasyMock.replay;
 import static org.easymock.EasyMock.verify;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -394,15 +397,17 @@ public void localSessionStoreShouldNotAllowInitOrClose() {
     @Test
     public void shouldNotSendRecordHeadersToChangelogTopic() {
         recordCollector.send(
-                CHANGELOG_PARTITION.topic(),
-                KEY_BYTES,
-                VALUE_BYTES,
-                null,
-                CHANGELOG_PARTITION.partition(),
-                TIMESTAMP,
-                BYTES_KEY_SERIALIZER,
-                BYTEARRAY_VALUE_SERIALIZER
-        );
+            CHANGELOG_PARTITION.topic(),
+            KEY_BYTES,
+            VALUE_BYTES,
+            null,
+            CHANGELOG_PARTITION.partition(),
+            TIMESTAMP,
+            BYTES_KEY_SERIALIZER,
+            BYTEARRAY_VALUE_SERIALIZER,
+            null,
+            null);
+
         final StreamTask task = EasyMock.createNiceMock(StreamTask.class);
 
         replay(recordCollector, task);
@@ -420,15 +425,16 @@ public void shouldSendRecordHeadersToChangelogTopicWhenConsistencyEnabled() {
         headers.add(new RecordHeader(ChangelogRecordDeserializationHelper.CHANGELOG_POSITION_HEADER_KEY,
                 PositionSerde.serialize(position).array()));
         recordCollector.send(
-                CHANGELOG_PARTITION.topic(),
-                KEY_BYTES,
-                VALUE_BYTES,
-                headers,
-                CHANGELOG_PARTITION.partition(),
-                TIMESTAMP,
-                BYTES_KEY_SERIALIZER,
-                BYTEARRAY_VALUE_SERIALIZER
-        );
+            CHANGELOG_PARTITION.topic(),
+            KEY_BYTES,
+            VALUE_BYTES,
+            headers,
+            CHANGELOG_PARTITION.partition(),
+            TIMESTAMP,
+            BYTES_KEY_SERIALIZER,
+            BYTEARRAY_VALUE_SERIALIZER,
+            null,
+            null);
 
         final StreamTask task = EasyMock.createNiceMock(StreamTask.class);
 
@@ -568,6 +574,36 @@ public void shouldMatchStreamTime() {
         assertEquals(STREAM_TIME, context.currentStreamTimeMs());
     }
 
+    @Test
+    public void shouldAddAndGetProcessorKeyValue() {
+        context.addProcessorMetadataKeyValue("key1", 100L);
+        final Long value = context.processorMetadataForKey("key1");
+        assertEquals(100L, value.longValue());
+
+        final Long noValue = context.processorMetadataForKey("nokey");
+        assertNull(noValue);
+    }
+
+    @Test
+    public void shouldSetAndGetProcessorMetaData() {
+        final ProcessorMetadata emptyMetadata = new ProcessorMetadata();
+        context.setProcessorMetadata(emptyMetadata);
+        assertEquals(emptyMetadata, context.getProcessorMetadata());
+
+        final ProcessorMetadata metadata = new ProcessorMetadata(
+            mkMap(
+                mkEntry("key1", 10L),
+                mkEntry("key2", 100L)
+            )
+        );
+
+        context.setProcessorMetadata(metadata);
+        assertEquals(10L, context.processorMetadataForKey("key1").longValue());
+        assertEquals(100L, context.processorMetadataForKey("key2").longValue());
+
+        assertThrows(NullPointerException.class, () -> context.setProcessorMetadata(null));
+    }
+
     @SuppressWarnings("unchecked")
     private KeyValueStore<String, Long> keyValueStoreMock() {
         final KeyValueStore<String, Long> keyValueStoreMock = mock(KeyValueStore.class);
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorMetadataTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorMetadataTest.java
new file mode 100644
index 0000000000000..c7ce0c80602be
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorMetadataTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.junit.Test;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class ProcessorMetadataTest {
+
+    @Test
+    public void shouldAddandGetKeyValueWithEmptyConstructor() {
+        final ProcessorMetadata metadata = new ProcessorMetadata();
+        final String key = "some_key";
+        final long value = 100L;
+
+        metadata.put(key, value);
+        final Long actualValue =  metadata.get(key);
+
+        assertThat(actualValue, is(value));
+
+        final Long noValue = metadata.get("no_key");
+        assertThat(noValue, is(nullValue()));
+    }
+
+    @Test
+    public void shouldAddandGetKeyValueWithExistingMeta() {
+        final Map<String, Long> map = new HashMap<>();
+        map.put("key1", 1L);
+        map.put("key2", 2L);
+
+        final ProcessorMetadata metadata = new ProcessorMetadata(map);
+
+        final long value1 = metadata.get("key1");
+        assertThat(value1, is(1L));
+
+        final long value2 = metadata.get("key2");
+        assertThat(value2, is(2L));
+
+        final Long noValue = metadata.get("key3");
+        assertThat(noValue, is(nullValue()));
+
+        metadata.put("key3", 3L);
+        final long value3 = metadata.get("key3");
+        assertThat(value3, is(3L));
+    }
+
+    @Test
+    public void shouldSerializeAndDeserialize() {
+        final ProcessorMetadata metadata = new ProcessorMetadata();
+        final String key1 = "key1", key2 = "key2", key3 = "key3";
+        final long value1 = 1L, value2 = 2L, value3 = 3L;
+
+        metadata.put(key1, value1);
+        metadata.put(key2, value2);
+        metadata.put(key3, value3);
+
+        final byte[] serialized = metadata.serialize();
+        final ProcessorMetadata deserialized = ProcessorMetadata.deserialize(serialized);
+
+        assertThat(deserialized.get(key1), is(value1));
+        assertThat(deserialized.get(key2), is(value2));
+        assertThat(deserialized.get(key3), is(value3));
+    }
+
+    @Test
+    public void shouldDeserializeNull() {
+        final ProcessorMetadata deserialized = ProcessorMetadata.deserialize(null);
+        assertThat(deserialized, is(new ProcessorMetadata()));
+    }
+
+    @Test
+    public void shouldUpdate() {
+        final ProcessorMetadata emptyMeta = new ProcessorMetadata();
+        emptyMeta.update(null);
+
+        assertThat(emptyMeta, is(new ProcessorMetadata()));
+
+        {
+            final Map<String, Long> map1 = new HashMap<>();
+            map1.put("key1", 1L);
+            map1.put("key2", 2L);
+            final ProcessorMetadata metadata1 = new ProcessorMetadata(map1);
+            emptyMeta.update(metadata1);
+            assertThat(emptyMeta.get("key1"), is(1L));
+            assertThat(emptyMeta.get("key2"), is(2L));
+        }
+
+        {
+            final Map<String, Long> map1 = new HashMap<>();
+            map1.put("key1", 0L);
+            map1.put("key2", 1L);
+            final ProcessorMetadata metadata1 = new ProcessorMetadata(map1);
+            emptyMeta.update(metadata1);
+            assertThat(emptyMeta.get("key1"), is(1L));
+            assertThat(emptyMeta.get("key2"), is(2L));
+        }
+
+        {
+            final Map<String, Long> map1 = new HashMap<>();
+            map1.put("key1", 2L);
+            map1.put("key2", 3L);
+            final ProcessorMetadata metadata1 = new ProcessorMetadata(map1);
+            emptyMeta.update(metadata1);
+            assertThat(emptyMeta.get("key1"), is(2L));
+            assertThat(emptyMeta.get("key2"), is(3L));
+        }
+    }
+
+    @Test
+    public void shouldUpdateCommitFlag() {
+        final ProcessorMetadata emptyMeta = new ProcessorMetadata();
+        assertFalse(emptyMeta.needsCommit());
+
+        emptyMeta.setNeedsCommit(true);
+        assertTrue(emptyMeta.needsCommit());
+
+        emptyMeta.setNeedsCommit(false);
+        assertFalse(emptyMeta.needsCommit());
+
+        emptyMeta.put("key1", 1L);
+        assertTrue(emptyMeta.needsCommit());
+
+        final Map<String, Long> map1 = new HashMap<>();
+        map1.put("key1", 2L);
+        map1.put("key2", 3L);
+        final ProcessorMetadata metadata1 = new ProcessorMetadata(map1);
+        emptyMeta.update(metadata1);
+        assertTrue(emptyMeta.needsCommit());
+    }
+
+    @Test
+    public void shouldNotUseCommitFlagForHashcodeAndEquals() {
+        final ProcessorMetadata metadata1 = new ProcessorMetadata();
+        metadata1.setNeedsCommit(true);
+        final ProcessorMetadata metadata2 = new ProcessorMetadata();
+        metadata2.setNeedsCommit(false);
+
+        assertEquals(metadata1, metadata2);
+        assertEquals(metadata1.hashCode(), metadata2.hashCode());
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java
index 5947842a74c13..4dc62734bf6e9 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ProcessorStateManagerTest.java
@@ -55,6 +55,7 @@
 import java.io.FileWriter;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
@@ -813,7 +814,7 @@ public void shouldThrowIfLoadCheckpointThrows() throws Exception {
 
         stateMgr.registerStore(persistentStore, persistentStore.stateRestoreCallback, null);
         final File file = new File(stateMgr.baseDir(), CHECKPOINT_FILE_NAME);
-        file.createNewFile();
+        Files.createFile(file.toPath());
         final FileWriter writer = new FileWriter(file);
         writer.write("abcdefg");
         writer.close();
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/RackAwarenessStreamsPartitionAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RackAwarenessStreamsPartitionAssignorTest.java
new file mode 100644
index 0000000000000..12a14c2dc5bde
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RackAwarenessStreamsPartitionAssignorTest.java
@@ -0,0 +1,576 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.clients.admin.Admin;
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.ListOffsetsResult;
+import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerPartitionAssignor;
+import org.apache.kafka.common.Cluster;
+import org.apache.kafka.common.Node;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.internals.KafkaFutureImpl;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.internals.assignment.AssignmentInfo;
+import org.apache.kafka.streams.processor.internals.assignment.ReferenceContainer;
+import org.apache.kafka.streams.processor.internals.assignment.SubscriptionInfo;
+import org.apache.kafka.test.MockApiProcessorSupplier;
+import org.apache.kafka.test.MockClientSupplier;
+import org.apache.kafka.test.MockInternalTopicManager;
+import org.apache.kafka.test.MockKeyValueStoreBuilder;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.junit.MockitoJUnitRunner;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptySet;
+import static java.util.Collections.singletonList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.EMPTY_CHANGELOG_END_OFFSETS;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.EMPTY_TASKS;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_1;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_2;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_3;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_4;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_5;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_6;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_7;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_8;
+import static org.apache.kafka.streams.processor.internals.assignment.AssignmentTestUtils.UUID_9;
+import static org.apache.kafka.streams.processor.internals.assignment.StreamsAssignmentProtocolVersions.LATEST_SUPPORTED_VERSION;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@RunWith(MockitoJUnitRunner.StrictStubs.class)
+public class RackAwarenessStreamsPartitionAssignorTest {
+
+    private final List<PartitionInfo> infos = asList(
+        new PartitionInfo("topic0", 0, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic0", 1, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic0", 2, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic1", 0, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic1", 1, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic1", 2, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic2", 0, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic2", 1, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic2", 2, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic3", 0, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic3", 1, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic3", 2, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic4", 0, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic4", 1, Node.noNode(), new Node[0], new Node[0]),
+        new PartitionInfo("topic4", 2, Node.noNode(), new Node[0], new Node[0])
+    );
+
+    final String consumer1 = "consumer1";
+    final String consumer2 = "consumer2";
+    final String consumer3 = "consumer3";
+    final String consumer4 = "consumer4";
+    final String consumer5 = "consumer5";
+    final String consumer6 = "consumer6";
+    final String consumer7 = "consumer7";
+    final String consumer8 = "consumer8";
+    final String consumer9 = "consumer9";
+
+
+    private final Cluster metadata = new Cluster(
+            "cluster",
+            singletonList(Node.noNode()),
+            infos,
+            emptySet(),
+            emptySet());
+
+    private final static List<String> ALL_TAG_KEYS = new ArrayList<>();
+    static {
+        for (int i = 0; i < StreamsConfig.MAX_RACK_AWARE_ASSIGNMENT_TAG_LIST_SIZE; i++) {
+            ALL_TAG_KEYS.add("key-" + i);
+        }
+    }
+
+    private final StreamsPartitionAssignor partitionAssignor = new StreamsPartitionAssignor();
+    private final MockClientSupplier mockClientSupplier = new MockClientSupplier();
+    private static final String USER_END_POINT = "localhost:8080";
+    private static final String APPLICATION_ID = "stream-partition-assignor-test";
+
+    private TaskManager taskManager;
+    private Admin adminClient;
+    private StreamsConfig streamsConfig = new StreamsConfig(configProps());
+    private final InternalTopologyBuilder builder = new InternalTopologyBuilder();
+    private TopologyMetadata topologyMetadata = new TopologyMetadata(builder, streamsConfig);
+    private final StreamsMetadataState streamsMetadataState = mock(StreamsMetadataState.class);
+    private final Map<String, ConsumerPartitionAssignor.Subscription> subscriptions = new HashMap<>();
+    private final MockTime time = new MockTime();
+
+    @SuppressWarnings("unchecked")
+    private Map<String, Object> configProps() {
+        final Map<String, Object> configurationMap = new HashMap<>();
+        configurationMap.put(StreamsConfig.APPLICATION_ID_CONFIG, APPLICATION_ID);
+        configurationMap.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, USER_END_POINT);
+        final ReferenceContainer referenceContainer = new ReferenceContainer();
+        referenceContainer.mainConsumer = (Consumer<byte[], byte[]>) mock(Consumer.class);
+        referenceContainer.adminClient = adminClient;
+        referenceContainer.taskManager = taskManager;
+        referenceContainer.streamsMetadataState = streamsMetadataState;
+        referenceContainer.time = time;
+        configurationMap.put(StreamsConfig.InternalConfig.REFERENCE_CONTAINER_PARTITION_ASSIGNOR, referenceContainer);
+        configurationMap.put(StreamsConfig.RACK_AWARE_ASSIGNMENT_TAGS_CONFIG, String.join(",", ALL_TAG_KEYS));
+        ALL_TAG_KEYS.forEach(key -> configurationMap.put(StreamsConfig.clientTagPrefix(key), "dummy"));
+        return configurationMap;
+    }
+
+    // Make sure to complete setting up any mocks (such as TaskManager or AdminClient) before configuring the assignor
+    private void configurePartitionAssignorWith(final Map<String, Object> props) {
+        final Map<String, Object> configMap = configProps();
+        configMap.putAll(props);
+
+        streamsConfig = new StreamsConfig(configMap);
+        topologyMetadata = new TopologyMetadata(builder, streamsConfig);
+        partitionAssignor.configure(configMap);
+
+        overwriteInternalTopicManagerWithMock();
+    }
+
+    // Useful for tests that don't care about the task offset sums
+    private void createMockTaskManager() {
+        taskManager = mock(TaskManager.class);
+        when(taskManager.topologyMetadata()).thenReturn(topologyMetadata);
+        when(taskManager.processId()).thenReturn(UUID_1);
+        topologyMetadata.buildAndRewriteTopology();
+    }
+
+    // If you don't care about setting the end offsets for each specific topic partition, the helper method
+    // getTopicPartitionOffsetMap is useful for building this input map for all partitions
+    private void createMockAdminClient(final Map<TopicPartition, Long> changelogEndOffsets) {
+        adminClient = mock(AdminClient.class);
+
+        final ListOffsetsResult result = mock(ListOffsetsResult.class);
+        final KafkaFutureImpl<Map<TopicPartition, ListOffsetsResult.ListOffsetsResultInfo>> allFuture = new KafkaFutureImpl<>();
+        allFuture.complete(changelogEndOffsets.entrySet().stream().collect(Collectors.toMap(
+                Map.Entry::getKey,
+                t -> {
+                    final ListOffsetsResult.ListOffsetsResultInfo info = mock(ListOffsetsResult.ListOffsetsResultInfo.class);
+                    when(info.offset()).thenReturn(t.getValue());
+                    return info;
+                }))
+        );
+
+        when(adminClient.listOffsets(any())).thenReturn(result);
+        when(result.all()).thenReturn(allFuture);
+    }
+
+    private void overwriteInternalTopicManagerWithMock() {
+        final MockInternalTopicManager mockInternalTopicManager = new MockInternalTopicManager(
+                time,
+                streamsConfig,
+                mockClientSupplier.restoreConsumer,
+                false
+        );
+        partitionAssignor.setInternalTopicManager(mockInternalTopicManager);
+    }
+
+    @Before
+    public void setUp() {
+        createMockAdminClient(EMPTY_CHANGELOG_END_OFFSETS);
+    }
+
+    @Test
+    public void shouldDistributeWithMaximumNumberOfClientTags() {
+        setupTopology(3, 2);
+
+        createMockTaskManager();
+        createMockAdminClient(getTopicPartitionOffsetsMap(
+            Arrays.asList(APPLICATION_ID + "-store2-changelog", APPLICATION_ID + "-store3-changelog", APPLICATION_ID + "-store4-changelog"),
+            Arrays.asList(3, 3, 3)));
+        configurePartitionAssignorWith(Collections.singletonMap(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1));
+
+        final Map<String, String> clientTags1 = new HashMap<>();
+        final Map<String, String> clientTags2 = new HashMap<>();
+
+        for (int i = 0; i < ALL_TAG_KEYS.size(); i++) {
+            final String key = ALL_TAG_KEYS.get(i);
+            clientTags1.put(key, "value-1-" + i);
+            clientTags2.put(key, "value-2-" + i);
+        }
+
+        final Map<String, Map<String, String>> hostTags = new HashMap<>();
+        subscriptions.put(consumer1, getSubscription(UUID_1, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer1, clientTags1);
+        subscriptions.put(consumer2, getSubscription(UUID_2, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer2, clientTags1);
+        subscriptions.put(consumer3, getSubscription(UUID_3, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer3, clientTags2);
+
+        Map<String, ConsumerPartitionAssignor.Assignment> assignments = partitionAssignor
+            .assign(metadata, new ConsumerPartitionAssignor.GroupSubscription(subscriptions))
+            .groupAssignment();
+
+        verifyIdealTaskDistributionReached(getClientTagDistributions(assignments, hostTags), ALL_TAG_KEYS);
+
+        // kill the first consumer and rebalance, should still achieve ideal distribution
+        subscriptions.clear();
+        subscriptions.put(consumer2, getSubscription(UUID_2, AssignmentInfo.decode(assignments.get(consumer2).userData()).activeTasks(), clientTags1));
+        subscriptions.put(consumer3, getSubscription(UUID_3, AssignmentInfo.decode(assignments.get(consumer3).userData()).activeTasks(), clientTags2));
+
+        assignments = partitionAssignor.assign(metadata, new ConsumerPartitionAssignor.GroupSubscription(subscriptions))
+            .groupAssignment();
+
+        verifyIdealTaskDistributionReached(getClientTagDistributions(assignments, hostTags), ALL_TAG_KEYS);
+    }
+
+    @Test
+    public void shouldDistributeOnDistinguishingTagSubset() {
+        setupTopology(3, 0);
+
+        createMockTaskManager();
+        createMockAdminClient(getTopicPartitionOffsetsMap(
+            Arrays.asList(APPLICATION_ID + "-store0-changelog", APPLICATION_ID + "-store1-changelog", APPLICATION_ID + "-store2-changelog"),
+            Arrays.asList(3, 3, 3)));
+        configurePartitionAssignorWith(Collections.singletonMap(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 1));
+
+        // use the same tag value for key1, and different value for key2
+        // then we verify that for key2 we still achieve ideal distribution
+        final Map<String, String> clientTags1 = new HashMap<>();
+        final Map<String, String> clientTags2 = new HashMap<>();
+        clientTags1.put(ALL_TAG_KEYS.get(0), "value-1-all");
+        clientTags2.put(ALL_TAG_KEYS.get(0), "value-2-all");
+        clientTags1.put(ALL_TAG_KEYS.get(1), "value-1-1");
+        clientTags2.put(ALL_TAG_KEYS.get(1), "value-2-2");
+
+        final String consumer1 = "consumer1";
+        final String consumer2 = "consumer2";
+        final String consumer3 = "consumer3";
+        final String consumer4 = "consumer4";
+        final String consumer5 = "consumer5";
+        final String consumer6 = "consumer6";
+
+        final Map<String, Map<String, String>> hostTags = new HashMap<>();
+        subscriptions.put(consumer1, getSubscription(UUID_1, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer1, clientTags1);
+        subscriptions.put(consumer2, getSubscription(UUID_2, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer2, clientTags1);
+        subscriptions.put(consumer3, getSubscription(UUID_3, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer3, clientTags1);
+        subscriptions.put(consumer4, getSubscription(UUID_4, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer4, clientTags2);
+        subscriptions.put(consumer5, getSubscription(UUID_5, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer5, clientTags2);
+        subscriptions.put(consumer6, getSubscription(UUID_6, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer6, clientTags2);
+
+        final Map<String, ConsumerPartitionAssignor.Assignment> assignments = partitionAssignor
+            .assign(metadata, new ConsumerPartitionAssignor.GroupSubscription(subscriptions))
+            .groupAssignment();
+
+        verifyIdealTaskDistributionReached(getClientTagDistributions(assignments, hostTags), Collections.singletonList(ALL_TAG_KEYS.get(1)));
+    }
+
+    @Test
+    public void shouldDistributeWithMultipleStandbys() {
+        setupTopology(3, 0);
+
+        createMockTaskManager();
+        createMockAdminClient(getTopicPartitionOffsetsMap(
+            Arrays.asList(APPLICATION_ID + "-store0-changelog", APPLICATION_ID + "-store1-changelog", APPLICATION_ID + "-store2-changelog"),
+            Arrays.asList(3, 3, 3)));
+        configurePartitionAssignorWith(Collections.singletonMap(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2));
+
+        final Map<String, String> clientTags1 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-1"));
+        final Map<String, String> clientTags2 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-2"));
+        final Map<String, String> clientTags3 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-3"));
+        final Map<String, String> clientTags4 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-1"));
+        final Map<String, String> clientTags5 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-2"));
+        final Map<String, String> clientTags6 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-3"));
+        final Map<String, String> clientTags7 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-3"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-1"));
+        final Map<String, String> clientTags8 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-3"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-2"));
+        final Map<String, String> clientTags9 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-3"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-3"));
+
+        final Map<String, Map<String, String>> hostTags = new HashMap<>();
+        subscriptions.put(consumer1, getSubscription(UUID_1, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer1, clientTags1);
+        subscriptions.put(consumer2, getSubscription(UUID_2, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer2, clientTags2);
+        subscriptions.put(consumer3, getSubscription(UUID_3, EMPTY_TASKS, clientTags3));
+        hostTags.put(consumer3, clientTags3);
+        subscriptions.put(consumer4, getSubscription(UUID_4, EMPTY_TASKS, clientTags4));
+        hostTags.put(consumer4, clientTags4);
+        subscriptions.put(consumer5, getSubscription(UUID_5, EMPTY_TASKS, clientTags5));
+        hostTags.put(consumer5, clientTags5);
+        subscriptions.put(consumer6, getSubscription(UUID_6, EMPTY_TASKS, clientTags6));
+        hostTags.put(consumer6, clientTags6);
+        subscriptions.put(consumer7, getSubscription(UUID_7, EMPTY_TASKS, clientTags7));
+        hostTags.put(consumer7, clientTags7);
+        subscriptions.put(consumer8, getSubscription(UUID_8, EMPTY_TASKS, clientTags8));
+        hostTags.put(consumer8, clientTags8);
+        subscriptions.put(consumer9, getSubscription(UUID_9, EMPTY_TASKS, clientTags9));
+        hostTags.put(consumer9, clientTags9);
+
+        final Map<String, ConsumerPartitionAssignor.Assignment> assignments = partitionAssignor
+            .assign(metadata, new ConsumerPartitionAssignor.GroupSubscription(subscriptions))
+            .groupAssignment();
+
+        verifyIdealTaskDistributionReached(getClientTagDistributions(assignments, hostTags), Arrays.asList(ALL_TAG_KEYS.get(0), ALL_TAG_KEYS.get(1)));
+    }
+
+    @Test
+    public void shouldDistributePartiallyWhenDoNotHaveEnoughClients() {
+        setupTopology(3, 0);
+
+        createMockTaskManager();
+        createMockAdminClient(getTopicPartitionOffsetsMap(
+            Arrays.asList(APPLICATION_ID + "-store0-changelog", APPLICATION_ID + "-store1-changelog", APPLICATION_ID + "-store2-changelog"),
+            Arrays.asList(3, 3, 3)));
+        configurePartitionAssignorWith(Collections.singletonMap(StreamsConfig.NUM_STANDBY_REPLICAS_CONFIG, 2));
+
+        final Map<String, String> clientTags1 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-1"));
+        final Map<String, String> clientTags2 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-2"));
+        final Map<String, String> clientTags3 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-1"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-3"));
+        final Map<String, String> clientTags4 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-1"));
+        final Map<String, String> clientTags5 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-2"));
+        final Map<String, String> clientTags6 = mkMap(
+            mkEntry(ALL_TAG_KEYS.get(0), "value-0-2"),
+            mkEntry(ALL_TAG_KEYS.get(1), "value-1-3"));
+
+        final Map<String, Map<String, String>> hostTags = new HashMap<>();
+        subscriptions.put(consumer1, getSubscription(UUID_1, EMPTY_TASKS, clientTags1));
+        hostTags.put(consumer1, clientTags1);
+        subscriptions.put(consumer2, getSubscription(UUID_2, EMPTY_TASKS, clientTags2));
+        hostTags.put(consumer2, clientTags2);
+        subscriptions.put(consumer3, getSubscription(UUID_3, EMPTY_TASKS, clientTags3));
+        hostTags.put(consumer3, clientTags3);
+        subscriptions.put(consumer4, getSubscription(UUID_4, EMPTY_TASKS, clientTags4));
+        hostTags.put(consumer4, clientTags4);
+        subscriptions.put(consumer5, getSubscription(UUID_5, EMPTY_TASKS, clientTags5));
+        hostTags.put(consumer5, clientTags5);
+        subscriptions.put(consumer6, getSubscription(UUID_6, EMPTY_TASKS, clientTags6));
+        hostTags.put(consumer6, clientTags6);
+
+        final Map<String, ConsumerPartitionAssignor.Assignment> assignments = partitionAssignor
+            .assign(metadata, new ConsumerPartitionAssignor.GroupSubscription(subscriptions))
+            .groupAssignment();
+
+        verifyIdealTaskDistributionReached(getClientTagDistributions(assignments, hostTags), Collections.singletonList(ALL_TAG_KEYS.get(1)));
+        verifyPartialTaskDistributionReached(getClientTagDistributions(assignments, hostTags), Collections.singletonList(ALL_TAG_KEYS.get(0)));
+    }
+
+    private Map<TaskId, ClientTagDistribution> getClientTagDistributions(final Map<String, ConsumerPartitionAssignor.Assignment> assignments,
+                                                                         final Map<String, Map<String, String>> hostTags) {
+        final Map<TaskId, ClientTagDistribution> taskClientTags = new HashMap<>();
+
+        for (final Map.Entry<String, ConsumerPartitionAssignor.Assignment> entry : assignments.entrySet()) {
+            final AssignmentInfo info = AssignmentInfo.decode(entry.getValue().userData());
+
+            for (final TaskId activeTaskId : info.activeTasks()) {
+                taskClientTags.putIfAbsent(activeTaskId, new ClientTagDistribution(activeTaskId));
+                final ClientTagDistribution tagDistribution = taskClientTags.get(activeTaskId);
+                tagDistribution.addActiveTags(hostTags.get(entry.getKey()));
+            }
+
+            for (final TaskId standbyTaskId : info.standbyTasks().keySet()) {
+                taskClientTags.putIfAbsent(standbyTaskId, new ClientTagDistribution(standbyTaskId));
+                final ClientTagDistribution tagDistribution = taskClientTags.get(standbyTaskId);
+                tagDistribution.addStandbyTags(hostTags.get(entry.getKey()));
+            }
+        }
+
+        return taskClientTags;
+    }
+
+    private void verifyIdealTaskDistributionReached(final Map<TaskId, ClientTagDistribution> taskClientTags,
+                                                    final List<String> tagsToCheck) {
+        for (final Map.Entry<TaskId, ClientTagDistribution> entry: taskClientTags.entrySet()) {
+            if (!tagsAmongStandbysAreDifferent(entry.getValue(), tagsToCheck))
+                throw new AssertionError("task " + entry.getKey() + "'s tag-distribution for " + tagsToCheck +
+                    " among standbys is not ideal: " + entry.getValue());
+
+            if (!tagsAmongActiveAndAllStandbysAreDifferent(entry.getValue(), tagsToCheck))
+                throw new AssertionError("task " + entry.getKey() + "'s tag-distribution for " + tagsToCheck +
+                    " between active and standbys is not ideal: " + entry.getValue());
+        }
+    }
+
+    private void verifyPartialTaskDistributionReached(final Map<TaskId, ClientTagDistribution> taskClientTags,
+                                                      final List<String> tagsToCheck) {
+        for (final Map.Entry<TaskId, ClientTagDistribution> entry: taskClientTags.entrySet()) {
+            if (!tagsAmongActiveAndAtLeastOneStandbyIsDifferent(entry.getValue(), tagsToCheck))
+                throw new AssertionError("task " + entry.getKey() + "'s tag-distribution for " + tagsToCheck +
+                    "between active and standbys is not partially ideal: " + entry.getValue());
+        }
+    }
+
+    private static boolean tagsAmongActiveAndAllStandbysAreDifferent(final ClientTagDistribution tagDistribution,
+                                                                     final List<String> tagsToCheck) {
+        return tagDistribution.standbysClientTags.stream().allMatch(standbyTags ->
+            tagsToCheck.stream().noneMatch(tag -> tagDistribution.activeClientTags.get(tag).equals(standbyTags.get(tag))));
+    }
+
+    private static boolean tagsAmongActiveAndAtLeastOneStandbyIsDifferent(final ClientTagDistribution tagDistribution,
+                                                                          final List<String> tagsToCheck) {
+        return tagDistribution.standbysClientTags.stream().anyMatch(standbyTags ->
+            tagsToCheck.stream().noneMatch(tag -> tagDistribution.activeClientTags.get(tag).equals(standbyTags.get(tag))));
+    }
+
+    private static boolean tagsAmongStandbysAreDifferent(final ClientTagDistribution tagDistribution,
+                                                         final List<String> tagsToCheck) {
+        final Map<String, Integer> statistics = new HashMap<>();
+
+        for (final Map<String, String> tags : tagDistribution.standbysClientTags) {
+            for (final Map.Entry<String, String> tag : tags.entrySet()) {
+                if (tagsToCheck.contains(tag.getKey())) {
+                    final String tagValue = tag.getValue();
+                    final Integer tagValueOccurrence = statistics.getOrDefault(tagValue, 0);
+                    statistics.put(tagValue, tagValueOccurrence + 1);
+                }
+            }
+        }
+
+        return statistics.values().stream().noneMatch(occurrence -> occurrence > 1);
+    }
+
+    private void setupTopology(final int numOfStatefulTopologies, final int numOfStatelessTopologies) {
+        if (numOfStatefulTopologies + numOfStatelessTopologies > 5) {
+            throw new IllegalArgumentException("Should not have more than 5 topologies, but have " + numOfStatefulTopologies);
+        }
+
+        for (int i = 0; i < numOfStatelessTopologies; i++) {
+            builder.addSource(null, "source" + i, null, null, null, "topic" + i);
+            builder.addProcessor("processor" + i, new MockApiProcessorSupplier<>(), "source" + i);
+        }
+
+        for (int i = numOfStatelessTopologies; i < numOfStatelessTopologies + numOfStatefulTopologies; i++) {
+            builder.addSource(null, "source" + i, null, null, null, "topic" + i);
+            builder.addProcessor("processor" + i, new MockApiProcessorSupplier<>(), "source" + i);
+            builder.addStateStore(new MockKeyValueStoreBuilder("store" + i, false), "processor" + i);
+        }
+    }
+
+    private static final class ClientTagDistribution {
+        private final TaskId taskId;
+        private final Map<String, String> activeClientTags;
+        private final List<Map<String, String>> standbysClientTags;
+
+        ClientTagDistribution(final TaskId taskId) {
+            this.taskId = taskId;
+            this.activeClientTags = new HashMap<>();
+            this.standbysClientTags = new ArrayList<>();
+        }
+
+        void addActiveTags(final Map<String, String> activeClientTags) {
+            if (!this.activeClientTags.isEmpty()) {
+                throw new IllegalStateException("Found multiple active tasks for " + taskId + ", this should not happen");
+            }
+            this.activeClientTags.putAll(activeClientTags);
+        }
+
+        void addStandbyTags(final Map<String, String> standbyClientTags) {
+            this.standbysClientTags.add(standbyClientTags);
+        }
+
+        @Override
+        public String toString() {
+            return "ClientTagDistribution{" +
+                "taskId=" + taskId +
+                ", activeClientTags=" + activeClientTags +
+                ", standbysClientTags=" + standbysClientTags +
+                '}';
+        }
+    }
+
+    /**
+     * Helper for building the input to createMockAdminClient in cases where we don't care about the actual offsets
+     * @param changelogTopics The names of all changelog topics in the topology
+     * @param topicsNumPartitions The number of partitions for the corresponding changelog topic, such that the number
+     *            of partitions of the ith topic in changelogTopics is given by the ith element of topicsNumPartitions
+     */
+    private static Map<TopicPartition, Long> getTopicPartitionOffsetsMap(final List<String> changelogTopics,
+                                                                         final List<Integer> topicsNumPartitions) {
+        if (changelogTopics.size() != topicsNumPartitions.size()) {
+            throw new IllegalStateException("Passed in " + changelogTopics.size() + " changelog topic names, but " +
+                    topicsNumPartitions.size() + " different numPartitions for the topics");
+        }
+        final Map<TopicPartition, Long> changelogEndOffsets = new HashMap<>();
+        for (int i = 0; i < changelogTopics.size(); ++i) {
+            final String topic = changelogTopics.get(i);
+            final int numPartitions = topicsNumPartitions.get(i);
+            for (int partition = 0; partition < numPartitions; ++partition) {
+                changelogEndOffsets.put(new TopicPartition(topic, partition), Long.MAX_VALUE);
+            }
+        }
+        return changelogEndOffsets;
+    }
+
+    private static ConsumerPartitionAssignor.Subscription getSubscription(final UUID processId,
+                                                                          final Collection<TaskId> prevActiveTasks,
+                                                                          final Map<String, String> clientTags) {
+        return new ConsumerPartitionAssignor.Subscription(
+            singletonList("source1"),
+            new SubscriptionInfo(LATEST_SUPPORTED_VERSION, LATEST_SUPPORTED_VERSION, processId, null,
+                getTaskOffsetSums(prevActiveTasks), (byte) 0, 0, clientTags).encode()
+        );
+    }
+
+    // Stub offset sums for when we only care about the prev/standby task sets, not the actual offsets
+    private static Map<TaskId, Long> getTaskOffsetSums(final Collection<TaskId> activeTasks) {
+        final Map<TaskId, Long> taskOffsetSums = activeTasks.stream().collect(Collectors.toMap(t -> t, t -> Task.LATEST_OFFSET));
+        taskOffsetSums.putAll(EMPTY_TASKS.stream().collect(Collectors.toMap(t -> t, t -> 0L)));
+        return taskOffsetSums;
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/ReadOnlyTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ReadOnlyTaskTest.java
new file mode 100644
index 0000000000000..cd5da8739818f
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/ReadOnlyTaskTest.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.streams.processor.TaskId;
+import org.junit.jupiter.api.Test;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statelessTask;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.verify;
+
+class ReadOnlyTaskTest {
+
+    private final List<String> readOnlyMethods = new LinkedList<String>() {
+        {
+            add("needsInitializationOrRestoration");
+            add("inputPartitions");
+            add("changelogPartitions");
+            add("commitRequested");
+            add("isActive");
+            add("state");
+            add("id");
+        }
+    };
+
+    private final List<String> objectMethods = new LinkedList<String>() {
+        {
+            add("wait");
+            add("equals");
+            add("getClass");
+            add("hashCode");
+            add("notify");
+            add("notifyAll");
+            add("toString");
+        }
+    };
+
+    final Task task = statelessTask(new TaskId(1, 0)).build();
+
+    @Test
+    public void shouldDelegateNeedsInitializationOrRestoration() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.needsInitializationOrRestoration();
+
+        verify(task).needsInitializationOrRestoration();
+    }
+
+    @Test
+    public void shouldDelegateId() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.id();
+
+        verify(task).id();
+    }
+
+    @Test
+    public void shouldDelegateIsActive() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.isActive();
+
+        verify(task).isActive();
+    }
+
+    @Test
+    public void shouldDelegateInputPartitions() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.inputPartitions();
+
+        verify(task).inputPartitions();
+    }
+
+    @Test
+    public void shouldDelegateChangelogPartitions() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.changelogPartitions();
+
+        verify(task).changelogPartitions();
+    }
+
+    @Test
+    public void shouldDelegateCommitRequested() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.commitRequested();
+
+        verify(task).commitRequested();
+    }
+
+    @Test
+    public void shouldDelegateState() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+
+        readOnlyTask.state();
+
+        verify(task).state();
+    }
+
+    @Test
+    public void shouldThrowUnsupportedOperationExceptionForForbiddenMethods() {
+        final ReadOnlyTask readOnlyTask = new ReadOnlyTask(task);
+        for (final Method method : ReadOnlyTask.class.getMethods()) {
+            final String methodName = method.getName();
+            if (!readOnlyMethods.contains(methodName) && !objectMethods.contains(methodName)) {
+                shouldThrowUnsupportedOperationException(readOnlyTask, method);
+            }
+        }
+
+    }
+
+    private void shouldThrowUnsupportedOperationException(final ReadOnlyTask readOnlyTask,
+                                                          final Method method) {
+        final Exception exception = assertThrows(
+            UnsupportedOperationException.class,
+            () -> {
+                try {
+                    method.invoke(readOnlyTask, getParameters(method.getParameterTypes()));
+                } catch (final InvocationTargetException invocationTargetException) {
+                    throw invocationTargetException.getCause();
+                }
+            },
+            "Something unexpected happened during invocation of method '" + method.getName() + "'!"
+        );
+        assertEquals("This task is read-only", exception.getMessage());
+    }
+
+    private Object[] getParameters(final Class<?>[] parameterTypes) throws Exception {
+        final Object[] parameters = new Object[parameterTypes.length];
+
+        for (int i = 0; i < parameterTypes.length; ++i) {
+            switch (parameterTypes[i].getName()) {
+                case "boolean":
+                    parameters[i] = true;
+                    break;
+                case "long":
+                    parameters[i] = 0;
+                    break;
+                case "java.util.Set":
+                    parameters[i] = Collections.emptySet();
+                    break;
+                case "java.util.Collection":
+                    parameters[i] = Collections.emptySet();
+                    break;
+                case "java.util.Map":
+                    parameters[i] = Collections.emptyMap();
+                    break;
+                case "org.apache.kafka.common.TopicPartition":
+                    parameters[i] = new TopicPartition("topic", 0);
+                    break;
+                case "java.lang.Exception":
+                    parameters[i] = new IllegalStateException();
+                    break;
+                case "java.util.function.Consumer":
+                    parameters[i] = (Consumer) ignored -> { };
+                    break;
+                case "java.lang.Iterable":
+                    parameters[i] = Collections.emptySet();
+                    break;
+                default:
+                    parameters[i] = parameterTypes[i].getConstructor().newInstance();
+            }
+        }
+
+        return parameters;
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordCollectorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordCollectorTest.java
index 48364f27db583..b3fa516a3f7e6 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordCollectorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordCollectorTest.java
@@ -21,7 +21,6 @@
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.clients.producer.RecordMetadata;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.KafkaException;
 import org.apache.kafka.common.Metric;
@@ -54,6 +53,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.test.InternalMockProcessorContext;
 import org.apache.kafka.test.MockClientSupplier;
 
 import java.util.UUID;
@@ -70,6 +70,9 @@
 
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.streams.processor.internals.ClientUtils.producerRecordSizeInBytes;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOPIC_LEVEL_GROUP;
+
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.expectLastCall;
 import static org.easymock.EasyMock.mock;
@@ -82,6 +85,10 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.emptyMap;
+import static java.util.Collections.emptySet;
+import static java.util.Collections.singletonMap;
 
 public class RecordCollectorTest {
 
@@ -100,6 +107,7 @@ public class RecordCollectorTest {
     ));
 
     private final String topic = "topic";
+    private final String sinkNodeName = "output-node";
     private final Cluster cluster = new Cluster(
         "cluster",
         Collections.singletonList(Node.noNode()),
@@ -121,6 +129,8 @@ public class RecordCollectorTest {
 
     private MockProducer<byte[], byte[]> mockProducer;
     private StreamsProducer streamsProducer;
+    private ProcessorTopology topology;
+    private final InternalProcessorContext<Void, Void> context = new InternalMockProcessorContext<>();
 
     private RecordCollectorImpl collector;
 
@@ -138,12 +148,29 @@ public void setup() {
             Time.SYSTEM
         );
         mockProducer = clientSupplier.producers.get(0);
+        final SinkNode<?, ?> sinkNode = new SinkNode<>(
+            sinkNodeName,
+            new StaticTopicNameExtractor<>(topic),
+            stringSerializer,
+            byteArraySerializer,
+            streamPartitioner);
+        topology = new ProcessorTopology(
+            emptyList(),
+            emptyMap(),
+            singletonMap(topic, sinkNode),
+            emptyList(),
+            emptyList(),
+            emptyMap(),
+            emptySet()
+        );
         collector = new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             productionExceptionHandler,
-            streamsMetrics);
+            streamsMetrics,
+            topology
+        );
     }
 
     @After
@@ -151,16 +178,73 @@ public void cleanup() {
         collector.closeClean();
     }
 
+    @Test
+    public void shouldRecordRecordsAndBytesProduced() {
+        final Headers headers = new RecordHeaders(new Header[]{new RecordHeader("key", "value".getBytes())});
+
+        final String threadId = Thread.currentThread().getName();
+        final String processorNodeId = sinkNodeName;
+        final String topic = "topic";
+        final Metric recordsProduced = streamsMetrics.metrics().get(
+            new MetricName("records-produced-total",
+                           TOPIC_LEVEL_GROUP,
+                           "The total number of records produced from this topic",
+                           streamsMetrics.topicLevelTagMap(threadId, taskId.toString(), processorNodeId, topic))
+        );
+        final Metric bytesProduced = streamsMetrics.metrics().get(
+            new MetricName("bytes-produced-total",
+                           TOPIC_LEVEL_GROUP,
+                           "The total number of bytes produced from this topic",
+                           streamsMetrics.topicLevelTagMap(threadId, taskId.toString(), processorNodeId, topic))
+        );
+
+        double totalRecords = 0D;
+        double totalBytes = 0D;
+
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, sinkNodeName, context);
+        ++totalRecords;
+        totalBytes += producerRecordSizeInBytes(mockProducer.history().get(0));
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+
+        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer, sinkNodeName, context);
+        ++totalRecords;
+        totalBytes += producerRecordSizeInBytes(mockProducer.history().get(1));
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, sinkNodeName, context);
+        ++totalRecords;
+        totalBytes += producerRecordSizeInBytes(mockProducer.history().get(2));
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+
+        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer, sinkNodeName, context);
+        ++totalRecords;
+        totalBytes += producerRecordSizeInBytes(mockProducer.history().get(3));
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, sinkNodeName, context);
+        ++totalRecords;
+        totalBytes += producerRecordSizeInBytes(mockProducer.history().get(4));
+        assertThat(recordsProduced.metricValue(), equalTo(totalRecords));
+        assertThat(bytesProduced.metricValue(), equalTo(totalBytes));
+    }
+
     @Test
     public void shouldSendToSpecificPartition() {
         final Headers headers = new RecordHeaders(new Header[] {new RecordHeader("key", "value".getBytes())});
 
-        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", headers, 2, null, stringSerializer, stringSerializer);
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", headers, 1, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", headers, 2, null, stringSerializer, stringSerializer, null, null);
 
         Map<TopicPartition, Long> offsets = collector.offsets();
 
@@ -169,9 +253,9 @@ public void shouldSendToSpecificPartition() {
         assertEquals(0L, (long) offsets.get(new TopicPartition(topic, 2)));
         assertEquals(6, mockProducer.history().size());
 
-        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", null, 1, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", headers, 2, null, stringSerializer, stringSerializer);
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", null, 1, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", headers, 2, null, stringSerializer, stringSerializer, null, null);
 
         offsets = collector.offsets();
 
@@ -185,15 +269,15 @@ public void shouldSendToSpecificPartition() {
     public void shouldSendWithPartitioner() {
         final Headers headers = new RecordHeaders(new Header[] {new RecordHeader("key", "value".getBytes())});
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "9", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "27", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "81", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "243", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "28", "0", headers, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "82", "0", headers, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "244", "0", headers, null, stringSerializer, stringSerializer, streamPartitioner);
-        collector.send(topic, "245", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "9", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "27", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "81", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "243", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "28", "0", headers, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "82", "0", headers, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "244", "0", headers, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
+        collector.send(topic, "245", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final Map<TopicPartition, Long> offsets = collector.offsets();
 
@@ -211,15 +295,15 @@ public void shouldSendWithPartitioner() {
     public void shouldSendWithNoPartition() {
         final Headers headers = new RecordHeaders(new Header[] {new RecordHeader("key", "value".getBytes())});
 
-        collector.send(topic, "3", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "9", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "27", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "81", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "243", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "28", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "82", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "244", "0", headers, null, null, stringSerializer, stringSerializer);
-        collector.send(topic, "245", "0", headers, null, null, stringSerializer, stringSerializer);
+        collector.send(topic, "3", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "9", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "27", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "81", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "243", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "28", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "82", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "244", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "245", "0", headers, null, null, stringSerializer, stringSerializer, null, null);
 
         final Map<TopicPartition, Long> offsets = collector.offsets();
 
@@ -234,9 +318,9 @@ public void shouldSendWithNoPartition() {
     public void shouldUpdateOffsetsUponCompletion() {
         Map<TopicPartition, Long> offsets = collector.offsets();
 
-        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", null, 1, null, stringSerializer, stringSerializer);
-        collector.send(topic, "999", "0", null, 2, null, stringSerializer, stringSerializer);
+        collector.send(topic, "999", "0", null, 0, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", null, 1, null, stringSerializer, stringSerializer, null, null);
+        collector.send(topic, "999", "0", null, 2, null, stringSerializer, stringSerializer, null, null);
 
         assertEquals(Collections.<TopicPartition, Long>emptyMap(), offsets);
 
@@ -254,7 +338,7 @@ public void shouldPassThroughRecordHeaderToSerializer() {
         final CustomStringSerializer valueSerializer = new CustomStringSerializer();
         keySerializer.configure(Collections.emptyMap(), true);
 
-        collector.send(topic, "3", "0", new RecordHeaders(), null, keySerializer, valueSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", new RecordHeaders(), null, keySerializer, valueSerializer, null, null, streamPartitioner);
 
         final List<ProducerRecord<byte[], byte[]>> recordHistory = mockProducer.history();
         for (final ProducerRecord<byte[], byte[]> sentRecord : recordHistory) {
@@ -271,14 +355,19 @@ public void shouldForwardFlushToStreamsProducer() {
         expect(streamsProducer.eosEnabled()).andReturn(false);
         streamsProducer.flush();
         expectLastCall();
-        replay(streamsProducer);
+
+        final ProcessorTopology topology = mock(ProcessorTopology.class);
+        expect(topology.sinkTopics()).andStubReturn(Collections.emptySet());
+        replay(streamsProducer, topology);
 
         final RecordCollector collector = new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             productionExceptionHandler,
-            streamsMetrics);
+            streamsMetrics, 
+            topology
+        );
 
         collector.flush();
 
@@ -291,14 +380,18 @@ public void shouldForwardFlushToStreamsProducerEosEnabled() {
         expect(streamsProducer.eosEnabled()).andReturn(true);
         streamsProducer.flush();
         expectLastCall();
-        replay(streamsProducer);
-
+        final ProcessorTopology topology = mock(ProcessorTopology.class);
+        expect(topology.sinkTopics()).andStubReturn(Collections.emptySet());
+        replay(streamsProducer, topology);
+        
         final RecordCollector collector = new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             productionExceptionHandler,
-            streamsMetrics);
+            streamsMetrics,
+            topology
+        );
 
         collector.flush();
 
@@ -309,15 +402,20 @@ public void shouldForwardFlushToStreamsProducerEosEnabled() {
     public void shouldNotAbortTxOnCloseCleanIfEosEnabled() {
         final StreamsProducer streamsProducer = mock(StreamsProducer.class);
         expect(streamsProducer.eosEnabled()).andReturn(true);
-        replay(streamsProducer);
-
+        
+        final ProcessorTopology topology = mock(ProcessorTopology.class);
+        expect(topology.sinkTopics()).andStubReturn(Collections.emptySet());
+        replay(streamsProducer, topology);
+        
         final RecordCollector collector = new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             productionExceptionHandler,
-            streamsMetrics);
-
+            streamsMetrics,
+            topology
+        );
+       
         collector.closeClean();
 
         verify(streamsProducer);
@@ -328,14 +426,19 @@ public void shouldAbortTxOnCloseDirtyIfEosEnabled() {
         final StreamsProducer streamsProducer = mock(StreamsProducer.class);
         expect(streamsProducer.eosEnabled()).andReturn(true);
         streamsProducer.abortTransaction();
-        replay(streamsProducer);
-
+        
+        final ProcessorTopology topology = mock(ProcessorTopology.class);
+        expect(topology.sinkTopics()).andStubReturn(Collections.emptySet());
+        replay(streamsProducer, topology);
+        
         final RecordCollector collector = new RecordCollectorImpl(
             logContext,
             taskId,
             streamsProducer,
             productionExceptionHandler,
-            streamsMetrics);
+            streamsMetrics,
+            topology
+        );
 
         collector.closeDirty();
 
@@ -355,7 +458,7 @@ public void shouldThrowInformativeStreamsExceptionOnKeyClassCastException() {
                 0,
                 0L,
                 (Serializer) new LongSerializer(), // need to add cast to trigger `ClassCastException`
-                new StringSerializer())
+                new StringSerializer(), null, null)
         );
 
         assertThat(expected.getCause(), instanceOf(ClassCastException.class));
@@ -383,7 +486,7 @@ public void shouldThrowInformativeStreamsExceptionOnKeyAndNullValueClassCastExce
                 0,
                 0L,
                 (Serializer) new LongSerializer(), // need to add cast to trigger `ClassCastException`
-                new StringSerializer())
+                new StringSerializer(), null, null)
         );
 
         assertThat(expected.getCause(), instanceOf(ClassCastException.class));
@@ -411,7 +514,7 @@ public void shouldThrowInformativeStreamsExceptionOnValueClassCastException() {
                 0,
                 0L,
                 new StringSerializer(),
-                (Serializer) new LongSerializer()) // need to add cast to trigger `ClassCastException`
+                (Serializer) new LongSerializer(), null, null) // need to add cast to trigger `ClassCastException`
         );
 
         assertThat(expected.getCause(), instanceOf(ClassCastException.class));
@@ -439,7 +542,7 @@ public void shouldThrowInformativeStreamsExceptionOnValueAndNullKeyClassCastExce
                 0,
                 0L,
                 new StringSerializer(),
-                (Serializer) new LongSerializer()) // need to add cast to trigger `ClassCastException`
+                (Serializer) new LongSerializer(), null, null) // need to add cast to trigger `ClassCastException`
         );
 
         assertThat(expected.getCause(), instanceOf(ClassCastException.class));
@@ -461,13 +564,14 @@ public void shouldThrowInformativeStreamsExceptionOnKafkaExceptionFromStreamPart
             taskId,
             getExceptionalStreamProducerOnPartitionsFor(new KafkaException("Kaboom!")),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
         final StreamsException exception = assertThrows(
             StreamsException.class,
-            () -> collector.send(topic, "0", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "0", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertThat(
             exception.getMessage(),
@@ -492,13 +596,14 @@ private <E extends RuntimeException> void shouldForwardExceptionWithoutWrappingI
             taskId,
             getExceptionalStreamProducerOnPartitionsFor(runtimeException),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
         final RuntimeException exception = assertThrows(
             runtimeException.getClass(),
-            () -> collector.send(topic, "0", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "0", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertThat(exception.getMessage(), equalTo("Kaboom!"));
     }
@@ -519,15 +624,16 @@ private void testThrowTaskMigratedExceptionOnSubsequentSend(final RuntimeExcepti
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final TaskMigratedException thrown = assertThrows(
             TaskMigratedException.class,
-            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertEquals(exception, thrown.getCause());
     }
@@ -548,11 +654,12 @@ private void testThrowTaskMigratedExceptionOnSubsequentFlush(final RuntimeExcept
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final TaskMigratedException thrown = assertThrows(TaskMigratedException.class, collector::flush);
         assertEquals(exception, thrown.getCause());
@@ -574,11 +681,12 @@ private void testThrowTaskMigratedExceptionOnSubsequentClose(final RuntimeExcept
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final TaskMigratedException thrown = assertThrows(TaskMigratedException.class, collector::closeClean);
         assertEquals(exception, thrown.getCause());
@@ -592,14 +700,15 @@ public void shouldThrowStreamsExceptionOnSubsequentSendIfASendFailsWithDefaultEx
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(
             StreamsException.class,
-            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertEquals(exception, thrown.getCause());
         assertThat(
@@ -618,10 +727,11 @@ public void shouldThrowStreamsExceptionOnSubsequentFlushIfASendFailsWithDefaultE
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(StreamsException.class, collector::flush);
         assertEquals(exception, thrown.getCause());
@@ -641,10 +751,11 @@ public void shouldThrowStreamsExceptionOnSubsequentCloseIfASendFailsWithDefaultE
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(StreamsException.class, collector::closeClean);
         assertEquals(exception, thrown.getCause());
@@ -664,14 +775,15 @@ public void shouldThrowStreamsExceptionOnSubsequentSendIfFatalEvenWithContinueEx
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             new AlwaysContinueProductionExceptionHandler(),
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(
             StreamsException.class,
-            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertEquals(exception, thrown.getCause());
         assertThat(
@@ -690,10 +802,11 @@ public void shouldThrowStreamsExceptionOnSubsequentFlushIfFatalEvenWithContinueE
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             new AlwaysContinueProductionExceptionHandler(),
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(StreamsException.class, collector::flush);
         assertEquals(exception, thrown.getCause());
@@ -713,10 +826,11 @@ public void shouldThrowStreamsExceptionOnSubsequentCloseIfFatalEvenWithContinueE
             taskId,
             getExceptionalStreamsProducerOnSend(exception),
             new AlwaysContinueProductionExceptionHandler(),
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
 
         final StreamsException thrown = assertThrows(StreamsException.class, collector::closeClean);
         assertEquals(exception, thrown.getCause());
@@ -735,13 +849,14 @@ public void shouldNotThrowStreamsExceptionOnSubsequentCallIfASendFailsWithContin
             taskId,
             getExceptionalStreamsProducerOnSend(new Exception()),
             new AlwaysContinueProductionExceptionHandler(),
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
         try (final LogCaptureAppender logCaptureAppender =
                  LogCaptureAppender.createAndRegister(RecordCollectorImpl.class)) {
 
-            collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+            collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
             collector.flush();
 
             final List<String> messages = logCaptureAppender.getMessages();
@@ -767,7 +882,7 @@ public void shouldNotThrowStreamsExceptionOnSubsequentCallIfASendFailsWithContin
         ));
         assertEquals(1.0, metric.metricValue());
 
-        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner);
+        collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner);
         collector.flush();
         collector.closeClean();
     }
@@ -784,7 +899,7 @@ public void shouldNotAbortTxnOnEOSCloseDirtyIfNothingSent() {
                 new MockClientSupplier() {
                     @Override
                     public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
-                        return new MockProducer<byte[], byte[]>(cluster, true, new DefaultPartitioner(), byteArraySerializer, byteArraySerializer) {
+                        return new MockProducer<byte[], byte[]>(cluster, true, byteArraySerializer, byteArraySerializer) {
                             @Override
                             public void abortTransaction() {
                                 functionCalled.set(true);
@@ -798,7 +913,8 @@ public void abortTransaction() {
                 Time.SYSTEM
             ),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
         collector.closeDirty();
@@ -816,7 +932,7 @@ public void shouldThrowIfTopicIsUnknownOnSendWithPartitioner() {
                 new MockClientSupplier() {
                     @Override
                     public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
-                        return new MockProducer<byte[], byte[]>(cluster, true, new DefaultPartitioner(), byteArraySerializer, byteArraySerializer) {
+                        return new MockProducer<byte[], byte[]>(cluster, true, byteArraySerializer, byteArraySerializer) {
                             @Override
                             public List<PartitionInfo> partitionsFor(final String topic) {
                                 return Collections.emptyList();
@@ -830,13 +946,14 @@ public List<PartitionInfo> partitionsFor(final String topic) {
                 Time.SYSTEM
             ),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
         collector.initialize();
 
         final StreamsException thrown = assertThrows(
             StreamsException.class,
-            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, streamPartitioner)
+            () -> collector.send(topic, "3", "0", null, null, stringSerializer, stringSerializer, null, null, streamPartitioner)
         );
         assertThat(
             thrown.getMessage(),
@@ -865,7 +982,8 @@ public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
                 Time.SYSTEM
             ),
             productionExceptionHandler,
-            streamsMetrics
+            streamsMetrics,
+            topology
         );
 
         collector.closeClean();
@@ -889,7 +1007,7 @@ private StreamsProducer getExceptionalStreamsProducerOnSend(final Exception exce
             new MockClientSupplier() {
                 @Override
                 public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
-                    return new MockProducer<byte[], byte[]>(cluster, true, new DefaultPartitioner(), byteArraySerializer, byteArraySerializer) {
+                    return new MockProducer<byte[], byte[]>(cluster, true, byteArraySerializer, byteArraySerializer) {
                         @Override
                         public synchronized Future<RecordMetadata> send(final ProducerRecord<byte[], byte[]> record, final Callback callback) {
                             callback.onCompletion(null, exception);
@@ -912,7 +1030,7 @@ private StreamsProducer getExceptionalStreamProducerOnPartitionsFor(final Runtim
             new MockClientSupplier() {
                 @Override
                 public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
-                    return new MockProducer<byte[], byte[]>(cluster, true, new DefaultPartitioner(), byteArraySerializer, byteArraySerializer) {
+                    return new MockProducer<byte[], byte[]>(cluster, true, byteArraySerializer, byteArraySerializer) {
                         @Override
                         public synchronized List<PartitionInfo> partitionsFor(final String topic) {
                             throw exception;
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordQueueTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordQueueTest.java
index bea7a057003dc..9741ba1c17e99 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordQueueTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/RecordQueueTest.java
@@ -17,9 +17,12 @@
 package org.apache.kafka.streams.processor.internals;
 
 import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.Metric;
+import org.apache.kafka.common.MetricName;
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.errors.SerializationException;
 import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.record.TimestampType;
 import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.IntegerDeserializer;
@@ -28,12 +31,15 @@
 import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.common.utils.MockTime;
+import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;
 import org.apache.kafka.streams.errors.LogAndFailExceptionHandler;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.processor.FailOnInvalidTimestamp;
 import org.apache.kafka.streams.processor.LogAndSkipOnInvalidTimestamp;
 import org.apache.kafka.streams.processor.TimestampExtractor;
+import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.state.StateSerdes;
 import org.apache.kafka.test.InternalMockProcessorContext;
 import org.apache.kafka.test.MockRecordCollector;
@@ -48,6 +54,9 @@
 import java.util.List;
 import java.util.Optional;
 
+import static org.apache.kafka.streams.processor.internals.ClientUtils.consumerRecordSizeInBytes;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOPIC_LEVEL_GROUP;
+
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.equalTo;
@@ -62,10 +71,15 @@ public class RecordQueueTest {
     private final Deserializer<Integer> intDeserializer = new IntegerDeserializer();
     private final TimestampExtractor timestampExtractor = new MockTimestampExtractor();
 
+    private final Metrics metrics = new Metrics();
+    private final StreamsMetricsImpl streamsMetrics =
+        new StreamsMetricsImpl(metrics, "mock", StreamsConfig.METRICS_LATEST, new MockTime());
+
     @SuppressWarnings("rawtypes")
     final InternalMockProcessorContext context = new InternalMockProcessorContext<>(
         StateSerdes.withBuiltinTypes("anyName", Bytes.class, Bytes.class),
-        new MockRecordCollector()
+        new MockRecordCollector(),
+        metrics
     );
     private final MockSourceNode<Integer, Integer> mockSourceNodeWithMetrics
         = new MockSourceNode<>(intDeserializer, intDeserializer);
@@ -98,6 +112,57 @@ public void after() {
         mockSourceNodeWithMetrics.close();
     }
 
+    @Test
+    public void testConsumedSensor() {
+        final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(
+            new ConsumerRecord<>("topic", 1, 1, 0L, TimestampType.CREATE_TIME, 0, 0, recordKey, recordValue, new RecordHeaders(), Optional.empty()),
+            new ConsumerRecord<>("topic", 1, 2, 0L, TimestampType.CREATE_TIME, 0, 0, recordKey, recordValue, new RecordHeaders(), Optional.empty()),
+            new ConsumerRecord<>("topic", 1, 3, 0L, TimestampType.CREATE_TIME, 0, 0, recordKey, recordValue, new RecordHeaders(), Optional.empty()));
+
+        queue.addRawRecords(records);
+
+        final String threadId = Thread.currentThread().getName();
+        final String taskId = context.taskId().toString();
+        final String processorNodeId = mockSourceNodeWithMetrics.name();
+        final String topic = "topic";
+        final Metric recordsConsumed = context.metrics().metrics().get(
+            new MetricName("records-consumed-total",
+                           TOPIC_LEVEL_GROUP,
+                           "The total number of records consumed from this topic",
+                           streamsMetrics.topicLevelTagMap(threadId, taskId, processorNodeId, topic))
+        );
+        final Metric bytesConsumed = context.metrics().metrics().get(
+            new MetricName("bytes-consumed-total",
+                           TOPIC_LEVEL_GROUP,
+                           "The total number of bytes consumed from this topic",
+                           streamsMetrics.topicLevelTagMap(threadId, taskId, processorNodeId, topic))
+        );
+
+        double totalBytes = 0D;
+        double totalRecords = 0D;
+
+        queue.poll(5L);
+        ++totalRecords;
+        totalBytes += consumerRecordSizeInBytes(records.get(0));
+
+        assertThat(bytesConsumed.metricValue(), equalTo(totalBytes));
+        assertThat(recordsConsumed.metricValue(), equalTo(totalRecords));
+
+        queue.poll(6L);
+        ++totalRecords;
+        totalBytes += consumerRecordSizeInBytes(records.get(1));
+
+        assertThat(bytesConsumed.metricValue(), equalTo(totalBytes));
+        assertThat(recordsConsumed.metricValue(), equalTo(totalRecords));
+
+        queue.poll(7L);
+        ++totalRecords;
+        totalBytes += consumerRecordSizeInBytes(records.get(2));
+
+        assertThat(bytesConsumed.metricValue(), equalTo(totalBytes));
+        assertThat(recordsConsumed.metricValue(), equalTo(totalRecords));
+    }
+
     @Test
     public void testTimeTracking() {
         assertTrue(queue.isEmpty());
@@ -118,13 +183,13 @@ public void testTimeTracking() {
         assertEquals(2L, queue.headRecordOffset().longValue());
 
         // poll the first record, now with 1, 3
-        assertEquals(2L, queue.poll().timestamp);
+        assertEquals(2L, queue.poll(0).timestamp);
         assertEquals(2, queue.size());
         assertEquals(1L, queue.headRecordTimestamp());
         assertEquals(1L, queue.headRecordOffset().longValue());
 
         // poll the second record, now with 3
-        assertEquals(1L, queue.poll().timestamp);
+        assertEquals(1L, queue.poll(0).timestamp);
         assertEquals(1, queue.size());
         assertEquals(3L, queue.headRecordTimestamp());
         assertEquals(3L, queue.headRecordOffset().longValue());
@@ -143,21 +208,21 @@ public void testTimeTracking() {
         assertEquals(3L, queue.headRecordOffset().longValue());
 
         // poll the third record, now with 4, 1, 2
-        assertEquals(3L, queue.poll().timestamp);
+        assertEquals(3L, queue.poll(0).timestamp);
         assertEquals(3, queue.size());
         assertEquals(4L, queue.headRecordTimestamp());
         assertEquals(4L, queue.headRecordOffset().longValue());
 
         // poll the rest records
-        assertEquals(4L, queue.poll().timestamp);
+        assertEquals(4L, queue.poll(0).timestamp);
         assertEquals(1L, queue.headRecordTimestamp());
         assertEquals(1L, queue.headRecordOffset().longValue());
 
-        assertEquals(1L, queue.poll().timestamp);
+        assertEquals(1L, queue.poll(0).timestamp);
         assertEquals(2L, queue.headRecordTimestamp());
         assertEquals(2L, queue.headRecordOffset().longValue());
 
-        assertEquals(2L, queue.poll().timestamp);
+        assertEquals(2L, queue.poll(0).timestamp);
         assertTrue(queue.isEmpty());
         assertEquals(0, queue.size());
         assertEquals(RecordQueue.UNKNOWN, queue.headRecordTimestamp());
@@ -176,7 +241,7 @@ public void testTimeTracking() {
         assertEquals(4L, queue.headRecordOffset().longValue());
 
         // poll one record again, the timestamp should advance now
-        assertEquals(4L, queue.poll().timestamp);
+        assertEquals(4L, queue.poll(0).timestamp);
         assertEquals(2, queue.size());
         assertEquals(5L, queue.headRecordTimestamp());
         assertEquals(5L, queue.headRecordOffset().longValue());
@@ -218,13 +283,13 @@ public void shouldTrackPartitionTimeAsMaxProcessedTimestamp() {
         queue.addRawRecords(list1);
         assertThat(queue.partitionTime(), is(RecordQueue.UNKNOWN));
 
-        queue.poll();
+        queue.poll(0);
         assertThat(queue.partitionTime(), is(2L));
 
-        queue.poll();
+        queue.poll(0);
         assertThat(queue.partitionTime(), is(2L));
 
-        queue.poll();
+        queue.poll(0);
         assertThat(queue.partitionTime(), is(3L));
     }
 
@@ -251,13 +316,13 @@ public void shouldSetTimestampAndRespectMaxTimestampPolicy() {
         queue.addRawRecords(list1);
         assertThat(queue.partitionTime(), is(150L));
 
-        queue.poll();
+        queue.poll(0);
         assertThat(queue.partitionTime(), is(200L));
 
         queue.setPartitionTime(500L);
         assertThat(queue.partitionTime(), is(500L));
 
-        queue.poll();
+        queue.poll(0);
         assertThat(queue.partitionTime(), is(500L));
     }
 
@@ -299,7 +364,7 @@ public void shouldNotThrowStreamsExceptionWhenKeyDeserializationFailsWithSkipHan
 
         queueThatSkipsDeserializeErrors.addRawRecords(records);
         assertEquals(1, queueThatSkipsDeserializeErrors.size());
-        assertEquals(new CorruptedRecord(record), queueThatSkipsDeserializeErrors.poll());
+        assertEquals(new CorruptedRecord(record), queueThatSkipsDeserializeErrors.poll(0));
     }
 
     @Test
@@ -313,7 +378,7 @@ public void shouldNotThrowStreamsExceptionWhenValueDeserializationFailsWithSkipH
 
         queueThatSkipsDeserializeErrors.addRawRecords(records);
         assertEquals(1, queueThatSkipsDeserializeErrors.size());
-        assertEquals(new CorruptedRecord(record), queueThatSkipsDeserializeErrors.poll());
+        assertEquals(new CorruptedRecord(record), queueThatSkipsDeserializeErrors.poll(0));
     }
 
     @Test
@@ -394,13 +459,13 @@ public void shouldPassPartitionTimeToTimestampExtractor() {
         // no (known) timestamp has yet been passed to the timestamp extractor
         assertEquals(RecordQueue.UNKNOWN, timestampExtractor.partitionTime);
 
-        queue.poll();
+        queue.poll(0);
         assertEquals(2L, timestampExtractor.partitionTime);
 
-        queue.poll();
+        queue.poll(0);
         assertEquals(2L, timestampExtractor.partitionTime);
 
-        queue.poll();
+        queue.poll(0);
         assertEquals(3L, timestampExtractor.partitionTime);
 
     }
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
index 00daaa6e9d82b..02d742d8abaf8 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StandbyTaskTest.java
@@ -36,7 +36,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 import org.apache.kafka.test.MockKeyValueStore;
 import org.apache.kafka.test.MockKeyValueStoreBuilder;
@@ -207,6 +207,49 @@ public void shouldThrowIfCommittingOnIllegalState() {
         assertThrows(IllegalStateException.class, task::prepareCommit);
     }
 
+
+    @Test
+    public void shouldAlwaysCheckpointStateIfEnforced() {
+        stateManager.flush();
+        EasyMock.expectLastCall().once();
+        stateManager.checkpoint();
+        EasyMock.expectLastCall().once();
+        EasyMock.expect(stateManager.changelogOffsets()).andReturn(Collections.emptyMap()).anyTimes();
+        EasyMock.replay(stateManager);
+
+        task = createStandbyTask();
+
+        task.initializeIfNeeded();
+        task.maybeCheckpoint(true);
+
+        EasyMock.verify(stateManager);
+    }
+
+    @Test
+    public void shouldOnlyCheckpointStateWithBigAdvanceIfNotEnforced() {
+        stateManager.flush();
+        EasyMock.expectLastCall().once();
+        stateManager.checkpoint();
+        EasyMock.expectLastCall().once();
+        EasyMock.expect(stateManager.changelogOffsets())
+                .andReturn(Collections.singletonMap(partition, 50L))
+                .andReturn(Collections.singletonMap(partition, 11000L))
+                .andReturn(Collections.singletonMap(partition, 12000L));
+        EasyMock.replay(stateManager);
+
+        task = createStandbyTask();
+        task.initializeIfNeeded();
+
+        task.maybeCheckpoint(false);  // this should not checkpoint
+        assertTrue(task.offsetSnapshotSinceLastFlush.isEmpty());
+        task.maybeCheckpoint(false);  // this should checkpoint
+        assertEquals(Collections.singletonMap(partition, 11000L), task.offsetSnapshotSinceLastFlush);
+        task.maybeCheckpoint(false);  // this should not checkpoint
+        assertEquals(Collections.singletonMap(partition, 11000L), task.offsetSnapshotSinceLastFlush);
+
+        EasyMock.verify(stateManager);
+    }
+
     @Test
     public void shouldFlushAndCheckpointStateManagerOnCommit() {
         EasyMock.expect(stateManager.changelogOffsets()).andStubReturn(Collections.emptyMap());
@@ -528,13 +571,13 @@ public void shouldRecycleTask() {
         EasyMock.replay(stateManager);
 
         task = createStandbyTask();
-        assertThrows(IllegalStateException.class, () -> task.closeCleanAndRecycleState()); // CREATED
+        assertThrows(IllegalStateException.class, () -> task.prepareRecycle()); // CREATED
 
         task.initializeIfNeeded();
-        assertThrows(IllegalStateException.class, () -> task.closeCleanAndRecycleState()); // RUNNING
+        assertThrows(IllegalStateException.class, () -> task.prepareRecycle()); // RUNNING
 
         task.suspend();
-        task.closeCleanAndRecycleState(); // SUSPENDED
+        task.prepareRecycle(); // SUSPENDED
 
         // Currently, there are no metrics registered for standby tasks.
         // This is a regression test so that, if we add some, we will be sure to deregister them.
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java
index 81bc7d7562d28..205f19537badb 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StateDirectoryTest.java
@@ -241,7 +241,7 @@ public void shouldThrowProcessorStateExceptionIfStateDirOccupied() throws IOExce
 
         // Replace application's stateDir to regular file
         Utils.delete(appDir);
-        appDir.createNewFile();
+        Files.createFile(appDir.toPath());
 
         assertThrows(ProcessorStateException.class, () -> directory.getOrCreateDirectoryForTask(taskId));
     }
@@ -253,7 +253,7 @@ public void shouldThrowProcessorStateExceptionIfTestDirOccupied() throws IOExcep
         // Replace taskDir to a regular file
         final File taskDir = new File(appDir, toTaskDirString(taskId));
         Utils.delete(taskDir);
-        taskDir.createNewFile();
+        Files.createFile(taskDir.toPath());
 
         // Error: ProcessorStateException should be thrown.
         assertThrows(ProcessorStateException.class, () -> directory.getOrCreateDirectoryForTask(taskId));
@@ -391,8 +391,8 @@ public void shouldReturnEmptyArrayIfListFilesReturnsNull() throws IOException {
 
         // make sure the File#listFiles returns null and StateDirectory#listAllTaskDirectories is able to handle null
         Utils.delete(appDir);
-        assertTrue(appDir.createNewFile());
-        assertTrue(appDir.exists());
+        Files.createFile(appDir.toPath());
+        assertTrue(Files.exists(appDir.toPath()));
         assertNull(appDir.listFiles());
         assertEquals(0, directory.listAllTaskDirectories().size());
     }
@@ -571,7 +571,7 @@ public void shouldNotDeleteAppDirWhenCleanUpIfNotEmpty() throws IOException {
 
         // Create a dummy file in appDir; for this, appDir will not be empty after cleanup.
         final File dummyFile = new File(appDir, "dummy");
-        assertTrue(dummyFile.createNewFile());
+        Files.createFile(dummyFile.toPath());
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(StateDirectory.class)) {
             // call StateDirectory#clean
@@ -791,7 +791,7 @@ public void shouldGetFreshProcessIdIfProcessFileDeleted() {
     @Test
     public void shouldGetFreshProcessIdIfJsonUnreadable() throws Exception {
         final File processFile = new File(appDir, PROCESS_FILE_NAME);
-        assertThat(processFile.createNewFile(), is(true));
+        Files.createFile(processFile.toPath());
         final UUID processId = UUID.randomUUID();
 
         final FileOutputStream fileOutputStream = new FileOutputStream(processFile);
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java
index 594fc7e842ffc..fbc8d4232613a 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StoreChangelogReaderTest.java
@@ -16,6 +16,10 @@
  */
 package org.apache.kafka.streams.processor.internals;
 
+import org.apache.kafka.clients.admin.AdminClientTestUtils;
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsOptions;
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsResult;
+import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsSpec;
 import org.apache.kafka.clients.admin.ListOffsetsOptions;
 import org.apache.kafka.clients.admin.ListOffsetsResult;
 import org.apache.kafka.clients.admin.MockAdminClient;
@@ -56,9 +60,6 @@
 import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.function.Function;
-import java.util.stream.Collectors;
 
 import static java.util.Collections.singletonMap;
 import static org.apache.kafka.common.utils.Utils.mkEntry;
@@ -275,7 +276,7 @@ public void shouldPollWithRightTimeout() {
         adminClient.updateEndOffsets(Collections.singletonMap(tp, 11L));
 
         final StoreChangelogReader changelogReader =
-                new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
+            new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
 
         changelogReader.register(tp, stateManager);
 
@@ -646,25 +647,23 @@ public void shouldRequestCommittedOffsetsAndHandleTimeoutException() {
         EasyMock.replay(mockTask, stateManager, storeMetadata, store);
 
         final AtomicBoolean functionCalled = new AtomicBoolean(false);
-        final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) {
+        final MockAdminClient adminClient = new MockAdminClient() {
             @Override
-            public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition> partitions) {
+            public synchronized ListConsumerGroupOffsetsResult listConsumerGroupOffsets(final Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, final ListConsumerGroupOffsetsOptions options) {
                 if (functionCalled.get()) {
-                    return partitions
-                        .stream()
-                        .collect(Collectors.toMap(Function.identity(), partition -> new OffsetAndMetadata(10L)));
+                    return super.listConsumerGroupOffsets(groupSpecs, options);
                 } else {
                     functionCalled.set(true);
-                    throw new TimeoutException("KABOOM!");
+                    return AdminClientTestUtils.listConsumerGroupOffsetsResult(groupSpecs.keySet().iterator().next(), new TimeoutException("KABOOM!"));
                 }
             }
         };
 
         adminClient.updateEndOffsets(Collections.singletonMap(tp, 20L));
+        adminClient.updateConsumerGroupOffsets(Collections.singletonMap(tp, 10L));
 
         final StoreChangelogReader changelogReader =
             new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
-        changelogReader.setMainConsumer(consumer);
 
         changelogReader.register(tp, stateManager);
         changelogReader.restore(Collections.singletonMap(taskId, mockTask));
@@ -708,18 +707,16 @@ public void shouldThrowIfCommittedOffsetsFail() {
         EasyMock.expect(storeMetadata.offset()).andReturn(10L).anyTimes();
         EasyMock.replay(stateManager, storeMetadata, store);
 
-        final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) {
+        final MockAdminClient adminClient = new MockAdminClient() {
             @Override
-            public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition> partitions) {
+            public synchronized ListConsumerGroupOffsetsResult listConsumerGroupOffsets(final Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, final ListConsumerGroupOffsetsOptions options) {
                 throw kaboom;
             }
         };
-
         adminClient.updateEndOffsets(Collections.singletonMap(tp, 10L));
 
         final StoreChangelogReader changelogReader =
             new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
-        changelogReader.setMainConsumer(consumer);
 
         changelogReader.register(tp, stateManager);
 
@@ -792,9 +789,9 @@ public void shouldNotUpdateLimitForNonSourceStandbyChangelog() {
         EasyMock.expect(standbyStateManager.changelogAsSource(tp)).andReturn(false).anyTimes();
         EasyMock.replay(mockTasks, standbyStateManager, storeMetadata, store);
 
-        final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) {
+        final MockAdminClient adminClient = new MockAdminClient() {
             @Override
-            public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition> partitions) {
+            public synchronized ListConsumerGroupOffsetsResult listConsumerGroupOffsets(final Map<String, ListConsumerGroupOffsetsSpec> groupSpecs, final ListConsumerGroupOffsetsOptions options) {
                 throw new AssertionError("Should not try to fetch committed offsets");
             }
         };
@@ -803,7 +800,6 @@ public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition
         properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
         final StreamsConfig config = new StreamsConfig(StreamsTestUtils.getStreamsConfig("test-reader", properties));
         final StoreChangelogReader changelogReader = new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
-        changelogReader.setMainConsumer(consumer);
         changelogReader.transitToUpdateStandby();
 
         consumer.updateBeginningOffsets(Collections.singletonMap(tp, 5L));
@@ -846,25 +842,15 @@ public void shouldRestoreToLimitInStandbyState() {
         EasyMock.expect(standbyStateManager.changelogAsSource(tp)).andReturn(true).anyTimes();
         EasyMock.replay(mockTasks, standbyStateManager, storeMetadata, store);
 
-        final AtomicLong offset = new AtomicLong(7L);
-        final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(OffsetResetStrategy.EARLIEST) {
-            @Override
-            public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition> partitions) {
-                return partitions
-                    .stream()
-                    .collect(Collectors.toMap(Function.identity(), partition -> new OffsetAndMetadata(offset.get())));
-            }
-        };
-
         final long now = time.milliseconds();
         final Properties properties = new Properties();
         properties.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 100L);
         final StreamsConfig config = new StreamsConfig(StreamsTestUtils.getStreamsConfig("test-reader", properties));
         final StoreChangelogReader changelogReader = new StoreChangelogReader(time, config, logContext, adminClient, consumer, callback);
-        changelogReader.setMainConsumer(consumer);
         changelogReader.transitToUpdateStandby();
 
         consumer.updateBeginningOffsets(Collections.singletonMap(tp, 5L));
+        adminClient.updateConsumerGroupOffsets(Collections.singletonMap(tp, 7L));
         changelogReader.register(tp, standbyStateManager);
         assertEquals(0L, (long) changelogReader.changelogMetadata(tp).endOffset());
         assertEquals(0L, changelogReader.changelogMetadata(tp).totalRestored());
@@ -895,9 +881,9 @@ public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition
         assertNull(callback.storeNameCalledStates.get(RESTORE_END));
         assertNull(callback.storeNameCalledStates.get(RESTORE_BATCH));
 
-        offset.set(10L);
-        time.setCurrentTimeMs(now + 100L);
+        adminClient.updateConsumerGroupOffsets(Collections.singletonMap(tp, 10L));
         // should not try to read committed offsets if interval has not reached
+        time.setCurrentTimeMs(now + 100L);
         changelogReader.restore(mockTasks);
         assertEquals(7L, (long) changelogReader.changelogMetadata(tp).endOffset());
         assertEquals(2L, changelogReader.changelogMetadata(tp).totalRestored());
@@ -918,8 +904,7 @@ public Map<TopicPartition, OffsetAndMetadata> committed(final Set<TopicPartition
         assertEquals(2, changelogReader.changelogMetadata(tp).bufferedRecords().size());
         assertEquals(0, changelogReader.changelogMetadata(tp).bufferedLimitIndex());
 
-        offset.set(15L);
-
+        adminClient.updateConsumerGroupOffsets(Collections.singletonMap(tp, 15L));
         // after we've updated once, the timer should be reset and we should not try again until next interval elapsed
         time.setCurrentTimeMs(now + 201L);
         changelogReader.restore(mockTasks);
@@ -1092,7 +1077,7 @@ public void shouldThrowIfRestoreCallbackThrows() {
         final TaskId taskId = new TaskId(0, 0);
 
         EasyMock.expect(storeMetadata.offset()).andReturn(5L).anyTimes();
-        EasyMock.expect(activeStateManager.taskId()).andReturn(taskId);
+        EasyMock.expect(activeStateManager.taskId()).andReturn(taskId).anyTimes();
         EasyMock.replay(activeStateManager, storeMetadata, store);
 
         adminClient.updateEndOffsets(Collections.singletonMap(tp, 10L));
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java
index 72dc1bc8e4456..68d2def11083c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamTaskTest.java
@@ -59,7 +59,7 @@
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.Task.TaskType;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.internals.ThreadCache;
 import org.apache.kafka.test.MockKeyValueStore;
 import org.apache.kafka.test.MockProcessorNode;
@@ -80,7 +80,6 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.time.Duration;
-import java.util.Base64;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
@@ -103,7 +102,6 @@
 import static org.apache.kafka.common.utils.Utils.mkProperties;
 import static org.apache.kafka.common.utils.Utils.mkSet;
 import static org.apache.kafka.streams.StreamsConfig.AT_LEAST_ONCE;
-import static org.apache.kafka.streams.processor.internals.StreamTask.encodeTimestamp;
 import static org.apache.kafka.streams.processor.internals.Task.State.CREATED;
 import static org.apache.kafka.streams.processor.internals.Task.State.RESTORING;
 import static org.apache.kafka.streams.processor.internals.Task.State.RUNNING;
@@ -130,7 +128,6 @@ public class StreamTaskTest {
 
     private static final String APPLICATION_ID = "stream-task-test";
     private static final File BASE_DIR = TestUtils.tempDirectory();
-    private static final long DEFAULT_TIMESTAMP = 1000;
 
     private final LogContext logContext = new LogContext("[test] ");
     private final String topic1 = "topic1";
@@ -405,12 +402,17 @@ public void seek(final TopicPartition partition, final long offset) {
     }
 
     @Test
-    public void shouldReadCommittedStreamTimeOnInitialize() {
+    public void shouldReadCommittedStreamTimeAndProcessorMetadataOnInitialize() {
         stateDirectory = EasyMock.createNiceMock(StateDirectory.class);
         EasyMock.replay(stateDirectory);
 
+        final ProcessorMetadata processorMetadata = new ProcessorMetadata(mkMap(
+            mkEntry("key1", 1L),
+            mkEntry("key2", 2L)
+        ));
+
         consumer.commitSync(partitions.stream()
-            .collect(Collectors.toMap(Function.identity(), tp -> new OffsetAndMetadata(0L, encodeTimestamp(10L)))));
+            .collect(Collectors.toMap(Function.identity(), tp -> new OffsetAndMetadata(0L, new TopicPartitionMetadata(10L, processorMetadata).encode()))));
 
         task = createStatelessTask(createConfig("100"));
 
@@ -420,6 +422,49 @@ public void shouldReadCommittedStreamTimeOnInitialize() {
         task.completeRestoration(noOpResetter -> { });
 
         assertEquals(10L, task.streamTime());
+        assertEquals(1L, task.processorContext().processorMetadataForKey("key1").longValue());
+        assertEquals(2L, task.processorContext().processorMetadataForKey("key2").longValue());
+    }
+
+    @Test
+    public void shouldReadCommittedStreamTimeAndMergeProcessorMetadataOnInitialize() {
+        stateDirectory = EasyMock.createNiceMock(StateDirectory.class);
+        EasyMock.replay(stateDirectory);
+
+        final ProcessorMetadata processorMetadata1 = new ProcessorMetadata(mkMap(
+            mkEntry("key1", 1L),
+            mkEntry("key2", 2L)
+        ));
+
+        final Map<TopicPartition, OffsetAndMetadata> meta1 = mkMap(
+            mkEntry(partition1, new OffsetAndMetadata(0L, new TopicPartitionMetadata(10L, processorMetadata1).encode())
+            )
+        );
+
+        final ProcessorMetadata processorMetadata2 = new ProcessorMetadata(mkMap(
+            mkEntry("key1", 10L),
+            mkEntry("key3", 30L)
+        ));
+
+        final Map<TopicPartition, OffsetAndMetadata> meta2 = mkMap(
+            mkEntry(partition2, new OffsetAndMetadata(0L, new TopicPartitionMetadata(20L, processorMetadata2).encode())
+            )
+        );
+
+        consumer.commitSync(meta1);
+        consumer.commitSync(meta2);
+
+        task = createStatelessTask(createConfig("100"));
+
+        assertEquals(RecordQueue.UNKNOWN, task.streamTime());
+
+        task.initializeIfNeeded();
+        task.completeRestoration(noOpResetter -> { });
+
+        assertEquals(20L, task.streamTime());
+        assertEquals(10L, task.processorContext().processorMetadataForKey("key1").longValue());
+        assertEquals(2L, task.processorContext().processorMetadataForKey("key2").longValue());
+        assertEquals(30L, task.processorContext().processorMetadataForKey("key3").longValue());
     }
 
     @Test
@@ -1095,7 +1140,7 @@ public void shouldRespectCommitNeeded() {
     }
 
     @Test
-    public void shouldCommitNextOffsetFromQueueIfAvailable() {
+    public void shouldCommitNextOffsetAndProcessorMetadataFromQueueIfAvailable() {
         task = createSingleSourceStateless(createConfig(AT_LEAST_ONCE, "0"), StreamsConfig.METRICS_LATEST);
         task.initializeIfNeeded();
         task.completeRestoration(noOpResetter -> { });
@@ -1106,11 +1151,21 @@ public void shouldCommitNextOffsetFromQueueIfAvailable() {
             getConsumerRecordWithOffsetAsTimestamp(partition1, 5L)));
 
         task.process(0L);
+        processorStreamTime.mockProcessor.addProcessorMetadata("key1", 100L);
         task.process(0L);
+        processorSystemTime.mockProcessor.addProcessorMetadata("key2", 200L);
 
         final Map<TopicPartition, OffsetAndMetadata> offsetsAndMetadata = task.prepareCommit();
+        final TopicPartitionMetadata expected = new TopicPartitionMetadata(3L,
+            new ProcessorMetadata(
+                mkMap(
+                    mkEntry("key1", 100L),
+                    mkEntry("key2", 200L)
+                )
+            )
+        );
 
-        assertThat(offsetsAndMetadata, equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(5L, encodeTimestamp(3L))))));
+        assertThat(offsetsAndMetadata, equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(5L, expected.encode())))));
     }
 
     @Test
@@ -1129,8 +1184,77 @@ public void shouldCommitConsumerPositionIfRecordQueueIsEmpty() {
         task.addRecords(partition2, singletonList(getConsumerRecordWithOffsetAsTimestamp(partition2, 0L)));
         task.process(0L);
 
+        final TopicPartitionMetadata metadata = new TopicPartitionMetadata(0, new ProcessorMetadata());
+
+        assertTrue(task.commitNeeded());
+        assertThat(task.prepareCommit(), equalTo(
+            mkMap(
+                mkEntry(partition1,
+                    new OffsetAndMetadata(3L, metadata.encode())
+                )
+            )
+        ));
+        task.postCommit(false);
+
+        // the task should still be committed since the processed records have not reached the consumer position
+        assertTrue(task.commitNeeded());
+
+        consumer.poll(Duration.ZERO);
+        task.process(0L);
+
+        assertTrue(task.commitNeeded());
+        assertThat(task.prepareCommit(), equalTo(
+            mkMap(
+                mkEntry(partition1, new OffsetAndMetadata(3L, metadata.encode())),
+                mkEntry(partition2, new OffsetAndMetadata(1L, metadata.encode()))
+            )
+        ));
+        task.postCommit(false);
+
+        assertFalse(task.commitNeeded());
+    }
+
+    @Test
+    public void shouldCommitOldProcessorMetadataWhenNotDirty() {
+        task = createStatelessTask(createConfig());
+        task.initializeIfNeeded();
+        task.completeRestoration(noOpResetter -> { });
+
+        consumer.addRecord(getConsumerRecordWithOffsetAsTimestamp(partition1, 0L));
+        consumer.addRecord(getConsumerRecordWithOffsetAsTimestamp(partition1, 1L));
+        consumer.addRecord(getConsumerRecordWithOffsetAsTimestamp(partition2, 0L));
+        consumer.addRecord(getConsumerRecordWithOffsetAsTimestamp(partition2, 1L));
+        consumer.poll(Duration.ZERO);
+
+        task.addRecords(partition1, singletonList(getConsumerRecordWithOffsetAsTimestamp(partition1, 0L)));
+        task.addRecords(partition1, singletonList(getConsumerRecordWithOffsetAsTimestamp(partition1, 1L)));
+
+        task.process(0L);
+        processorStreamTime.mockProcessor.addProcessorMetadata("key1", 100L);
+
+        final TopicPartitionMetadata expectedMetadata1 = new TopicPartitionMetadata(0L,
+            new ProcessorMetadata(
+                mkMap(
+                    mkEntry("key1", 100L)
+                )
+            )
+        );
+
+        final TopicPartitionMetadata expectedMetadata2 = new TopicPartitionMetadata(RecordQueue.UNKNOWN,
+            new ProcessorMetadata(
+                mkMap(
+                    mkEntry("key1", 100L)
+                )
+            )
+        );
+
         assertTrue(task.commitNeeded());
-        assertThat(task.prepareCommit(), equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(3L, encodeTimestamp(0L))))));
+
+        assertThat(task.prepareCommit(), equalTo(
+            mkMap(
+                mkEntry(partition1, new OffsetAndMetadata(1L, expectedMetadata1.encode())),
+                mkEntry(partition2, new OffsetAndMetadata(2L, expectedMetadata2.encode()))
+            )));
         task.postCommit(false);
 
         // the task should still be committed since the processed records have not reached the consumer position
@@ -1139,9 +1263,19 @@ public void shouldCommitConsumerPositionIfRecordQueueIsEmpty() {
         consumer.poll(Duration.ZERO);
         task.process(0L);
 
+        final TopicPartitionMetadata expectedMetadata3 = new TopicPartitionMetadata(1L,
+            new ProcessorMetadata(
+                mkMap(
+                    mkEntry("key1", 100L)
+                )
+            )
+        );
         assertTrue(task.commitNeeded());
-        assertThat(task.prepareCommit(), equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(3L, encodeTimestamp(0L))),
-                                                       mkEntry(partition2, new OffsetAndMetadata(1L, encodeTimestamp(0L))))));
+
+        // Processor metadata not updated, we just need to commit to partition1 again with new offset
+        assertThat(task.prepareCommit(), equalTo(
+            mkMap(mkEntry(partition1, new OffsetAndMetadata(2L, expectedMetadata3.encode())))
+        ));
         task.postCommit(false);
 
         assertFalse(task.commitNeeded());
@@ -1171,35 +1305,6 @@ public void shouldRespectCommitRequested() {
         assertTrue(task.commitRequested());
     }
 
-    @Test
-    public void shouldEncodeAndDecodeMetadata() {
-        task = createStatelessTask(createConfig("100"));
-        assertEquals(DEFAULT_TIMESTAMP, task.decodeTimestamp(encodeTimestamp(DEFAULT_TIMESTAMP)));
-    }
-
-    @Test
-    public void shouldReturnUnknownTimestampIfUnknownVersion() {
-        task = createStatelessTask(createConfig("100"));
-
-        final byte[] emptyMessage = {StreamTask.LATEST_MAGIC_BYTE + 1};
-        final String encodedString = Base64.getEncoder().encodeToString(emptyMessage);
-        assertEquals(RecordQueue.UNKNOWN, task.decodeTimestamp(encodedString));
-    }
-
-    @Test
-    public void shouldReturnUnknownTimestampIfEmptyMessage() {
-        task = createStatelessTask(createConfig("100"));
-
-        assertEquals(RecordQueue.UNKNOWN, task.decodeTimestamp(""));
-    }
-
-    @Test
-    public void shouldReturnUnknownTimestampIfInvalidMetadata() {
-        task = createStatelessTask(createConfig("100"));
-        final String invalidBase64String = "{}";
-        assertEquals(RecordQueue.UNKNOWN, task.decodeTimestamp(invalidBase64String));
-    }
-
     @Test
     public void shouldBeProcessableIfAllPartitionsBuffered() {
         task = createStatelessTask(createConfig("100"));
@@ -1839,6 +1944,49 @@ public void shouldCheckpointOnCloseRestoringIfNoProgress() {
         EasyMock.verify(stateManager);
     }
 
+    @Test
+    public void shouldAlwaysCheckpointStateIfEnforced() {
+        stateManager.flush();
+        EasyMock.expectLastCall().once();
+        stateManager.checkpoint();
+        EasyMock.expectLastCall().once();
+        EasyMock.expect(stateManager.changelogOffsets()).andStubReturn(Collections.emptyMap());
+        EasyMock.expect(recordCollector.offsets()).andStubReturn(Collections.emptyMap());
+        EasyMock.replay(stateManager, recordCollector);
+
+        task = createOptimizedStatefulTask(createConfig("100"), consumer);
+
+        task.initializeIfNeeded();
+        task.maybeCheckpoint(true);
+
+        EasyMock.verify(stateManager);
+    }
+
+    @Test
+    public void shouldOnlyCheckpointStateWithBigAdvanceIfNotEnforced() {
+        stateManager.flush();
+        EasyMock.expectLastCall().once();
+        stateManager.checkpoint();
+        EasyMock.expectLastCall().once();
+        EasyMock.expect(stateManager.changelogOffsets())
+                .andReturn(Collections.singletonMap(partition1, 50L))
+                .andReturn(Collections.singletonMap(partition1, 11000L))
+                .andReturn(Collections.singletonMap(partition1, 12000L));
+        EasyMock.replay(stateManager);
+
+        task = createOptimizedStatefulTask(createConfig("100"), consumer);
+        task.initializeIfNeeded();
+
+        task.maybeCheckpoint(false);  // this should not checkpoint
+        assertTrue(task.offsetSnapshotSinceLastFlush.isEmpty());
+        task.maybeCheckpoint(false);  // this should checkpoint
+        assertEquals(Collections.singletonMap(partition1, 11000L), task.offsetSnapshotSinceLastFlush);
+        task.maybeCheckpoint(false);  // this should not checkpoint
+        assertEquals(Collections.singletonMap(partition1, 11000L), task.offsetSnapshotSinceLastFlush);
+
+        EasyMock.verify(stateManager);
+    }
+
     @Test
     public void shouldCheckpointOffsetsOnPostCommit() {
         final long offset = 543L;
@@ -2039,7 +2187,7 @@ public void shouldUnregisterMetricsInCloseCleanAndRecycleState() {
 
         task.suspend();
         assertThat(getTaskMetrics(), not(empty()));
-        task.closeCleanAndRecycleState();
+        task.prepareRecycle();
         assertThat(getTaskMetrics(), empty());
     }
 
@@ -2118,7 +2266,7 @@ public void shouldThrowIfRecyclingDirtyTask() {
         task.process(0L);
         assertTrue(task.commitNeeded());
 
-        assertThrows(TaskMigratedException.class, () -> task.closeCleanAndRecycleState());
+        assertThrows(TaskMigratedException.class, () -> task.prepareRecycle());
     }
 
     @Test
@@ -2129,16 +2277,16 @@ public void shouldOnlyRecycleSuspendedTasks() {
         EasyMock.replay(stateManager, recordCollector);
 
         task = createStatefulTask(createConfig("100"), true);
-        assertThrows(IllegalStateException.class, () -> task.closeCleanAndRecycleState()); // CREATED
+        assertThrows(IllegalStateException.class, () -> task.prepareRecycle()); // CREATED
 
         task.initializeIfNeeded();
-        assertThrows(IllegalStateException.class, () -> task.closeCleanAndRecycleState()); // RESTORING
+        assertThrows(IllegalStateException.class, () -> task.prepareRecycle()); // RESTORING
 
         task.completeRestoration(noOpResetter -> { });
-        assertThrows(IllegalStateException.class, () -> task.closeCleanAndRecycleState()); // RUNNING
+        assertThrows(IllegalStateException.class, () -> task.prepareRecycle()); // RUNNING
 
         task.suspend();
-        task.closeCleanAndRecycleState(); // SUSPENDED
+        task.prepareRecycle(); // SUSPENDED
 
         EasyMock.verify(stateManager, recordCollector);
     }
@@ -2191,7 +2339,7 @@ public void shouldThrowTopologyExceptionIfTaskCreatedForUnknownTopic() {
         // The processor topology is missing the topics
         final ProcessorTopology topology = withSources(emptyList(), mkMap());
 
-        final TopologyException  exception = assertThrows(
+        final TopologyException exception = assertThrows(
             TopologyException.class,
             () -> new StreamTask(
                 taskId,
@@ -2210,7 +2358,7 @@ public void shouldThrowTopologyExceptionIfTaskCreatedForUnknownTopic() {
         );
 
         assertThat(exception.getMessage(), equalTo("Invalid topology: " +
-                "Topic is unknown to the topology. This may happen if different KafkaStreams instances of the same " +
+                "Topic " + topic1 + " is unknown to the topology. This may happen if different KafkaStreams instances of the same " +
                 "application execute different Topologies. Note that Topologies are only identical if all operators " +
                 "are added in the same order."));
     }
@@ -2262,7 +2410,9 @@ public void shouldUpdateOffsetIfAllRecordsAreCorrupted() {
         assertTrue(task.commitNeeded());
         assertThat(
             task.prepareCommit(),
-            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(offset + 1, encodeTimestamp(-1)))))
+            equalTo(mkMap(mkEntry(partition1,
+                new OffsetAndMetadata(offset + 1,
+                    new TopicPartitionMetadata(RecordQueue.UNKNOWN, new ProcessorMetadata()).encode()))))
         );
     }
 
@@ -2290,7 +2440,7 @@ public void shouldUpdateOffsetIfValidRecordFollowsCorrupted() {
         assertTrue(task.commitNeeded());
         assertThat(
             task.prepareCommit(),
-            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(offset + 1, encodeTimestamp(offset)))))
+            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(offset + 1, new TopicPartitionMetadata(offset, new ProcessorMetadata()).encode()))))
         );
     }
 
@@ -2317,14 +2467,14 @@ public void shouldUpdateOffsetIfCorruptedRecordFollowsValid() {
         assertTrue(task.commitNeeded());
         assertThat(
             task.prepareCommit(),
-            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(1, encodeTimestamp(0)))))
+            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(1, new TopicPartitionMetadata(0, new ProcessorMetadata()).encode()))))
         );
 
         assertTrue(task.process(offset));
         assertTrue(task.commitNeeded());
         assertThat(
             task.prepareCommit(),
-            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(2, encodeTimestamp(0)))))
+            equalTo(mkMap(mkEntry(partition1, new OffsetAndMetadata(2, new TopicPartitionMetadata(0, new ProcessorMetadata()).encode()))))
         );
     }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java
index 8fa632bb5ba82..2bd1250b7caf8 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamThreadTest.java
@@ -83,8 +83,6 @@
 import org.apache.kafka.test.MockTimestampExtractor;
 import org.apache.kafka.test.StreamsTestUtils;
 import org.apache.kafka.test.TestUtils;
-
-import java.util.function.BiConsumer;
 import org.easymock.EasyMock;
 import org.junit.Assert;
 import org.junit.Before;
@@ -92,6 +90,7 @@
 import org.slf4j.Logger;
 
 import java.io.File;
+import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -109,6 +108,7 @@
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.BiConsumer;
 import java.util.stream.Stream;
 
 import static java.util.Collections.emptyMap;
@@ -148,6 +148,8 @@ public class StreamThreadTest {
     private final static String APPLICATION_ID = "stream-thread-test";
     private final static UUID PROCESS_ID = UUID.fromString("87bf53a8-54f2-485f-a4b6-acdbec0a8b3d");
     private final static String CLIENT_ID = APPLICATION_ID + "-" + PROCESS_ID;
+    public static final String STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG = "stream-thread-test-count-one-changelog";
+    public static final String STREAM_THREAD_TEST_TABLE_TWO_CHANGELOG = "stream-thread-test-table-two-changelog";
 
     private final int threadIdx = 1;
     private final Metrics metrics = new Metrics();
@@ -464,6 +466,36 @@ public void shouldNotCommitBeforeTheCommitInterval() {
         verify(taskManager);
     }
 
+    @Test
+    public void shouldNotPurgeBeforeThePurgeInterval() {
+        final long commitInterval = 1000L;
+        final long purgeInterval = 2000L;
+        final Properties props = configProps(false);
+        props.setProperty(StreamsConfig.STATE_DIR_CONFIG, stateDir);
+        props.setProperty(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, Long.toString(commitInterval));
+        props.setProperty(StreamsConfig.REPARTITION_PURGE_INTERVAL_MS_CONFIG, Long.toString(purgeInterval));
+
+        final StreamsConfig config = new StreamsConfig(props);
+        final Consumer<byte[], byte[]> consumer = EasyMock.createNiceMock(Consumer.class);
+        final ConsumerGroupMetadata consumerGroupMetadata = mock(ConsumerGroupMetadata.class);
+        expect(consumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
+        expect(consumerGroupMetadata.groupInstanceId()).andReturn(Optional.empty());
+        final TaskManager taskManager = mockTaskManagerPurge(1);
+        taskManager.maybePurgeCommittedRecords();
+        EasyMock.replay(consumer, consumerGroupMetadata);
+
+        final TopologyMetadata topologyMetadata = new TopologyMetadata(internalTopologyBuilder, config);
+        topologyMetadata.buildAndRewriteTopology();
+        final StreamThread thread = buildStreamThread(consumer, taskManager, config, topologyMetadata);
+        thread.setNow(mockTime.milliseconds());
+        thread.maybeCommit();
+        mockTime.sleep(purgeInterval - 10L);
+        thread.setNow(mockTime.milliseconds());
+        thread.maybeCommit();
+
+        verify(taskManager);
+    }
+
     @Test
     public void shouldEnforceRebalanceAfterNextScheduledProbingRebalanceTime() throws InterruptedException {
         final StreamsConfig config = new StreamsConfig(configProps(false));
@@ -484,6 +516,7 @@ public void shouldEnforceRebalanceAfterNextScheduledProbingRebalanceTime() throw
         final EasyMockConsumerClientSupplier mockClientSupplier = new EasyMockConsumerClientSupplier(mockConsumer);
 
         mockClientSupplier.setCluster(createCluster());
+        mockConsumer.enforceRebalance("Scheduled probing rebalance");
         EasyMock.replay(mockConsumer);
         final TopologyMetadata topologyMetadata = new TopologyMetadata(internalTopologyBuilder, config);
         topologyMetadata.buildAndRewriteTopology();
@@ -505,8 +538,6 @@ public void shouldEnforceRebalanceAfterNextScheduledProbingRebalanceTime() throw
             null
         );
 
-        mockConsumer.enforceRebalance();
-
         mockClientSupplier.nextRebalanceMs().set(mockTime.milliseconds() - 1L);
 
         thread.start();
@@ -723,9 +754,9 @@ public void shouldCommitAfterCommitInterval() {
             null,
             null,
             null,
-            null,
             topologyMetadata,
             null,
+            null,
             null
         ) {
             @Override
@@ -758,6 +789,41 @@ int commit(final Collection<Task> tasksToCommit) {
         assertTrue(committed.get());
     }
 
+    @Test
+    public void shouldPurgeAfterPurgeInterval() {
+        final long commitInterval = 100L;
+        final long purgeInterval = 200L;
+
+        final Properties props = configProps(false);
+        props.setProperty(StreamsConfig.STATE_DIR_CONFIG, stateDir);
+        props.setProperty(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, Long.toString(commitInterval));
+        props.setProperty(StreamsConfig.REPARTITION_PURGE_INTERVAL_MS_CONFIG, Long.toString(purgeInterval));
+
+        final StreamsConfig config = new StreamsConfig(props);
+        final Consumer<byte[], byte[]> consumer = EasyMock.createNiceMock(Consumer.class);
+        final ConsumerGroupMetadata consumerGroupMetadata = mock(ConsumerGroupMetadata.class);
+        expect(consumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
+        expect(consumerGroupMetadata.groupInstanceId()).andReturn(Optional.empty());
+
+        final TaskManager taskManager = mockTaskManagerPurge(2);
+
+        EasyMock.replay(consumer, consumerGroupMetadata);
+
+        final TopologyMetadata topologyMetadata = new TopologyMetadata(internalTopologyBuilder, config);
+        topologyMetadata.buildAndRewriteTopology();
+        final StreamThread thread = buildStreamThread(consumer, taskManager, config, topologyMetadata);
+
+        thread.setNow(mockTime.milliseconds());
+        thread.maybeCommit();
+
+        mockTime.sleep(purgeInterval + 1);
+
+        thread.setNow(mockTime.milliseconds());
+        thread.maybeCommit();
+
+        verify(taskManager);
+    }
+
     @Test
     public void shouldRecordCommitLatency() {
         final Consumer<byte[], byte[]> consumer = EasyMock.createNiceMock(Consumer.class);
@@ -773,12 +839,9 @@ public void shouldRecordCommitLatency() {
         final ActiveTaskCreator activeTaskCreator = mock(ActiveTaskCreator.class);
         expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.singleton(task));
         expect(activeTaskCreator.producerClientIds()).andStubReturn(Collections.singleton("producerClientId"));
-        expect(activeTaskCreator.uncreatedTasksForTopologies(anyObject())).andStubReturn(emptyMap());
-        activeTaskCreator.removeRevokedUnknownTasks(singleton(task1));
 
         final StandbyTaskCreator standbyTaskCreator = mock(StandbyTaskCreator.class);
-        expect(standbyTaskCreator.uncreatedTasksForTopologies(anyObject())).andStubReturn(emptyMap());
-        standbyTaskCreator.removeRevokedUnknownTasks(emptySet());
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
 
         EasyMock.replay(consumer, consumerGroupMetadata, task, activeTaskCreator, standbyTaskCreator);
 
@@ -792,11 +855,11 @@ public void shouldRecordCommitLatency() {
             null,
             null,
             null,
-            null,
             activeTaskCreator,
             standbyTaskCreator,
             topologyMetadata,
             null,
+            null,
             null
         ) {
             @Override
@@ -981,19 +1044,11 @@ public void shouldOnlyCompleteShutdownAfterRebalanceNotInProgress() throws Inter
 
         final StreamThread thread = createStreamThread(CLIENT_ID, new StreamsConfig(configProps(true)), true);
 
-        thread.start();
-        TestUtils.waitForCondition(
-            () -> thread.state() == StreamThread.State.STARTING,
-            10 * 1000,
-            "Thread never started.");
-
-        thread.rebalanceListener().onPartitionsRevoked(Collections.emptyList());
         thread.taskManager().handleRebalanceStart(Collections.singleton(topic1));
 
+        // assign single partition
         final Map<TaskId, Set<TopicPartition>> activeTasks = new HashMap<>();
         final List<TopicPartition> assignedPartitions = new ArrayList<>();
-
-        // assign single partition
         assignedPartitions.add(t1p1);
         assignedPartitions.add(t1p2);
         activeTasks.put(task1, Collections.singleton(t1p1));
@@ -1001,11 +1056,18 @@ public void shouldOnlyCompleteShutdownAfterRebalanceNotInProgress() throws Inter
 
         thread.taskManager().handleAssignment(activeTasks, emptyMap());
 
+        thread.start();
+        TestUtils.waitForCondition(
+                () -> thread.state() == StreamThread.State.STARTING,
+                10 * 1000,
+                "Thread never started.");
+
         thread.shutdown();
 
         // even if thread is no longer running, it should still be polling
         // as long as the rebalance is still ongoing
         assertFalse(thread.isRunning());
+        assertTrue(thread.isAlive());
 
         Thread.sleep(1000);
         assertEquals(Utils.mkSet(task1, task2), thread.taskManager().activeTaskIds());
@@ -1328,9 +1390,10 @@ public void shouldReinitializeRevivedTasksInAnyState() {
         internalTopologyBuilder.addSource(null, "name", null, null, null, topic1);
         final AtomicBoolean shouldThrow = new AtomicBoolean(false);
         final AtomicBoolean processed = new AtomicBoolean(false);
+        // TODO check if needs to be extended
         internalTopologyBuilder.addProcessor(
             "proc",
-            () -> record -> {
+            (ProcessorSupplier<Object, Object, Object, Object>) () -> record -> {
                 if (shouldThrow.get()) {
                     throw new TaskCorruptedException(singleton(task1));
                 } else {
@@ -1572,9 +1635,9 @@ public void shouldReturnStandbyTaskMetadataWhileRunningState() {
         final StreamThread thread = createStreamThread(CLIENT_ID, config, false);
         final MockConsumer<byte[], byte[]> restoreConsumer = clientSupplier.restoreConsumer;
         restoreConsumer.updatePartitions(
-            "stream-thread-test-count-one-changelog",
+            STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG,
             Collections.singletonList(
-                new PartitionInfo("stream-thread-test-count-one-changelog",
+                new PartitionInfo(STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG,
                                   0,
                                   null,
                                   new Node[0],
@@ -1583,7 +1646,7 @@ public void shouldReturnStandbyTaskMetadataWhileRunningState() {
         );
 
         final HashMap<TopicPartition, Long> offsets = new HashMap<>();
-        offsets.put(new TopicPartition("stream-thread-test-count-one-changelog", 1), 0L);
+        offsets.put(new TopicPartition(STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG, 1), 0L);
         restoreConsumer.updateEndOffsets(offsets);
         restoreConsumer.updateBeginningOffsets(offsets);
 
@@ -1616,8 +1679,74 @@ public void shouldUpdateStandbyTask() throws Exception {
         final String storeName2 = "table-two";
         final String changelogName1 = APPLICATION_ID + "-" + storeName1 + "-changelog";
         final String changelogName2 = APPLICATION_ID + "-" + storeName2 + "-changelog";
+        final StreamThread thread = createStreamThread(CLIENT_ID, config, false);
+        final MockConsumer<byte[], byte[]> restoreConsumer = clientSupplier.restoreConsumer;
+
+        setupThread(storeName1, storeName2, changelogName1, changelogName2, thread, restoreConsumer, false);
+
+        thread.runOnce();
+
+        final StandbyTask standbyTask1 = standbyTask(thread.taskManager(), t1p1);
+        final StandbyTask standbyTask2 = standbyTask(thread.taskManager(), t2p1);
+        assertEquals(task1, standbyTask1.id());
+        assertEquals(task3, standbyTask2.id());
+
+        final KeyValueStore<Object, Long> store1 = (KeyValueStore<Object, Long>) standbyTask1.getStore(storeName1);
+        final KeyValueStore<Object, Long> store2 = (KeyValueStore<Object, Long>) standbyTask2.getStore(storeName2);
+
+        assertEquals(0L, store1.approximateNumEntries());
+        assertEquals(0L, store2.approximateNumEntries());
+
+        addStandbyRecordsToRestoreConsumer(restoreConsumer);
+
+        thread.runOnce();
+
+        assertEquals(10L, store1.approximateNumEntries());
+        assertEquals(4L, store2.approximateNumEntries());
+
+        thread.taskManager().shutdown(true);
+    }
+
+    private void addActiveRecordsToRestoreConsumer(final MockConsumer<byte[], byte[]> restoreConsumer) {
+        for (long i = 0L; i < 10L; i++) {
+            restoreConsumer.addRecord(new ConsumerRecord<>(
+                STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG,
+                2,
+                i,
+                ("K" + i).getBytes(),
+                ("V" + i).getBytes()));
+        }
+    }
+
+    private void addStandbyRecordsToRestoreConsumer(final MockConsumer<byte[], byte[]> restoreConsumer) {
+        // let the store1 be restored from 0 to 10; store2 be restored from 5 (checkpointed) to 10
+        for (long i = 0L; i < 10L; i++) {
+            restoreConsumer.addRecord(new ConsumerRecord<>(
+                STREAM_THREAD_TEST_COUNT_ONE_CHANGELOG,
+                1,
+                i,
+                ("K" + i).getBytes(),
+                ("V" + i).getBytes()));
+            restoreConsumer.addRecord(new ConsumerRecord<>(
+                STREAM_THREAD_TEST_TABLE_TWO_CHANGELOG,
+                1,
+                i,
+                ("K" + i).getBytes(),
+                ("V" + i).getBytes()));
+        }
+    }
+
+    private void setupThread(final String storeName1,
+                             final String storeName2,
+                             final String changelogName1,
+                             final String changelogName2,
+                             final StreamThread thread,
+                             final MockConsumer<byte[], byte[]> restoreConsumer,
+                             final boolean addActiveTask) throws IOException {
+        final TopicPartition activePartition = new TopicPartition(changelogName1, 2);
         final TopicPartition partition1 = new TopicPartition(changelogName1, 1);
         final TopicPartition partition2 = new TopicPartition(changelogName2, 1);
+
         internalStreamsBuilder
             .stream(Collections.singleton(topic1), consumed)
             .groupByKey()
@@ -1627,12 +1756,15 @@ public void shouldUpdateStandbyTask() throws Exception {
         internalStreamsBuilder.table(topic2, new ConsumedInternal<>(), materialized);
 
         internalStreamsBuilder.buildAndOptimizeTopology();
-        final StreamThread thread = createStreamThread(CLIENT_ID, config, false);
-        final MockConsumer<byte[], byte[]> restoreConsumer = clientSupplier.restoreConsumer;
         restoreConsumer.updatePartitions(changelogName1,
             Collections.singletonList(new PartitionInfo(changelogName1, 1, null, new Node[0], new Node[0]))
         );
 
+        restoreConsumer.updateEndOffsets(Collections.singletonMap(activePartition, 10L));
+        restoreConsumer.updateBeginningOffsets(Collections.singletonMap(activePartition, 0L));
+        ((MockAdminClient) (thread.adminClient())).updateBeginningOffsets(Collections.singletonMap(activePartition, 0L));
+        ((MockAdminClient) (thread.adminClient())).updateEndOffsets(Collections.singletonMap(activePartition, 10L));
+
         restoreConsumer.updateEndOffsets(Collections.singletonMap(partition1, 10L));
         restoreConsumer.updateBeginningOffsets(Collections.singletonMap(partition1, 0L));
         restoreConsumer.updateEndOffsets(Collections.singletonMap(partition2, 10L));
@@ -1644,47 +1776,75 @@ public void shouldUpdateStandbyTask() throws Exception {
         thread.setState(StreamThread.State.STARTING);
         thread.rebalanceListener().onPartitionsRevoked(Collections.emptySet());
 
+        final Map<TaskId, Set<TopicPartition>> activeTasks = new HashMap<>();
         final Map<TaskId, Set<TopicPartition>> standbyTasks = new HashMap<>();
 
+        if (addActiveTask) {
+            activeTasks.put(task2, Collections.singleton(t1p2));
+        }
+
         // assign single partition
         standbyTasks.put(task1, Collections.singleton(t1p1));
         standbyTasks.put(task3, Collections.singleton(t2p1));
 
-        thread.taskManager().handleAssignment(emptyMap(), standbyTasks);
+        thread.taskManager().handleAssignment(activeTasks, standbyTasks);
         thread.taskManager().tryToCompleteRestoration(mockTime.milliseconds(), null);
 
         thread.rebalanceListener().onPartitionsAssigned(Collections.emptyList());
+    }
+
+    @SuppressWarnings("unchecked")
+    @Test
+    public void shouldNotUpdateStandbyTaskWhenPaused() throws Exception {
+        final String storeName1 = "count-one";
+        final String storeName2 = "table-two";
+        final String changelogName1 = APPLICATION_ID + "-" + storeName1 + "-changelog";
+        final String changelogName2 = APPLICATION_ID + "-" + storeName2 + "-changelog";
+        final StreamThread thread = createStreamThread(CLIENT_ID, config, false);
+        final MockConsumer<byte[], byte[]> restoreConsumer = clientSupplier.restoreConsumer;
+
+        setupThread(storeName1, storeName2, changelogName1, changelogName2, thread, restoreConsumer, true);
 
         thread.runOnce();
 
+        final StreamTask activeTask1 = activeTask(thread.taskManager(), t1p2);
         final StandbyTask standbyTask1 = standbyTask(thread.taskManager(), t1p1);
         final StandbyTask standbyTask2 = standbyTask(thread.taskManager(), t2p1);
         assertEquals(task1, standbyTask1.id());
         assertEquals(task3, standbyTask2.id());
 
+        final KeyValueStore<Object, Long> activeStore = (KeyValueStore<Object, Long>) activeTask1.getStore(storeName1);
+
         final KeyValueStore<Object, Long> store1 = (KeyValueStore<Object, Long>) standbyTask1.getStore(storeName1);
         final KeyValueStore<Object, Long> store2 = (KeyValueStore<Object, Long>) standbyTask2.getStore(storeName2);
+
+        assertEquals(0L, activeStore.approximateNumEntries());
         assertEquals(0L, store1.approximateNumEntries());
         assertEquals(0L, store2.approximateNumEntries());
 
+        // Add some records that the active task would handle
+        addActiveRecordsToRestoreConsumer(restoreConsumer);
         // let the store1 be restored from 0 to 10; store2 be restored from 5 (checkpointed) to 10
-        for (long i = 0L; i < 10L; i++) {
-            restoreConsumer.addRecord(new ConsumerRecord<>(
-                changelogName1,
-                1,
-                i,
-                ("K" + i).getBytes(),
-                ("V" + i).getBytes()));
-            restoreConsumer.addRecord(new ConsumerRecord<>(
-                changelogName2,
-                1,
-                i,
-                ("K" + i).getBytes(),
-                ("V" + i).getBytes()));
-        }
+        addStandbyRecordsToRestoreConsumer(restoreConsumer);
 
+        // Simulate pause
+        thread.taskManager().topologyMetadata().pauseTopology(TopologyMetadata.UNNAMED_TOPOLOGY);
         thread.runOnce();
 
+        assertEquals(0L, activeStore.approximateNumEntries());
+        assertEquals(0L, store1.approximateNumEntries());
+        assertEquals(0L, store2.approximateNumEntries());
+
+        // Simulate resume
+        thread.taskManager().topologyMetadata().resumeTopology(TopologyMetadata.UNNAMED_TOPOLOGY);
+        thread.runOnce();
+
+        assertEquals(10L, activeStore.approximateNumEntries());
+        assertEquals(0L, store1.approximateNumEntries());
+        assertEquals(0L, store2.approximateNumEntries());
+
+        thread.runOnce();
+        assertEquals(10L, activeStore.approximateNumEntries());
         assertEquals(10L, store1.approximateNumEntries());
         assertEquals(4L, store2.approximateNumEntries());
 
@@ -1718,6 +1878,7 @@ public void shouldNotCreateStandbyTaskIfStateStoresHaveLoggingDisabled() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldPunctuateActiveTask() {
         final List<Long> punctuatedStreamTime = new ArrayList<>();
         final List<Long> punctuatedWallClockTime = new ArrayList<>();
@@ -1788,6 +1949,7 @@ public void process(final Record<Object, Object> record) {}
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldPunctuateWithTimestampPreservedInProcessorContext() {
         final org.apache.kafka.streams.kstream.TransformerSupplier<Object, Object, KeyValue<Object, Object>> punctuateProcessor =
             () -> new org.apache.kafka.streams.kstream.Transformer<Object, Object, KeyValue<Object, Object>>() {
@@ -2196,9 +2358,9 @@ public void shouldCatchTimeoutExceptionFromHandleCorruptionAndInvokeExceptionHan
         expect(consumer.groupMetadata()).andStubReturn(consumerGroupMetadata);
         expect(consumerGroupMetadata.groupInstanceId()).andReturn(Optional.empty());
         consumer.subscribe((Collection<String>) anyObject(), anyObject());
-        EasyMock.expectLastCall().atLeastOnce();
+        EasyMock.expectLastCall().anyTimes();
         consumer.unsubscribe();
-        EasyMock.expectLastCall().atLeastOnce();
+        EasyMock.expectLastCall().anyTimes();
         EasyMock.replay(consumerGroupMetadata);
         final Task task1 = mock(Task.class);
         final Task task2 = mock(Task.class);
@@ -2357,7 +2519,7 @@ public void shouldEnforceRebalanceWhenTaskCorruptedExceptionIsThrownForAnActiveT
         expect(task2.id()).andReturn(taskId2).anyTimes();
         expect(taskManager.handleCorruption(corruptedTasks)).andReturn(true);
 
-        consumer.enforceRebalance();
+        consumer.enforceRebalance("Active tasks corrupted");
         expectLastCall();
 
         EasyMock.replay(task1, task2, taskManager, consumer);
@@ -2491,7 +2653,7 @@ public void shouldNotCommitNonRunningNonRestoringTasks() {
         expect(task3.state()).andReturn(Task.State.CREATED).anyTimes();
         expect(task3.id()).andReturn(taskId3).anyTimes();
 
-        expect(taskManager.tasks()).andReturn(mkMap(
+        expect(taskManager.allTasks()).andReturn(mkMap(
             mkEntry(taskId1, task1),
             mkEntry(taskId2, task2),
             mkEntry(taskId3, task3)
@@ -2749,6 +2911,22 @@ void runOnce() {
         assertThat(failedThreads.metricValue(), is(shouldFail ? 1.0 : 0.0));
     }
 
+    private TaskManager mockTaskManagerPurge(final int numberOfPurges) {
+        final TaskManager taskManager = EasyMock.createNiceMock(TaskManager.class);
+        final Task runningTask = mock(Task.class);
+        final TaskId taskId = new TaskId(0, 0);
+
+        expect(runningTask.state()).andReturn(Task.State.RUNNING).anyTimes();
+        expect(runningTask.id()).andReturn(taskId).anyTimes();
+        expect(taskManager.allTasks())
+                .andReturn(Collections.singletonMap(taskId, runningTask)).anyTimes();
+        expect(taskManager.commit(Collections.singleton(runningTask))).andReturn(1).anyTimes();
+        taskManager.maybePurgeCommittedRecords();
+        EasyMock.expectLastCall().times(numberOfPurges);
+        EasyMock.replay(taskManager, runningTask);
+        return taskManager;
+    }
+
     private TaskManager mockTaskManagerCommit(final Consumer<byte[], byte[]> consumer,
                                               final int numberOfCommits,
                                               final int commits) {
@@ -2758,7 +2936,7 @@ private TaskManager mockTaskManagerCommit(final Consumer<byte[], byte[]> consume
 
         expect(runningTask.state()).andReturn(Task.State.RUNNING).anyTimes();
         expect(runningTask.id()).andReturn(taskId).anyTimes();
-        expect(taskManager.tasks())
+        expect(taskManager.allTasks())
             .andReturn(Collections.singletonMap(taskId, runningTask)).times(numberOfCommits);
         expect(taskManager.commit(Collections.singleton(runningTask))).andReturn(commits).times(numberOfCommits);
         EasyMock.replay(taskManager, runningTask);
@@ -2814,8 +2992,17 @@ private void addRecord(final MockConsumer<byte[], byte[]> mockConsumer,
             Optional.empty()));
     }
 
+    StreamTask activeTask(final TaskManager taskManager, final TopicPartition partition) {
+        final Stream<Task> standbys = taskManager.allTasks().values().stream().filter(Task::isActive);
+        for (final Task task : (Iterable<Task>) standbys::iterator) {
+            if (task.inputPartitions().contains(partition)) {
+                return (StreamTask) task;
+            }
+        }
+        return null;
+    }
     StandbyTask standbyTask(final TaskManager taskManager, final TopicPartition partition) {
-        final Stream<Task> standbys = taskManager.tasks().values().stream().filter(t -> !t.isActive());
+        final Stream<Task> standbys = taskManager.allTasks().values().stream().filter(t -> !t.isActive());
         for (final Task task : (Iterable<Task>) standbys::iterator) {
             if (task.inputPartitions().contains(partition)) {
                 return (StandbyTask) task;
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignorTest.java
index d11f3e056cf57..e2c08ed4b62df 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsPartitionAssignorTest.java
@@ -16,8 +16,6 @@
  */
 package org.apache.kafka.streams.processor.internals;
 
-import java.time.Duration;
-import java.util.Properties;
 import org.apache.kafka.clients.admin.Admin;
 import org.apache.kafka.clients.admin.AdminClient;
 import org.apache.kafka.clients.admin.ListOffsetsResult;
@@ -79,6 +77,7 @@
 import org.junit.runners.Parameterized;
 
 import java.nio.ByteBuffer;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -86,6 +85,7 @@
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.UUID;
@@ -196,6 +196,7 @@ public class StreamsPartitionAssignorTest {
     private StreamsMetadataState streamsMetadataState = EasyMock.createNiceMock(StreamsMetadataState.class);
     private final Map<String, Subscription> subscriptions = new HashMap<>();
     private final Class<? extends TaskAssignor> taskAssignor;
+    private Map<String, String> clientTags;
 
     private final ReferenceContainer referenceContainer = new ReferenceContainer();
     private final MockTime time = new MockTime();
@@ -210,6 +211,7 @@ private Map<String, Object> configProps() {
         referenceContainer.taskManager = taskManager;
         referenceContainer.streamsMetadataState = streamsMetadataState;
         referenceContainer.time = time;
+        referenceContainer.clientTags = clientTags != null ? clientTags : EMPTY_CLIENT_TAGS;
         configurationMap.put(InternalConfig.REFERENCE_CONTAINER_PARTITION_ASSIGNOR, referenceContainer);
         configurationMap.put(InternalConfig.INTERNAL_TASK_ASSIGNOR_CLASS, taskAssignor.getName());
         return configurationMap;
@@ -2190,6 +2192,21 @@ public void shouldThrowTaskAssignmentExceptionWhenUnableToResolvePartitionCount(
                    equalTo(AssignorError.ASSIGNMENT_ERROR.code()));
     }
 
+    @Test
+    public void testClientTags() {
+        clientTags = mkMap(mkEntry("cluster", "cluster1"), mkEntry("zone", "az1"));
+        createDefaultMockTaskManager();
+        configureDefaultPartitionAssignor();
+        final Set<String> topics = mkSet("input");
+        final Subscription subscription = new Subscription(new ArrayList<>(topics),
+                                                           partitionAssignor.subscriptionUserData(topics));
+        final SubscriptionInfo info = getInfo(UUID_1, EMPTY_TASKS, EMPTY_TASKS, uniqueField, clientTags);
+
+        assertEquals(singletonList("input"), subscription.topics());
+        assertEquals(info, SubscriptionInfo.decode(subscription.userData()));
+        assertEquals(clientTags, partitionAssignor.clientTags());
+    }
+
     private static class CorruptedInternalTopologyBuilder extends InternalTopologyBuilder {
         private Map<Subtopology, TopicsInfo> corruptedTopicGroups;
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsProducerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsProducerTest.java
index 420d94a949724..9470a7b166e1b 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsProducerTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/StreamsProducerTest.java
@@ -76,6 +76,7 @@ public class StreamsProducerTest {
     private static final double TXN_SEND_OFFSETS_TIME = 5;
     private static final double TXN_COMMIT_TIME = 6;
     private static final double TXN_ABORT_TIME = 7;
+    private static final double METADATA_WAIT_TIME = 8;
 
     private final LogContext logContext = new LogContext("test ");
     private final String topic = "topic";
@@ -1167,11 +1168,13 @@ public void shouldComputeTotalBlockedTime() {
             TXN_BEGIN_TIME,
             TXN_SEND_OFFSETS_TIME,
             TXN_COMMIT_TIME,
-            TXN_ABORT_TIME
+            TXN_ABORT_TIME,
+            METADATA_WAIT_TIME
         );
 
         final double expectedTotalBlocked = BUFFER_POOL_WAIT_TIME + FLUSH_TME + TXN_INIT_TIME +
-            TXN_BEGIN_TIME + TXN_SEND_OFFSETS_TIME +  TXN_COMMIT_TIME + TXN_ABORT_TIME;
+            TXN_BEGIN_TIME + TXN_SEND_OFFSETS_TIME +  TXN_COMMIT_TIME + TXN_ABORT_TIME +
+            METADATA_WAIT_TIME;
         assertThat(nonEosStreamsProducer.totalBlockedTime(), closeTo(expectedTotalBlocked, 0.01));
     }
 
@@ -1185,10 +1188,12 @@ public void shouldComputeTotalBlockedTimeAfterReset() {
             TXN_BEGIN_TIME,
             TXN_SEND_OFFSETS_TIME,
             TXN_COMMIT_TIME,
-            TXN_ABORT_TIME
+            TXN_ABORT_TIME,
+            METADATA_WAIT_TIME
         );
         final double expectedTotalBlocked = BUFFER_POOL_WAIT_TIME + FLUSH_TME + TXN_INIT_TIME +
-            TXN_BEGIN_TIME + TXN_SEND_OFFSETS_TIME +  TXN_COMMIT_TIME + TXN_ABORT_TIME;
+            TXN_BEGIN_TIME + TXN_SEND_OFFSETS_TIME +  TXN_COMMIT_TIME + TXN_ABORT_TIME +
+            METADATA_WAIT_TIME;
         assertThat(eosBetaStreamsProducer.totalBlockedTime(), equalTo(expectedTotalBlocked));
         reset(mockTime);
         final long closeStart = 1L;
@@ -1204,7 +1209,8 @@ public void shouldComputeTotalBlockedTimeAfterReset() {
             TXN_BEGIN_TIME,
             TXN_SEND_OFFSETS_TIME,
             TXN_COMMIT_TIME,
-            TXN_ABORT_TIME
+            TXN_ABORT_TIME,
+            METADATA_WAIT_TIME
         );
 
         assertThat(
@@ -1243,7 +1249,8 @@ private void setProducerMetrics(
         final double txnBeginTime,
         final double txnSendOffsetsTime,
         final double txnCommitTime,
-        final double txnAbortTime) {
+        final double txnAbortTime,
+        final double metadataWaitTime) {
         addMetric(producer, "bufferpool-wait-time-ns-total", bufferPoolWaitTime);
         addMetric(producer, "flush-time-ns-total", flushTime);
         addMetric(producer, "txn-init-time-ns-total", txnInitTime);
@@ -1251,5 +1258,6 @@ private void setProducerMetrics(
         addMetric(producer, "txn-send-offsets-time-ns-total", txnSendOffsetsTime);
         addMetric(producer, "txn-commit-time-ns-total", txnCommitTime);
         addMetric(producer, "txn-abort-time-ns-total", txnAbortTime);
+        addMetric(producer, "metadata-wait-time-ns-total", metadataWaitTime);
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskAndActionTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskAndActionTest.java
new file mode 100644
index 0000000000000..2bc9d05326e6b
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskAndActionTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.streams.processor.TaskId;
+import org.junit.jupiter.api.Test;
+
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.Action.ADD;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.Action.PAUSE;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.Action.REMOVE;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.Action.RESUME;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.createAddTask;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.createPauseTask;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.createRemoveTask;
+import static org.apache.kafka.streams.processor.internals.TaskAndAction.createResumeTask;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+
+class TaskAndActionTest {
+
+    @Test
+    public void shouldCreateAddTaskAction() {
+        final StreamTask task = mock(StreamTask.class);
+
+        final TaskAndAction addTask = createAddTask(task);
+
+        assertEquals(ADD, addTask.getAction());
+        assertEquals(task, addTask.getTask());
+        final Exception exception = assertThrows(IllegalStateException.class, addTask::getTaskId);
+        assertEquals("Action type ADD cannot have a task ID!", exception.getMessage());
+    }
+
+    @Test
+    public void shouldCreateRemoveTaskAction() {
+        final TaskId taskId = new TaskId(0, 0);
+
+        final TaskAndAction removeTask = createRemoveTask(taskId);
+
+        assertEquals(REMOVE, removeTask.getAction());
+        assertEquals(taskId, removeTask.getTaskId());
+        final Exception exception = assertThrows(IllegalStateException.class, removeTask::getTask);
+        assertEquals("Action type REMOVE cannot have a task!", exception.getMessage());
+    }
+
+    @Test
+    public void shouldCreatePauseTaskAction() {
+        final TaskId taskId = new TaskId(0, 0);
+
+        final TaskAndAction pauseTask = createPauseTask(taskId);
+
+        assertEquals(PAUSE, pauseTask.getAction());
+        assertEquals(taskId, pauseTask.getTaskId());
+        final Exception exception = assertThrows(IllegalStateException.class, pauseTask::getTask);
+        assertEquals("Action type PAUSE cannot have a task!", exception.getMessage());
+    }
+
+    @Test
+    public void shouldCreateResumeTaskAction() {
+        final TaskId taskId = new TaskId(0, 0);
+
+        final TaskAndAction pauseTask = createResumeTask(taskId);
+
+        assertEquals(RESUME, pauseTask.getAction());
+        assertEquals(taskId, pauseTask.getTaskId());
+        final Exception exception = assertThrows(IllegalStateException.class, pauseTask::getTask);
+        assertEquals("Action type RESUME cannot have a task!", exception.getMessage());
+    }
+
+    @Test
+    public void shouldThrowIfAddTaskActionIsCreatedWithNullTask() {
+        final Exception exception = assertThrows(NullPointerException.class, () -> createAddTask(null));
+        assertTrue(exception.getMessage().contains("Task to add is null!"));
+    }
+
+    @Test
+    public void shouldThrowIfRemoveTaskActionIsCreatedWithNullTaskId() {
+        final Exception exception = assertThrows(NullPointerException.class, () -> createRemoveTask(null));
+        assertTrue(exception.getMessage().contains("Task ID of task to remove is null!"));
+    }
+
+    @Test
+    public void shouldThrowIfPauseTaskActionIsCreatedWithNullTaskId() {
+        final Exception exception = assertThrows(NullPointerException.class, () -> createPauseTask(null));
+        assertTrue(exception.getMessage().contains("Task ID of task to pause is null!"));
+    }
+
+    @Test
+    public void shouldThrowIfResumeTaskActionIsCreatedWithNullTaskId() {
+        final Exception exception = assertThrows(NullPointerException.class, () -> createResumeTask(null));
+        assertTrue(exception.getMessage().contains("Task ID of task to resume is null!"));
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadataTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadataTest.java
new file mode 100644
index 0000000000000..127999ad51d0c
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutionMetadataTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.streams.internals.StreamsConfigUtils.ProcessingMode;
+import org.apache.kafka.streams.processor.TaskId;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.apache.kafka.streams.processor.internals.TopologyMetadata.UNNAMED_TOPOLOGY;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class TaskExecutionMetadataTest {
+    final static String TOPOLOGY1 = "topology1";
+    final static String TOPOLOGY2 = "topology2";
+    final static Set<String> NAMED_TOPOLOGIES = new HashSet<>(Arrays.asList(TOPOLOGY1, TOPOLOGY2));
+    final static int TIME_ZERO = 0;
+    final static int CONSTANT_BACKOFF_MS = 5000;
+
+    @Test
+    public void testCanProcessWithoutNamedTopologies() {
+        final Set<String> topologies = Collections.singleton(UNNAMED_TOPOLOGY);
+        final Set<String> pausedTopologies = new HashSet<>();
+
+        final TaskExecutionMetadata metadata = new TaskExecutionMetadata(topologies, pausedTopologies, ProcessingMode.AT_LEAST_ONCE);
+
+        final Task mockTask = createMockTask(UNNAMED_TOPOLOGY);
+
+        Assert.assertTrue(metadata.canProcessTask(mockTask, TIME_ZERO));
+        // This pauses an UNNAMED_TOPOLOGY / a KafkaStreams instance without named/modular
+        // topologies.
+        pausedTopologies.add(UNNAMED_TOPOLOGY);
+        Assert.assertFalse(metadata.canProcessTask(mockTask, TIME_ZERO));
+    }
+
+    @Test
+    public void testNamedTopologiesCanBePausedIndependently() {
+        final Set<String> pausedTopologies = new HashSet<>();
+        final TaskExecutionMetadata metadata = new TaskExecutionMetadata(NAMED_TOPOLOGIES, pausedTopologies, ProcessingMode.AT_LEAST_ONCE);
+
+        final Task mockTask1 = createMockTask(TOPOLOGY1);
+        final Task mockTask2 = createMockTask(TOPOLOGY2);
+
+        Assert.assertTrue(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+
+        pausedTopologies.add(TOPOLOGY1);
+        Assert.assertFalse(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+
+        pausedTopologies.remove(TOPOLOGY1);
+        Assert.assertTrue(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+    }
+
+    @Test
+    public void testNamedTopologiesCanBeStartedPaused() {
+        final Set<String> pausedTopologies = new HashSet<>();
+        pausedTopologies.add(TOPOLOGY1);
+
+        final TaskExecutionMetadata metadata = new TaskExecutionMetadata(NAMED_TOPOLOGIES, pausedTopologies, ProcessingMode.AT_LEAST_ONCE);
+
+        final Task mockTask1 = createMockTask(TOPOLOGY1);
+        final Task mockTask2 = createMockTask(TOPOLOGY2);
+
+        Assert.assertFalse(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+
+        pausedTopologies.remove(TOPOLOGY1);
+        Assert.assertTrue(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+    }
+
+    @Test
+    public void testNamedTopologiesCanBackoff() {
+        final Set<String> pausedTopologies = new HashSet<>();
+
+        final TaskExecutionMetadata metadata = new TaskExecutionMetadata(NAMED_TOPOLOGIES, pausedTopologies, ProcessingMode.AT_LEAST_ONCE);
+
+        final Task mockTask1 = createMockTask(TOPOLOGY1);
+        final Task mockTask2 = createMockTask(TOPOLOGY2);
+
+        Assert.assertTrue(metadata.canProcessTask(mockTask1, TIME_ZERO));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, TIME_ZERO));
+
+        metadata.registerTaskError(mockTask1, new Throwable("Error"), TIME_ZERO);
+        Assert.assertFalse(metadata.canProcessTask(mockTask1, CONSTANT_BACKOFF_MS - 1));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, CONSTANT_BACKOFF_MS - 1));
+
+        Assert.assertFalse(metadata.canProcessTask(mockTask1, CONSTANT_BACKOFF_MS));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, CONSTANT_BACKOFF_MS));
+
+        Assert.assertTrue(metadata.canProcessTask(mockTask1, CONSTANT_BACKOFF_MS + 1));
+        Assert.assertTrue(metadata.canProcessTask(mockTask2, CONSTANT_BACKOFF_MS + 1));
+    }
+
+    private static Task createMockTask(final String topologyName) {
+        final Task mockTask = mock(Task.class);
+        final TaskId taskId = new TaskId(0, 0, topologyName);
+        when(mockTask.id()).thenReturn(taskId);
+        return mockTask;
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutorTest.java
new file mode 100644
index 0000000000000..131a68044b414
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskExecutorTest.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.common.utils.LogContext;
+import org.junit.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+public class TaskExecutorTest {
+    @Test
+    public void testPunctuateWithPause() {
+        final Tasks tasks = mock(Tasks.class);
+        final TaskManager taskManager = mock(TaskManager.class);
+        final TaskExecutionMetadata metadata = mock(TaskExecutionMetadata.class);
+
+        final TaskExecutor taskExecutor = new TaskExecutor(tasks, taskManager, metadata, new LogContext());
+
+        taskExecutor.punctuate();
+        verify(tasks).activeTasks();
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskManagerTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskManagerTest.java
index 7f442d3131b9f..c3233152c05e6 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskManagerTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TaskManagerTest.java
@@ -33,10 +33,9 @@
 import org.apache.kafka.common.internals.KafkaFutureImpl;
 import org.apache.kafka.common.metrics.KafkaMetric;
 import org.apache.kafka.common.metrics.Measurable;
-import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.common.utils.Time;
-import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.errors.LockException;
 import org.apache.kafka.streams.errors.StreamsException;
 import org.apache.kafka.streams.errors.TaskCorruptedException;
@@ -47,11 +46,11 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.internals.StateDirectory.TaskDirectory;
 import org.apache.kafka.streams.processor.internals.Task.State;
-import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.testutil.DummyStreamsConfig;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
 import org.apache.kafka.streams.state.internals.OffsetCheckpoint;
 
+import java.nio.file.Files;
 import java.util.ArrayList;
 import org.easymock.EasyMock;
 import org.easymock.EasyMockRunner;
@@ -63,6 +62,7 @@
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
+import org.mockito.Mockito;
 
 import java.io.File;
 import java.util.Arrays;
@@ -92,12 +92,14 @@
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.common.utils.Utils.mkSet;
 import static org.apache.kafka.common.utils.Utils.union;
+import static org.apache.kafka.streams.processor.internals.TopologyMetadata.UNNAMED_TOPOLOGY;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.standbyTask;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statefulTask;
 import static org.easymock.EasyMock.anyObject;
 import static org.easymock.EasyMock.anyString;
 import static org.easymock.EasyMock.eq;
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.expectLastCall;
-import static org.easymock.EasyMock.mock;
 import static org.easymock.EasyMock.replay;
 import static org.easymock.EasyMock.reset;
 import static org.easymock.EasyMock.resetToStrict;
@@ -114,6 +116,10 @@
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.mock;
 
 @RunWith(EasyMockRunner.class)
 public class TaskManagerTest {
@@ -130,6 +136,7 @@ public class TaskManagerTest {
 
     private final TaskId taskId01 = new TaskId(0, 1);
     private final TopicPartition t1p1 = new TopicPartition(topic1, 1);
+    private final TopicPartition t2p2 = new TopicPartition(topic2, 1);
     private final TopicPartition t1p1changelog = new TopicPartition("changelog", 1);
     private final Set<TopicPartition> taskId01Partitions = mkSet(t1p1);
     private final Set<TopicPartition> taskId01ChangelogPartitions = mkSet(t1p1changelog);
@@ -169,8 +176,10 @@ public class TaskManagerTest {
     private StandbyTaskCreator standbyTaskCreator;
     @Mock(type = MockType.NICE)
     private Admin adminClient;
+    final StateUpdater stateUpdater = Mockito.mock(StateUpdater.class);
 
     private TaskManager taskManager;
+    private TopologyMetadata topologyMetadata;
     private final Time time = new MockTime();
 
     @Rule
@@ -178,29 +187,113 @@ public class TaskManagerTest {
 
     @Before
     public void setUp() {
-        setUpTaskManager(StreamsConfigUtils.ProcessingMode.AT_LEAST_ONCE);
+        taskManager = setUpTaskManager(StreamsConfigUtils.ProcessingMode.AT_LEAST_ONCE, false);
     }
 
-    private void setUpTaskManager(final StreamsConfigUtils.ProcessingMode processingMode) {
-        taskManager = new TaskManager(
+    private TaskManager setUpTaskManager(final ProcessingMode processingMode, final boolean stateUpdaterEnabled) {
+        topologyMetadata = new TopologyMetadata(topologyBuilder, new DummyStreamsConfig(processingMode));
+        final TaskManager taskManager = new TaskManager(
             time,
             changeLogReader,
             UUID.randomUUID(),
             "taskManagerTest",
-            new StreamsMetricsImpl(new Metrics(), "clientId", StreamsConfig.METRICS_LATEST, time),
             activeTaskCreator,
             standbyTaskCreator,
-            new TopologyMetadata(topologyBuilder, new DummyStreamsConfig(processingMode)),
+            topologyMetadata,
             adminClient,
-            stateDirectory
+            stateDirectory,
+            stateUpdaterEnabled ? stateUpdater : null
         );
         taskManager.setMainConsumer(consumer);
         reset(topologyBuilder);
         expect(topologyBuilder.hasNamedTopology()).andStubReturn(false);
-        activeTaskCreator.removeRevokedUnknownTasks(anyObject());
-        expectLastCall().asStub();
-        standbyTaskCreator.removeRevokedUnknownTasks(anyObject());
-        expectLastCall().asStub();
+        expect(topologyBuilder.nodeToSourceTopics()).andStubReturn(emptyMap());
+        return taskManager;
+    }
+
+    @Test
+    public void shouldClassifyExistingTasksWithoutStateUpdater() {
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.AT_LEAST_ONCE, false);
+        final Map<TaskId, Set<TopicPartition>> runningActiveTasks = mkMap(mkEntry(taskId01, mkSet(t1p1)));
+        final Map<TaskId, Set<TopicPartition>> standbyTasks = mkMap(mkEntry(taskId02, mkSet(t2p2)));
+        final Map<TaskId, Set<TopicPartition>> restoringActiveTasks = mkMap(mkEntry(taskId03, mkSet(t1p3)));
+        final Map<TaskId, Set<TopicPartition>> activeTasks = new HashMap<>(runningActiveTasks);
+        activeTasks.putAll(restoringActiveTasks);
+        handleAssignment(runningActiveTasks, standbyTasks, restoringActiveTasks);
+
+        taskManager.handleAssignment(activeTasks, standbyTasks);
+
+        Mockito.verifyNoInteractions(stateUpdater);
+    }
+
+    @Test
+    public void shouldClassifyExistingTasksWithStateUpdater() {
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.AT_LEAST_ONCE, true);
+        final StandbyTask standbyTaskToRecycle = standbyTask(taskId02, mkSet(t2p2)).build();
+        final StandbyTask standbyTaskToClose = standbyTask(taskId04, mkSet(t2p0)).build();
+        final StreamTask restoringActiveTaskToRecycle = statefulTask(taskId03, mkSet(t1p3)).build();
+        final StreamTask restoringActiveTaskToClose = statefulTask(taskId01, mkSet(t1p1)).build();
+        final Map<TaskId, Set<TopicPartition>> standbyTasks =
+            mkMap(mkEntry(standbyTaskToRecycle.id(), standbyTaskToRecycle.changelogPartitions()));
+        final Map<TaskId, Set<TopicPartition>> restoringActiveTasks = mkMap(
+            mkEntry(restoringActiveTaskToRecycle.id(), restoringActiveTaskToRecycle.changelogPartitions())
+        );
+        when(stateUpdater.getTasks()).thenReturn(mkSet(
+            standbyTaskToRecycle,
+            restoringActiveTaskToRecycle,
+            restoringActiveTaskToClose,
+            standbyTaskToClose
+        ));
+        handleAssignment(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
+
+        taskManager.handleAssignment(standbyTasks, restoringActiveTasks);
+
+        Mockito.verify(stateUpdater).getTasks();
+        Mockito.verify(stateUpdater).remove(standbyTaskToRecycle.id());
+        Mockito.verify(stateUpdater).remove(standbyTaskToClose.id());
+        Mockito.verify(stateUpdater).remove(restoringActiveTaskToRecycle.id());
+        Mockito.verify(stateUpdater).remove(restoringActiveTaskToClose.id());
+    }
+
+    @Test
+    public void shouldAddTasksToStateUpdater() {
+        final StreamTask task00 = mock(StreamTask.class);
+        final StandbyTask task01 = mock(StandbyTask.class);
+        when(task00.id()).thenReturn(taskId00);
+        when(task01.id()).thenReturn(taskId01);
+        when(task00.inputPartitions()).thenReturn(taskId00Partitions);
+        when(task01.inputPartitions()).thenReturn(taskId01Partitions);
+        when(task00.isActive()).thenReturn(true);
+        when(task01.isActive()).thenReturn(false);
+        when(task00.state()).thenReturn(State.RESTORING);
+        when(task01.state()).thenReturn(State.RUNNING);
+        expect(changeLogReader.completedChangelogs()).andReturn(emptySet()).anyTimes();
+        expect(consumer.assignment()).andReturn(emptySet()).anyTimes();
+        consumer.resume(anyObject());
+        expectLastCall().anyTimes();
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment))).andStubReturn(singletonList(task01));
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
+
+        taskManager = new TaskManager(
+            time,
+            changeLogReader,
+            UUID.randomUUID(),
+            "taskManagerTest",
+            activeTaskCreator,
+            standbyTaskCreator,
+            topologyMetadata,
+            adminClient,
+            stateDirectory,
+            stateUpdater
+        );
+        taskManager.setMainConsumer(consumer);
+        taskManager.handleAssignment(taskId00Assignment, taskId01Assignment);
+
+        taskManager.tryToCompleteRestoration(time.milliseconds(), noOpResetter -> { });
+
+        Mockito.verify(stateUpdater).add(task00);
+        Mockito.verify(stateUpdater).add(task01);
     }
 
     @Test
@@ -209,11 +302,12 @@ public void shouldIdempotentlyUpdateSubscriptionFromActiveAssignment() {
         final Map<TaskId, Set<TopicPartition>> assignment = mkMap(mkEntry(taskId01, mkSet(t1p1, newTopicPartition)));
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(emptyList());
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
 
         topologyBuilder.addSubscribedTopicsFromAssignment(eq(asList(t1p1, newTopicPartition)), anyString());
         expectLastCall();
 
-        replay(activeTaskCreator, topologyBuilder);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder);
 
         taskManager.handleAssignment(assignment, emptyMap());
 
@@ -380,7 +474,8 @@ public void shouldComputeOffsetSumFromCheckpointFileForUninitializedTask() throw
         taskManager.handleRebalanceStart(singleton("topic"));
         final StateMachineTask uninitializedTask = new StateMachineTask(taskId00, taskId00Partitions, true);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singleton(uninitializedTask));
-        replay(activeTaskCreator);
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator);
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
 
         assertThat(uninitializedTask.state(), is(State.CREATED));
@@ -405,7 +500,8 @@ public void shouldComputeOffsetSumFromCheckpointFileForClosedTask() throws Excep
 
         taskManager.handleRebalanceStart(singleton("topic"));
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singleton(closedTask));
-        replay(activeTaskCreator);
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator);
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
 
         closedTask.suspend();
@@ -587,7 +683,7 @@ public void shouldReInitializeThreadProducerOnHandleLostAllIfEosV2Enabled() {
         activeTaskCreator.reInitializeThreadProducer();
         expectLastCall();
 
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2, false);
 
         replay(activeTaskCreator);
 
@@ -657,10 +753,11 @@ public void postCommit(final boolean enforceCheckpoint) {
         // `handleAssignment`
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
         expect(consumer.assignment()).andReturn(taskId00Partitions);
-        replay(activeTaskCreator, topologyBuilder, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), tp -> assertThat(tp, is(empty()))), is(true));
@@ -696,10 +793,11 @@ public void suspend() {
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
         expect(consumer.assignment()).andReturn(taskId00Partitions);
-        replay(activeTaskCreator, topologyBuilder, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), tp -> assertThat(tp, is(empty()))), is(true));
@@ -732,6 +830,8 @@ public void shouldCommitNonCorruptedTasksOnTaskCorruptedException() {
         // `handleAssignment`
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment)))
             .andStubReturn(asList(corruptedTask, nonCorruptedTask));
+        expect(standbyTaskCreator.createTasks(anyObject()))
+            .andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
         expectRestoreToBeCompleted(consumer, changeLogReader);
@@ -739,7 +839,7 @@ public void shouldCommitNonCorruptedTasksOnTaskCorruptedException() {
         // check that we should not commit empty map either
         consumer.commitSync(eq(emptyMap()));
         expectLastCall().andStubThrow(new AssertionError("should not invoke commitSync when offset map is empty"));
-        replay(activeTaskCreator, topologyBuilder, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader);
 
         taskManager.handleAssignment(assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), tp -> assertThat(tp, is(empty()))), is(true));
@@ -774,10 +874,12 @@ public void shouldNotCommitNonRunningNonCorruptedTasks() {
         // `handleAssignment`
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment)))
             .andStubReturn(asList(corruptedTask, nonRunningNonCorruptedTask));
+        expect(standbyTaskCreator.createTasks(anyObject()))
+            .andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
         expect(consumer.assignment()).andReturn(taskId00Partitions);
-        replay(activeTaskCreator, topologyBuilder, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader);
 
         taskManager.handleAssignment(assignment, emptyMap());
 
@@ -852,6 +954,7 @@ public void shouldNotAttemptToCommitInHandleCorruptedDuringARebalance() {
         assignment.putAll(taskId00Assignment);
         assignment.putAll(taskId01Assignment);
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(asList(corruptedActive, uncorruptedActive));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
         topologyBuilder.addSubscribedTopicsFromMetadata(eq(singleton(topic1)), anyObject());
@@ -907,6 +1010,7 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
         assignment.putAll(taskId00Assignment);
         assignment.putAll(taskId01Assignment);
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(asList(corruptedActive, uncorruptedActive));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
 
@@ -952,7 +1056,7 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
 
     @Test
     public void shouldCloseAndReviveUncorruptedTasksWhenTimeoutExceptionThrownFromCommitDuringHandleCorruptedWithEOS() {
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2, false);
         final StreamsProducer producer = mock(StreamsProducer.class);
         expect(activeTaskCreator.threadProducer()).andStubReturn(producer);
         final ProcessorStateManager stateManager = EasyMock.createMock(ProcessorStateManager.class);
@@ -984,6 +1088,7 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
         assignment.putAll(taskId00Assignment);
         assignment.putAll(taskId01Assignment);
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(asList(corruptedActiveTask, uncorruptedActiveTask));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         topologyBuilder.addSubscribedTopicsFromAssignment(anyObject(), anyString());
         expectLastCall().anyTimes();
 
@@ -991,12 +1096,12 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
 
         final ConsumerGroupMetadata groupMetadata = new ConsumerGroupMetadata("appId");
         expect(consumer.groupMetadata()).andReturn(groupMetadata);
-        producer.commitTransaction(offsets, groupMetadata);
-        expectLastCall().andThrow(new TimeoutException());
+
+        doThrow(new TimeoutException()).when(producer).commitTransaction(offsets, groupMetadata);
 
         expect(consumer.assignment()).andStubReturn(union(HashSet::new, taskId00Partitions, taskId01Partitions));
 
-        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader, stateManager, producer);
+        replay(activeTaskCreator, standbyTaskCreator, topologyBuilder, consumer, changeLogReader, stateManager);
 
         taskManager.handleAssignment(assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1067,6 +1172,7 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
         expectRestoreToBeCompleted(consumer, changeLogReader);
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignmentActive))).andReturn(asList(revokedActiveTask, unrevokedActiveTaskWithCommitNeeded, unrevokedActiveTaskWithoutCommitNeeded));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(taskId00);
         expectLastCall();
         consumer.commitSync(expectedCommittedOffsets);
@@ -1090,7 +1196,7 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
 
     @Test
     public void shouldCloseAndReviveUncorruptedTasksWhenTimeoutExceptionThrownFromCommitDuringRevocationWithEOS() {
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2, false);
         final StreamsProducer producer = mock(StreamsProducer.class);
         expect(activeTaskCreator.threadProducer()).andStubReturn(producer);
         final ProcessorStateManager stateManager = EasyMock.createMock(ProcessorStateManager.class);
@@ -1130,17 +1236,18 @@ public void markChangelogAsCorrupted(final Collection<TopicPartition> partitions
         expectRestoreToBeCompleted(consumer, changeLogReader);
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignmentActive))).andReturn(asList(revokedActiveTask, unrevokedActiveTask, unrevokedActiveTaskWithoutCommitNeeded));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(taskId00);
         expectLastCall();
 
         final ConsumerGroupMetadata groupMetadata = new ConsumerGroupMetadata("appId");
         expect(consumer.groupMetadata()).andReturn(groupMetadata);
-        producer.commitTransaction(expectedCommittedOffsets, groupMetadata);
-        expectLastCall().andThrow(new TimeoutException());
+
+        doThrow(new TimeoutException()).when(producer).commitTransaction(expectedCommittedOffsets, groupMetadata);
 
         expect(consumer.assignment()).andStubReturn(union(HashSet::new, taskId00Partitions, taskId01Partitions, taskId02Partitions));
 
-        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader, producer, stateManager);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader, stateManager);
 
         taskManager.handleAssignment(assignmentActive, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1167,6 +1274,8 @@ public void shouldCloseStandbyUnassignedTasksWhenCreatingNewTasks() {
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(standbyTaskCreator.createTasks(eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(Collections.emptyMap()))).andStubReturn(Collections.emptySet());
         consumer.commitSync(Collections.emptyMap());
         expectLastCall();
         replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
@@ -1190,8 +1299,12 @@ public void shouldAddNonResumedSuspendedTasks() {
         // expect these calls twice (because we're going to tryToCompleteRestoration twice)
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
-        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment))).andReturn(singletonList(task01));
-        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment))).andReturn(singletonList(task01)).anyTimes();
+        expect(standbyTaskCreator.createTasks(eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        topologyBuilder.addSubscribedTopicsFromAssignment(eq(asList(t1p0)), anyString());
+        expectLastCall().anyTimes();
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader, topologyBuilder);
 
         taskManager.handleAssignment(taskId00Assignment, taskId01Assignment);
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1214,8 +1327,9 @@ public void shouldUpdateInputPartitionsAfterRebalance() {
         // expect these calls twice (because we're going to tryToCompleteRestoration twice)
         expectRestoreToBeCompleted(consumer, changeLogReader, false);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
-        replay(activeTaskCreator, consumer, changeLogReader);
-
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1352,10 +1466,11 @@ public void shouldSuspendActiveTasksDuringRevocation() {
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         consumer.commitSync(offsets);
         expectLastCall();
 
-        replay(activeTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1368,7 +1483,7 @@ public void shouldSuspendActiveTasksDuringRevocation() {
     @Test
     public void shouldCommitAllActiveTasksThatNeedCommittingOnHandleRevocationWithEosV2() {
         final StreamsProducer producer = mock(StreamsProducer.class);
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2, false);
 
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true);
         final Map<TopicPartition, OffsetAndMetadata> offsets00 = singletonMap(t1p0, new OffsetAndMetadata(0L, null));
@@ -1516,9 +1631,13 @@ public void shouldNotCommitOnHandleAssignmentIfNoTaskClosed() {
         expectRestoreToBeCompleted(consumer, changeLogReader);
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignmentActive))).andReturn(singleton(task00));
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
         expect(standbyTaskCreator.createTasks(eq(assignmentStandby))).andReturn(singletonList(task10));
+        expect(standbyTaskCreator.createTasks(eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        topologyBuilder.addSubscribedTopicsFromAssignment(eq(asList(t1p0)), anyString());
+        expectLastCall().anyTimes();
 
-        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader, topologyBuilder);
 
         taskManager.handleAssignment(assignmentActive, assignmentStandby);
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -1546,7 +1665,9 @@ public void shouldNotCommitOnHandleAssignmentIfOnlyStandbyTaskClosed() {
         expectRestoreToBeCompleted(consumer, changeLogReader);
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignmentActive))).andReturn(singleton(task00));
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
         expect(standbyTaskCreator.createTasks(eq(assignmentStandby))).andReturn(singletonList(task10));
+        expect(standbyTaskCreator.createTasks(eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
 
         replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
@@ -1565,8 +1686,11 @@ public void shouldNotCommitCreatedTasksOnRevocationOrClosure() {
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true);
 
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId00));
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expectLastCall().once();
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(task00.state(), is(Task.State.CREATED));
@@ -1590,8 +1714,8 @@ public void suspend() {
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
         assertThat(task00.state(), is(Task.State.RUNNING));
@@ -1613,8 +1737,8 @@ public void shouldCloseActiveTasksAndPropagateExceptionsOnCleanShutdown() {
         );
         final Task task00 = new StateMachineTask(taskId00, taskId00Partitions, true) {
             @Override
-            public Collection<TopicPartition> changelogPartitions() {
-                return singletonList(changelog);
+            public Set<TopicPartition> changelogPartitions() {
+                return singleton(changelog);
             }
         };
         final AtomicBoolean closedDirtyTask01 = new AtomicBoolean(false);
@@ -1664,14 +1788,8 @@ public void closeDirty() {
         expect(changeLogReader.completedChangelogs()).andReturn(emptySet());
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment)))
             .andStubReturn(asList(task00, task01, task02, task03));
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId00));
-        expectLastCall();
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId01));
-        expectLastCall();
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId02));
-        expectLastCall();
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId03));
-        expectLastCall();
+        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(anyObject());
+        expectLastCall().times(4);
         activeTaskCreator.closeThreadProducerIfNeeded();
         expectLastCall();
         expect(standbyTaskCreator.createTasks(eq(emptyMap()))).andStubReturn(emptyList());
@@ -1732,8 +1850,8 @@ public void shouldCloseActiveTasksAndPropagateTaskProducerExceptionsOnCleanShutd
         );
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true) {
             @Override
-            public Collection<TopicPartition> changelogPartitions() {
-                return singletonList(changelog);
+            public Set<TopicPartition> changelogPartitions() {
+                return singleton(changelog);
             }
         };
         final Map<TopicPartition, OffsetAndMetadata> offsets = singletonMap(t1p0, new OffsetAndMetadata(0L, null));
@@ -1785,8 +1903,8 @@ public void shouldCloseActiveTasksAndPropagateThreadProducerExceptionsOnCleanShu
         );
         final Task task00 = new StateMachineTask(taskId00, taskId00Partitions, true) {
             @Override
-            public Collection<TopicPartition> changelogPartitions() {
-                return singletonList(changelog);
+            public Set<TopicPartition> changelogPartitions() {
+                return singleton(changelog);
             }
         };
 
@@ -1830,7 +1948,7 @@ public Collection<TopicPartition> changelogPartitions() {
 
     @Test
     public void shouldOnlyCommitRevokedStandbyTaskAndPropagatePrepareCommitException() {
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_ALPHA);
+        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_ALPHA, false);
 
         final Task task00 = new StateMachineTask(taskId00, taskId00Partitions, false);
 
@@ -1856,7 +1974,7 @@ public Map<TopicPartition, OffsetAndMetadata> prepareCommit() {
         assertThat(task01.state(), is(Task.State.CLOSED));
 
         // All the tasks involving in the commit should already be removed.
-        assertThat(taskManager.tasks(), is(Collections.singletonMap(taskId00, task00)));
+        assertThat(taskManager.allTasks(), is(Collections.singletonMap(taskId00, task00)));
     }
 
     @Test
@@ -1900,8 +2018,8 @@ public void shouldCloseActiveTasksAndIgnoreExceptionsOnUncleanShutdown() {
         );
         final Task task00 = new StateMachineTask(taskId00, taskId00Partitions, true) {
             @Override
-            public Collection<TopicPartition> changelogPartitions() {
-                return singletonList(changelog);
+            public Set<TopicPartition> changelogPartitions() {
+                return singleton(changelog);
             }
         };
         final Task task01 = new StateMachineTask(taskId01, taskId01Partitions, true) {
@@ -1922,12 +2040,8 @@ public void suspend() {
         resetToStrict(changeLogReader);
         expect(changeLogReader.completedChangelogs()).andReturn(emptySet());
         expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(asList(task00, task01, task02));
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId00));
-        expectLastCall().andThrow(new RuntimeException("whatever 0"));
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId01));
-        expectLastCall().andThrow(new RuntimeException("whatever 1"));
-        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(eq(taskId02));
-        expectLastCall().andThrow(new RuntimeException("whatever 2"));
+        activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(anyObject());
+        expectLastCall().andThrow(new RuntimeException("whatever")).times(3);
         activeTaskCreator.closeThreadProducerIfNeeded();
         expectLastCall().andThrow(new RuntimeException("whatever all"));
         expect(standbyTaskCreator.createTasks(eq(emptyMap()))).andStubReturn(emptyList());
@@ -1973,6 +2087,7 @@ public void shouldCloseStandbyTasksOnShutdown() {
         final Task task00 = new StateMachineTask(taskId00, taskId00Partitions, false);
 
         // `handleAssignment`
+        expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.emptySet());
         expect(standbyTaskCreator.createTasks(eq(assignment))).andStubReturn(singletonList(task00));
 
         // `tryToCompleteRestoration`
@@ -2011,7 +2126,8 @@ public void shouldInitializeNewActiveTasks() {
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
             .andStubReturn(singletonList(task00));
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2028,10 +2144,10 @@ public void shouldInitializeNewStandbyTasks() {
         final StateMachineTask task01 = new StateMachineTask(taskId01, taskId01Partitions, false);
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment)))
-            .andStubReturn(singletonList(task01));
+        expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment))).andStubReturn(singletonList(task01));
 
-        replay(standbyTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(emptyMap(), taskId01Assignment);
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2142,8 +2258,8 @@ public void shouldNotCommitOffsetsIfOnlyStandbyTasksAssigned() {
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, false);
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(standbyTaskCreator.createTasks(eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
+        expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(taskId00Assignment))).andStubReturn(singletonList(task00));
         expectLastCall();
 
         replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
@@ -2215,7 +2331,7 @@ public void shouldCommitViaConsumerIfEosDisabled() {
 
     @Test
     public void shouldCommitViaProducerIfEosAlphaEnabled() {
-        final StreamsProducer producer = mock(StreamsProducer.class);
+        final StreamsProducer producer = EasyMock.mock(StreamsProducer.class);
         expect(activeTaskCreator.streamsProducerForTask(anyObject(TaskId.class)))
             .andReturn(producer)
             .andReturn(producer);
@@ -2233,7 +2349,7 @@ public void shouldCommitViaProducerIfEosAlphaEnabled() {
 
     @Test
     public void shouldCommitViaProducerIfEosV2Enabled() {
-        final StreamsProducer producer = mock(StreamsProducer.class);
+        final StreamsProducer producer = EasyMock.mock(StreamsProducer.class);
         expect(activeTaskCreator.threadProducer()).andReturn(producer);
 
         final Map<TopicPartition, OffsetAndMetadata> offsetsT01 = singletonMap(t1p1, new OffsetAndMetadata(0L, null));
@@ -2252,7 +2368,7 @@ private void shouldCommitViaProducerIfEosEnabled(final ProcessingMode processing
                                                      final StreamsProducer producer,
                                                      final Map<TopicPartition, OffsetAndMetadata> offsetsT01,
                                                      final Map<TopicPartition, OffsetAndMetadata> offsetsT02) {
-        setUpTaskManager(processingMode);
+        final TaskManager taskManager = setUpTaskManager(processingMode, false);
 
         final StateMachineTask task01 = new StateMachineTask(taskId01, taskId01Partitions, true);
         task01.setCommittableOffsetsAndMetadata(offsetsT01);
@@ -2282,10 +2398,9 @@ public Map<TopicPartition, OffsetAndMetadata> prepareCommit() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2309,10 +2424,10 @@ public Map<TopicPartition, OffsetAndMetadata> prepareCommit() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment)))
-            .andStubReturn(singletonList(task01));
+        expect(activeTaskCreator.createTasks(anyObject(), anyObject())).andStubReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(taskId01Assignment))).andStubReturn(singletonList(task01));
 
-        replay(standbyTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(emptyMap(), taskId01Assignment);
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2344,10 +2459,10 @@ public Map<TopicPartition, Long> purgeableOffsets() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
 
-        replay(activeTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2380,10 +2495,9 @@ public Map<TopicPartition, Long> purgeableOffsets() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2407,8 +2521,6 @@ public void shouldIgnorePurgeDataErrors() {
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true);
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
 
         final KafkaFutureImpl<DeletedRecords> futureDeletedRecords = new KafkaFutureImpl<>();
         final DeleteRecordsResult deleteRecordsResult = new DeleteRecordsResult(singletonMap(t1p1, futureDeletedRecords));
@@ -2417,7 +2529,7 @@ public void shouldIgnorePurgeDataErrors() {
 
         replay(activeTaskCreator, adminClient, consumer, changeLogReader);
 
-        taskManager.handleAssignment(taskId00Assignment, emptyMap());
+        taskManager.addTask(task00);
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
 
         assertThat(task00.state(), is(Task.State.RUNNING));
@@ -2504,10 +2616,9 @@ public void shouldProcessActiveTasks() {
         assignment.put(taskId01, taskId01Partitions);
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(assignment)))
-            .andStubReturn(Arrays.asList(task00, task01));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andStubReturn(Arrays.asList(task00, task01));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2618,10 +2729,9 @@ public boolean process(final long wallClockTime) {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2646,8 +2756,8 @@ public boolean process(final long wallClockTime) {
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
             .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2673,10 +2783,9 @@ public boolean maybePunctuateStreamTime() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2696,10 +2805,9 @@ public boolean maybePunctuateStreamTime() {
         };
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2726,8 +2834,9 @@ public boolean maybePunctuateSystemTime() {
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
             .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, consumer, changeLogReader);
+        expect(standbyTaskCreator.createTasks(anyObject()))
+            .andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(true));
@@ -2742,16 +2851,15 @@ public boolean maybePunctuateSystemTime() {
     public void shouldReturnFalseWhenThereAreStillNonRunningTasks() {
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true) {
             @Override
-            public Collection<TopicPartition> changelogPartitions() {
-                return singletonList(new TopicPartition("fake", 0));
+            public Set<TopicPartition> changelogPartitions() {
+                return singleton(new TopicPartition("fake", 0));
             }
         };
 
         expect(changeLogReader.completedChangelogs()).andReturn(emptySet());
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andStubReturn(singletonList(task00));
-
-        replay(activeTaskCreator, changeLogReader, consumer);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andStubReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, changeLogReader, consumer);
 
         taskManager.handleAssignment(taskId00Assignment, emptyMap());
         assertThat(taskManager.tryToCompleteRestoration(time.milliseconds(), null), is(false));
@@ -2769,10 +2877,11 @@ public void shouldHaveRemainingPartitionsUncleared() {
 
         expectRestoreToBeCompleted(consumer, changeLogReader);
         expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(task00));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
         consumer.commitSync(offsets);
         expectLastCall();
 
-        replay(activeTaskCreator, consumer, changeLogReader);
+        replay(activeTaskCreator, standbyTaskCreator, consumer, changeLogReader);
 
         try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(TaskManager.class)) {
             LogCaptureAppender.setClassLoggerToDebug(TaskManager.class);
@@ -3033,7 +3142,7 @@ public void shouldNotFailForTimeoutExceptionOnConsumerCommit() {
 
     @Test
     public void shouldNotFailForTimeoutExceptionOnCommitWithEosAlpha() {
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_ALPHA);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_ALPHA, false);
 
         final StreamsProducer producer = mock(StreamsProducer.class);
         expect(activeTaskCreator.streamsProducerForTask(anyObject(TaskId.class)))
@@ -3044,15 +3153,14 @@ public void shouldNotFailForTimeoutExceptionOnCommitWithEosAlpha() {
         final Map<TopicPartition, OffsetAndMetadata> offsetsT00 = singletonMap(t1p0, new OffsetAndMetadata(0L, null));
         final Map<TopicPartition, OffsetAndMetadata> offsetsT01 = singletonMap(t1p1, new OffsetAndMetadata(1L, null));
 
-        producer.commitTransaction(offsetsT00, null);
-        expectLastCall().andThrow(new TimeoutException("KABOOM!"));
-        producer.commitTransaction(offsetsT00, null);
-        expectLastCall();
-
-        producer.commitTransaction(offsetsT01, null);
-        expectLastCall();
-        producer.commitTransaction(offsetsT01, null);
-        expectLastCall();
+        doThrow(new TimeoutException("KABOOM!"))
+            .doNothing()
+            .doNothing()
+            .doNothing()
+            .when(producer).commitTransaction(offsetsT00, null);
+        doNothing()
+            .doNothing()
+            .when(producer).commitTransaction(offsetsT01, null);
 
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true);
         task00.setCommittableOffsetsAndMetadata(offsetsT00);
@@ -3061,7 +3169,7 @@ public void shouldNotFailForTimeoutExceptionOnCommitWithEosAlpha() {
         final StateMachineTask task02 = new StateMachineTask(taskId02, taskId02Partitions, true);
 
         expect(consumer.groupMetadata()).andStubReturn(null);
-        replay(producer, activeTaskCreator, consumer);
+        replay(activeTaskCreator, consumer);
 
         task00.setCommitNeeded();
         task01.setCommitNeeded();
@@ -3078,7 +3186,7 @@ public void shouldNotFailForTimeoutExceptionOnCommitWithEosAlpha() {
 
     @Test
     public void shouldThrowTaskCorruptedExceptionForTimeoutExceptionOnCommitWithEosV2() {
-        setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2);
+        final TaskManager taskManager = setUpTaskManager(ProcessingMode.EXACTLY_ONCE_V2, false);
 
         final StreamsProducer producer = mock(StreamsProducer.class);
         expect(activeTaskCreator.threadProducer())
@@ -3090,10 +3198,7 @@ public void shouldThrowTaskCorruptedExceptionForTimeoutExceptionOnCommitWithEosV
         final Map<TopicPartition, OffsetAndMetadata> allOffsets = new HashMap<>(offsetsT00);
         allOffsets.putAll(offsetsT01);
 
-        producer.commitTransaction(allOffsets, null);
-        expectLastCall().andThrow(new TimeoutException("KABOOM!"));
-        producer.commitTransaction(allOffsets, null);
-        expectLastCall();
+        doThrow(new TimeoutException("KABOOM!")).doNothing().when(producer).commitTransaction(allOffsets, null);
 
         final StateMachineTask task00 = new StateMachineTask(taskId00, taskId00Partitions, true);
         task00.setCommittableOffsetsAndMetadata(offsetsT00);
@@ -3102,7 +3207,7 @@ public void shouldThrowTaskCorruptedExceptionForTimeoutExceptionOnCommitWithEosV
         final StateMachineTask task02 = new StateMachineTask(taskId02, taskId02Partitions, true);
 
         expect(consumer.groupMetadata()).andStubReturn(null);
-        replay(producer, activeTaskCreator, consumer);
+        replay(activeTaskCreator, consumer);
 
         task00.setCommitNeeded();
         task01.setCommitNeeded();
@@ -3173,9 +3278,9 @@ public void suspend() {
 
         final Map<TaskId, Set<TopicPartition>> assignment = new HashMap<>(taskId00Assignment);
         assignment.putAll(taskId01Assignment);
-        expect(activeTaskCreator.createTasks(anyObject(), eq(assignment)))
-            .andReturn(asList(task00, task01));
-        replay(activeTaskCreator, consumer);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(assignment))).andReturn(asList(task00, task01));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        replay(activeTaskCreator, standbyTaskCreator, consumer);
 
         taskManager.handleAssignment(assignment, Collections.emptyMap());
 
@@ -3190,22 +3295,23 @@ public void suspend() {
 
     @Test
     public void shouldConvertActiveTaskToStandbyTask() {
-        final StreamTask activeTask = mock(StreamTask.class);
+        final StreamTask activeTask = EasyMock.mock(StreamTask.class);
         expect(activeTask.id()).andStubReturn(taskId00);
         expect(activeTask.inputPartitions()).andStubReturn(taskId00Partitions);
         expect(activeTask.isActive()).andStubReturn(true);
         expect(activeTask.prepareCommit()).andStubReturn(Collections.emptyMap());
 
-        final StandbyTask standbyTask = mock(StandbyTask.class);
+        final StandbyTask standbyTask = EasyMock.mock(StandbyTask.class);
         expect(standbyTask.id()).andStubReturn(taskId00);
 
-        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment)))
-            .andReturn(singletonList(activeTask));
+        expect(activeTaskCreator.createTasks(anyObject(), eq(taskId00Assignment))).andReturn(singletonList(activeTask));
+        expect(standbyTaskCreator.createTasks(anyObject())).andStubReturn(Collections.emptySet());
+        activeTask.prepareRecycle();
+        expectLastCall().once();
         activeTaskCreator.closeAndRemoveTaskProducerIfNeeded(taskId00);
         expectLastCall().anyTimes();
-
-        expect(standbyTaskCreator.createStandbyTaskFromActive(anyObject(), eq(taskId00Partitions)))
-            .andReturn(standbyTask);
+        expect(standbyTaskCreator.createStandbyTaskFromActive(anyObject(), eq(taskId00Partitions))).andReturn(standbyTask);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
 
         replay(activeTask, standbyTask, activeTaskCreator, standbyTaskCreator, consumer);
 
@@ -3218,25 +3324,21 @@ public void shouldConvertActiveTaskToStandbyTask() {
     @Test
     public void shouldConvertStandbyTaskToActiveTask() {
         final StandbyTask standbyTask = mock(StandbyTask.class);
-        expect(standbyTask.id()).andStubReturn(taskId00);
-        expect(standbyTask.isActive()).andStubReturn(false);
-        expect(standbyTask.prepareCommit()).andStubReturn(Collections.emptyMap());
-        standbyTask.suspend();
-        expectLastCall().anyTimes();
-        standbyTask.postCommit(true);
-        expectLastCall().anyTimes();
+        when(standbyTask.id()).thenReturn(taskId00);
+        when(standbyTask.isActive()).thenReturn(false);
+        when(standbyTask.prepareCommit()).thenReturn(Collections.emptyMap());
 
         final StreamTask activeTask = mock(StreamTask.class);
-        expect(activeTask.id()).andStubReturn(taskId00);
-        expect(activeTask.inputPartitions()).andStubReturn(taskId00Partitions);
+        when(activeTask.id()).thenReturn(taskId00);
+        when(activeTask.inputPartitions()).thenReturn(taskId00Partitions);
 
-        expect(standbyTaskCreator.createTasks(eq(taskId00Assignment)))
-            .andReturn(singletonList(standbyTask));
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(taskId00Assignment))).andReturn(singletonList(standbyTask));
+        expect(activeTaskCreator.createActiveTaskFromStandby(eq(standbyTask), eq(taskId00Partitions), anyObject())).andReturn(activeTask);
+        expect(activeTaskCreator.createTasks(anyObject(), eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
+        expect(standbyTaskCreator.createTasks(eq(Collections.emptyMap()))).andReturn(Collections.emptySet());
 
-        expect(activeTaskCreator.createActiveTaskFromStandby(anyObject(), eq(taskId00Partitions), anyObject()))
-            .andReturn(activeTask);
-
-        replay(standbyTask, activeTask, standbyTaskCreator, activeTaskCreator, consumer);
+        replay(standbyTaskCreator, activeTaskCreator, consumer);
 
         taskManager.handleAssignment(Collections.emptyMap(), taskId00Assignment);
         taskManager.handleAssignment(taskId00Assignment, Collections.emptyMap());
@@ -3244,6 +3346,17 @@ public void shouldConvertStandbyTaskToActiveTask() {
         verify(standbyTaskCreator, activeTaskCreator);
     }
 
+    @Test
+    public void shouldListNotPausedTasks() {
+        handleAssignment(taskId00Assignment, taskId01Assignment, emptyMap());
+
+        assertEquals(taskManager.notPausedTasks().size(), 2);
+
+        topologyMetadata.pauseTopology(UNNAMED_TOPOLOGY);
+
+        assertEquals(taskManager.notPausedTasks().size(), 0);
+    }
+
     private static void expectRestoreToBeCompleted(final Consumer<byte[], byte[]> consumer,
                                                    final ChangelogReader changeLogReader) {
         expectRestoreToBeCompleted(consumer, changeLogReader, true);
@@ -3275,7 +3388,7 @@ private void makeTaskFolders(final String... names) throws Exception {
 
     private void writeCheckpointFile(final TaskId task, final Map<TopicPartition, Long> offsets) throws Exception {
         final File checkpointFile = getCheckpointFile(task);
-        assertThat(checkpointFile.createNewFile(), is(true));
+        Files.createFile(checkpointFile.toPath());
         new OffsetCheckpoint(checkpointFile).write(offsets);
         expect(stateDirectory.checkpointFileFor(task)).andReturn(checkpointFile);
     }
@@ -3314,7 +3427,7 @@ private static class StateMachineTask extends AbstractTask implements Task {
                          final Set<TopicPartition> partitions,
                          final boolean active,
                          final ProcessorStateManager processorStateManager) {
-            super(id, null, null, processorStateManager, partitions, 0L, "test-task", StateMachineTask.class);
+            super(id, null, null, processorStateManager, partitions, (new TopologyConfig(new DummyStreamsConfig())).getTaskConfig(), "test-task", StateMachineTask.class);
             this.active = active;
         }
 
@@ -3424,7 +3537,7 @@ public void closeDirty() {
         }
 
         @Override
-        public void closeCleanAndRecycleState() {
+        public void prepareRecycle() {
             transitionTo(State.CLOSED);
         }
 
@@ -3446,7 +3559,7 @@ public StateStore getStore(final String name) {
         }
 
         @Override
-        public Collection<TopicPartition> changelogPartitions() {
+        public Set<TopicPartition> changelogPartitions() {
             return changelogOffsets.keySet();
         }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TasksTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TasksTest.java
new file mode 100644
index 0000000000000..756aa53f86037
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TasksTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.streams.processor.TaskId;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+
+import static org.apache.kafka.common.utils.Utils.mkSet;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.standbyTask;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statefulTask;
+import static org.apache.kafka.test.StreamsTestUtils.TaskBuilder.statelessTask;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TasksTest {
+
+    private final static TopicPartition TOPIC_PARTITION_A_0 = new TopicPartition("topicA", 0);
+    private final static TopicPartition TOPIC_PARTITION_A_1 = new TopicPartition("topicA", 1);
+    private final static TaskId TASK_0_0 = new TaskId(0, 0);
+    private final static TaskId TASK_0_1 = new TaskId(0, 1);
+    private final static TaskId TASK_1_0 = new TaskId(1, 0);
+
+    private final LogContext logContext = new LogContext();
+
+    @Test
+    public void shouldCreateTasks() {
+        final Tasks tasks = new Tasks(logContext);
+        final StreamTask statefulTask = statefulTask(TASK_0_0, mkSet(TOPIC_PARTITION_A_0)).build();
+        final StandbyTask standbyTask = standbyTask(TASK_0_1, mkSet(TOPIC_PARTITION_A_1)).build();
+        final StreamTask statelessTask = statelessTask(TASK_1_0).build();
+
+        tasks.addNewActiveTasks(mkSet(statefulTask, statelessTask));
+        tasks.addNewStandbyTasks(Collections.singletonList(standbyTask));
+
+        assertEquals(statefulTask, tasks.task(statefulTask.id()));
+        assertTrue(tasks.activeTasks().contains(statefulTask));
+        assertTrue(tasks.allTasks().contains(statefulTask));
+        assertTrue(tasks.tasks(mkSet(statefulTask.id())).contains(statefulTask));
+        assertEquals(statelessTask, tasks.task(statelessTask.id()));
+        assertTrue(tasks.activeTasks().contains(statelessTask));
+        assertTrue(tasks.allTasks().contains(statelessTask));
+        assertTrue(tasks.tasks(mkSet(statelessTask.id())).contains(statelessTask));
+        assertEquals(standbyTask, tasks.task(standbyTask.id()));
+        assertTrue(tasks.allTasks().contains(standbyTask));
+        assertTrue(tasks.tasks(mkSet(standbyTask.id())).contains(standbyTask));
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadataTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadataTest.java
new file mode 100644
index 0000000000000..e9bef49ff2557
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopicPartitionMetadataTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import org.junit.Test;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+public class TopicPartitionMetadataTest {
+
+    @Test
+    public void shouldGetPartitonTimeAndProcessorMeta() {
+        final ProcessorMetadata metadata = new ProcessorMetadata();
+        final String key = "some_key";
+        final long value = 100L;
+        metadata.put(key, value);
+
+        final TopicPartitionMetadata topicMeta = new TopicPartitionMetadata(100L, metadata);
+
+        assertThat(topicMeta.partitionTime(), is(100L));
+        assertThat(topicMeta.processorMetadata(), is(metadata));
+    }
+
+    @Test
+    public void shouldDecodeVersionOne() {
+        final byte[] serialized = ByteBuffer.allocate(Byte.BYTES + Long.BYTES)
+            .put((byte) 1)
+            .putLong(100L)
+            .array();
+        final String serializedString = Base64.getEncoder().encodeToString(serialized);
+
+        final TopicPartitionMetadata topicMeta = TopicPartitionMetadata.decode(serializedString);
+
+        assertThat(topicMeta.partitionTime(), is(100L));
+        assertThat(topicMeta.processorMetadata(), is(new ProcessorMetadata()));
+    }
+
+    @Test
+    public void shouldEncodeDecodeVersionTwo() {
+        final ProcessorMetadata metadata = new ProcessorMetadata();
+        final String key = "some_key";
+        final long value = 100L;
+        metadata.put(key, value);
+
+        final TopicPartitionMetadata expected = new TopicPartitionMetadata(100L, metadata);
+        final String serializedString = expected.encode();
+        final TopicPartitionMetadata topicMeta = TopicPartitionMetadata.decode(serializedString);
+
+        assertThat(topicMeta, is(expected));
+    }
+
+    @Test
+    public void shouldEncodeDecodeEmptyMetaVersionTwo() {
+        final TopicPartitionMetadata expected = new TopicPartitionMetadata(100L, new ProcessorMetadata());
+        final String serializedString = expected.encode();
+        final TopicPartitionMetadata topicMeta = TopicPartitionMetadata.decode(serializedString);
+
+        assertThat(topicMeta, is(expected));
+    }
+
+    @Test
+    public void shouldDecodeEmptyStringVersionTwo() {
+        final TopicPartitionMetadata expected = new TopicPartitionMetadata(RecordQueue.UNKNOWN, new ProcessorMetadata());
+        final TopicPartitionMetadata topicMeta = TopicPartitionMetadata.decode("");
+
+        assertThat(topicMeta, is(expected));
+    }
+
+    @Test
+    public void shouldReturnUnknownTimestampIfUnknownVersion() {
+        final byte[] emptyMessage = {TopicPartitionMetadata.LATEST_MAGIC_BYTE + 1};
+        final String encodedString = Base64.getEncoder().encodeToString(emptyMessage);
+
+        final TopicPartitionMetadata decoded = TopicPartitionMetadata.decode(encodedString);
+
+        assertThat(decoded.partitionTime(), is(RecordQueue.UNKNOWN));
+        assertThat(decoded.processorMetadata(), is(new ProcessorMetadata()));
+    }
+
+    @Test
+    public void shouldReturnUnknownTimestampIfInvalidMetadata() {
+        final String invalidBase64String = "{}";
+
+        final TopicPartitionMetadata decoded = TopicPartitionMetadata.decode(invalidBase64String);
+
+        assertThat(decoded.partitionTime(), is(RecordQueue.UNKNOWN));
+        assertThat(decoded.processorMetadata(), is(new ProcessorMetadata()));
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopologyMetadataTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopologyMetadataTest.java
new file mode 100644
index 0000000000000..52a103a4d4669
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/TopologyMetadataTest.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals;
+
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.processor.internals.testutil.DummyStreamsConfig;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.easymock.EasyMock.mock;
+
+public class TopologyMetadataTest {
+    final static String TOPOLOGY1 = "topology1";
+    final static String TOPOLOGY2 = "topology2";
+
+    @Test
+    public void testPauseResume() {
+        final InternalTopologyBuilder internalTopologyBuilder = mock(InternalTopologyBuilder.class);
+        final StreamsConfig config = new DummyStreamsConfig();
+
+        final TopologyMetadata topologyMetadata = new TopologyMetadata(internalTopologyBuilder,
+            config);
+
+        Assert.assertFalse(topologyMetadata.isPaused(TOPOLOGY1));
+        Assert.assertFalse(topologyMetadata.isPaused(TOPOLOGY2));
+
+        topologyMetadata.pauseTopology(TOPOLOGY1);
+        Assert.assertTrue(topologyMetadata.isPaused(TOPOLOGY1));
+        Assert.assertFalse(topologyMetadata.isPaused(TOPOLOGY2));
+
+        topologyMetadata.resumeTopology(TOPOLOGY1);
+        Assert.assertFalse(topologyMetadata.isPaused(TOPOLOGY1));
+        Assert.assertFalse(topologyMetadata.isPaused(TOPOLOGY2));
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/WriteConsistencyVectorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/WriteConsistencyVectorTest.java
index 1ca19d896368a..d9a68a81b6d52 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/WriteConsistencyVectorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/WriteConsistencyVectorTest.java
@@ -115,15 +115,16 @@ public void shouldSendConsistencyVectorToChangelogTopic() {
                 ChangelogRecordDeserializationHelper.CHANGELOG_POSITION_HEADER_KEY,
                 PositionSerde.serialize(position).array()));
         recordCollector.send(
-                CHANGELOG_PARTITION.topic(),
-                KEY_BYTES,
-                VALUE_BYTES,
-                headers,
-                CHANGELOG_PARTITION.partition(),
-                TIMESTAMP,
-                BYTES_KEY_SERIALIZER,
-                BYTEARRAY_VALUE_SERIALIZER
-        );
+            CHANGELOG_PARTITION.topic(),
+            KEY_BYTES,
+            VALUE_BYTES,
+            headers,
+            CHANGELOG_PARTITION.partition(),
+            TIMESTAMP,
+            BYTES_KEY_SERIALIZER,
+            BYTEARRAY_VALUE_SERIALIZER,
+            null,
+            null);
 
         final StreamTask task = EasyMock.createNiceMock(StreamTask.class);
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/AssignmentTestUtils.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/AssignmentTestUtils.java
index cf8a6b297ab7f..78c6477f386b0 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/AssignmentTestUtils.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/AssignmentTestUtils.java
@@ -65,6 +65,9 @@ public final class AssignmentTestUtils {
     public static final UUID UUID_4 = uuidForInt(4);
     public static final UUID UUID_5 = uuidForInt(5);
     public static final UUID UUID_6 = uuidForInt(6);
+    public static final UUID UUID_7 = uuidForInt(7);
+    public static final UUID UUID_8 = uuidForInt(8);
+    public static final UUID UUID_9 = uuidForInt(9);
 
     public static final TopicPartition TP_0_0 = new TopicPartition("topic0", 0);
     public static final TopicPartition TP_0_1 = new TopicPartition("topic0", 1);
@@ -166,6 +169,15 @@ public static SubscriptionInfo getInfo(final UUID processId,
             LATEST_SUPPORTED_VERSION, LATEST_SUPPORTED_VERSION, processId, null, getTaskOffsetSums(prevTasks, standbyTasks), uniqueField, 0, EMPTY_CLIENT_TAGS);
     }
 
+    public static SubscriptionInfo getInfo(final UUID processId,
+                                           final Set<TaskId> prevTasks,
+                                           final Set<TaskId> standbyTasks,
+                                           final byte uniqueField,
+                                           final Map<String, String> clientTags) {
+        return new SubscriptionInfo(
+            LATEST_SUPPORTED_VERSION, LATEST_SUPPORTED_VERSION, processId, null, getTaskOffsetSums(prevTasks, standbyTasks), uniqueField, 0, clientTags);
+    }
+
     // Stub offset sums for when we only care about the prev/standby task sets, not the actual offsets
     private static Map<TaskId, Long> getTaskOffsetSums(final Collection<TaskId> activeTasks, final Collection<TaskId> standbyTasks) {
         final Map<TaskId, Long> taskOffsetSums = activeTasks.stream().collect(Collectors.toMap(t -> t, t -> Task.LATEST_OFFSET));
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignorTest.java
index 8a983dee9be24..631430c6a82f7 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/ClientTagAwareStandbyTaskAssignorTest.java
@@ -81,8 +81,43 @@ public void setup() {
         standbyTaskAssignor = new ClientTagAwareStandbyTaskAssignor();
     }
 
+    @Test
+    public void shouldNotAssignStatelessTasksToAnyClients() {
+        final Set<TaskId> statefulTasks = mkSet(
+            TASK_1_0,
+            TASK_1_1,
+            TASK_1_2
+        );
+
+        final Map<UUID, ClientState> clientStates = mkMap(
+            mkEntry(UUID_1, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_1), mkEntry(CLUSTER_TAG, CLUSTER_1)), TASK_0_0, TASK_1_0)),
+            mkEntry(UUID_2, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_2), mkEntry(CLUSTER_TAG, CLUSTER_1)))),
+            mkEntry(UUID_3, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_3), mkEntry(CLUSTER_TAG, CLUSTER_1)))),
+
+            mkEntry(UUID_4, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_1), mkEntry(CLUSTER_TAG, CLUSTER_2)), TASK_0_1, TASK_1_1)),
+            mkEntry(UUID_5, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_2), mkEntry(CLUSTER_TAG, CLUSTER_2)))),
+            mkEntry(UUID_6, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_3), mkEntry(CLUSTER_TAG, CLUSTER_2)))),
+
+            mkEntry(UUID_7, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_1), mkEntry(CLUSTER_TAG, CLUSTER_3)), TASK_0_2, TASK_1_2)),
+            mkEntry(UUID_8, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_2), mkEntry(CLUSTER_TAG, CLUSTER_3)))),
+            mkEntry(UUID_9, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_3), mkEntry(CLUSTER_TAG, CLUSTER_3))))
+        );
+
+        final Set<TaskId> allActiveTasks = findAllActiveTasks(clientStates);
+
+        final AssignmentConfigs assignmentConfigs = newAssignmentConfigs(2, ZONE_TAG, CLUSTER_TAG);
+
+        standbyTaskAssignor.assign(clientStates, allActiveTasks, statefulTasks, assignmentConfigs);
+
+        final Set<TaskId> statelessTasks = allActiveTasks.stream().filter(taskId -> !statefulTasks.contains(taskId)).collect(Collectors.toSet());
+        assertTrue(
+            clientStates.values().stream().allMatch(clientState -> statelessTasks.stream().noneMatch(clientState::hasStandbyTask))
+        );
+    }
+
     @Test
     public void shouldRemoveClientToRemainingStandbysAndNotPopulatePendingStandbyTasksToClientIdWhenAllStandbyTasksWereAssigned() {
+        final int numStandbyReplicas = 2;
         final Set<String> rackAwareAssignmentTags = mkSet(ZONE_TAG, CLUSTER_TAG);
         final Map<UUID, ClientState> clientStates = mkMap(
             mkEntry(UUID_1, createClientStateWithCapacity(2, mkMap(mkEntry(ZONE_TAG, ZONE_1), mkEntry(CLUSTER_TAG, CLUSTER_1)), TASK_0_0)),
@@ -102,10 +137,11 @@ public void shouldRemoveClientToRemainingStandbysAndNotPopulatePendingStandbyTas
         fillClientsTagStatistics(clientStates, tagEntryToClients, tagKeyToValues);
 
         final Map<TaskId, UUID> pendingStandbyTasksToClientId = new HashMap<>();
-        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(2, allActiveTasks);
+        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(numStandbyReplicas, allActiveTasks);
 
         for (final TaskId activeTaskId : allActiveTasks) {
             assignStandbyTasksToClientsWithDifferentTags(
+                numStandbyReplicas,
                 constrainedPrioritySet,
                 activeTaskId,
                 taskToClientId.get(activeTaskId),
@@ -132,6 +168,7 @@ public void shouldUpdateClientToRemainingStandbysAndPendingStandbyTasksToClientI
         );
 
         final ConstrainedPrioritySet constrainedPrioritySet = createLeastLoadedPrioritySetConstrainedByAssignedTask(clientStates);
+        final int numStandbyReplicas = 3;
         final Set<TaskId> allActiveTasks = findAllActiveTasks(clientStates);
         final Map<TaskId, UUID> taskToClientId = mkMap(mkEntry(TASK_0_0, UUID_1),
                                                        mkEntry(TASK_0_1, UUID_2),
@@ -143,10 +180,11 @@ public void shouldUpdateClientToRemainingStandbysAndPendingStandbyTasksToClientI
         fillClientsTagStatistics(clientStates, tagEntryToClients, tagKeyToValues);
 
         final Map<TaskId, UUID> pendingStandbyTasksToClientId = new HashMap<>();
-        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(3, allActiveTasks);
+        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(numStandbyReplicas, allActiveTasks);
 
         for (final TaskId activeTaskId : allActiveTasks) {
             assignStandbyTasksToClientsWithDifferentTags(
+                numStandbyReplicas,
                 constrainedPrioritySet,
                 activeTaskId,
                 taskToClientId.get(activeTaskId),
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignorTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignorTest.java
index bf78db6457f24..90e0fed51f388 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/HighAvailabilityTaskAssignorTest.java
@@ -20,10 +20,8 @@
 import org.apache.kafka.streams.processor.internals.assignment.AssignorConfiguration.AssignmentConfigs;
 import org.junit.Test;
 
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.UUID;
@@ -31,7 +29,6 @@
 
 import static java.util.Collections.emptySet;
 import static java.util.Collections.singleton;
-import static java.util.Collections.singletonList;
 import static java.util.Collections.singletonMap;
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
@@ -68,7 +65,6 @@
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 public class HighAvailabilityTaskAssignorTest {
@@ -423,6 +419,37 @@ public void shouldComputeNewAssignmentIfActiveTasksWasNotOnCaughtUpClient() {
         assertBalancedTasks(clientStates);
     }
 
+    @Test
+    public void shouldAssignToMostCaughtUpIfActiveTasksWasNotOnCaughtUpClient() {
+        final Set<TaskId> allTasks = mkSet(TASK_0_0);
+        final Set<TaskId> statefulTasks = mkSet(TASK_0_0);
+        final ClientState client1 = new ClientState(emptySet(), emptySet(), singletonMap(TASK_0_0, Long.MAX_VALUE), EMPTY_CLIENT_TAGS, 1);
+        final ClientState client2 = new ClientState(emptySet(), emptySet(), singletonMap(TASK_0_0, 1000L), EMPTY_CLIENT_TAGS, 1);
+        final ClientState client3 = new ClientState(emptySet(), emptySet(), singletonMap(TASK_0_0, 500L), EMPTY_CLIENT_TAGS, 1);
+        final Map<UUID, ClientState> clientStates = mkMap(
+                mkEntry(UUID_1, client1),
+                mkEntry(UUID_2, client2),
+                mkEntry(UUID_3, client3)
+        );
+
+        final boolean probingRebalanceNeeded =
+                new HighAvailabilityTaskAssignor().assign(clientStates, allTasks, statefulTasks, configWithStandbys);
+
+        assertThat(clientStates.get(UUID_1).activeTasks(), is(emptySet()));
+        assertThat(clientStates.get(UUID_2).activeTasks(), is(emptySet()));
+        assertThat(clientStates.get(UUID_3).activeTasks(), is(singleton(TASK_0_0)));
+
+        assertThat(clientStates.get(UUID_1).standbyTasks(), is(singleton(TASK_0_0))); // warm up
+        assertThat(clientStates.get(UUID_2).standbyTasks(), is(singleton(TASK_0_0))); // standby
+        assertThat(clientStates.get(UUID_3).standbyTasks(), is(emptySet()));
+
+        assertThat(probingRebalanceNeeded, is(true));
+        assertValidAssignment(1, 1, allTasks, emptySet(), clientStates, new StringBuilder());
+        assertBalancedActiveAssignment(clientStates, new StringBuilder());
+        assertBalancedStatefulAssignment(allTasks, clientStates, new StringBuilder());
+        assertBalancedTasks(clientStates);
+    }
+
     @Test
     public void shouldAssignStandbysForStatefulTasks() {
         final Set<TaskId> allTasks = mkSet(TASK_0_0, TASK_0_1);
@@ -814,27 +841,6 @@ public void shouldDistributeStatelessTasksEvenlyWithPreviousAssignmentAndNoState
         assertThat(probingRebalanceNeeded, is(false));
     }
 
-    @Test
-    public void shouldReturnClientTagAwareStandbyTaskAssignorWhenRackAwareAssignmentTagsIsSet() {
-        final StandbyTaskAssignor standbyTaskAssignor = HighAvailabilityTaskAssignor.createStandbyTaskAssignor(newAssignmentConfigs(1, singletonList("az")));
-        assertTrue(standbyTaskAssignor instanceof ClientTagAwareStandbyTaskAssignor);
-    }
-
-    @Test
-    public void shouldReturnDefaultStandbyTaskAssignorWhenRackAwareAssignmentTagsIsEmpty() {
-        final StandbyTaskAssignor standbyTaskAssignor = HighAvailabilityTaskAssignor.createStandbyTaskAssignor(newAssignmentConfigs(1, Collections.emptyList()));
-        assertTrue(standbyTaskAssignor instanceof DefaultStandbyTaskAssignor);
-    }
-
-    private static AssignorConfiguration.AssignmentConfigs newAssignmentConfigs(final int numStandbyReplicas,
-                                                                                final List<String> rackAwareAssignmentTags) {
-        return new AssignorConfiguration.AssignmentConfigs(0L,
-                                                           1,
-                                                           numStandbyReplicas,
-                                                           60000L,
-                                                           rackAwareAssignmentTags);
-    }
-
     private static void assertHasNoActiveTasks(final ClientState... clients) {
         for (final ClientState client : clients) {
             assertThat(client.activeTasks(), is(empty()));
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtilsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtilsTest.java
index 1abf1b9263532..b13f04b2bd8ef 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtilsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignmentUtilsTest.java
@@ -19,6 +19,7 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
 
 import java.util.Map;
 import java.util.Set;
@@ -36,8 +37,16 @@
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoInteractions;
 
 public class StandbyTaskAssignmentUtilsTest {
+
     private static final Set<TaskId> ACTIVE_TASKS = mkSet(TASK_0_0, TASK_0_1, TASK_0_2);
 
     private Map<UUID, ClientState> clients;
@@ -45,6 +54,7 @@ public class StandbyTaskAssignmentUtilsTest {
 
     @Before
     public void setup() {
+
         clients = getClientStatesMap(ACTIVE_TASKS.stream().map(StandbyTaskAssignmentUtilsTest::mkState).toArray(ClientState[]::new));
         clientsByTaskLoad = new ConstrainedPrioritySet(
             (client, task) -> !clients.get(client).hasAssignedTask(task),
@@ -55,38 +65,38 @@ public void setup() {
 
     @Test
     public void shouldReturnNumberOfStandbyTasksThatWereNotAssigned() {
-        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(3, ACTIVE_TASKS);
-
-        assertTrue(tasksToRemainingStandbys.keySet()
-                                           .stream()
-                                           .map(taskId -> pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(
-                                               clients,
-                                               tasksToRemainingStandbys,
-                                               clientsByTaskLoad,
-                                               taskId
-                                           ))
-                                           .allMatch(numRemainingStandbys -> numRemainingStandbys == 1));
+        final Logger logMock = mock(Logger.class);
+        final int numStandbyReplicas = 3;
+        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(numStandbyReplicas, ACTIVE_TASKS);
+
+        tasksToRemainingStandbys.keySet().forEach(taskId -> pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(numStandbyReplicas,
+                                                                                                                   clients,
+                                                                                                                   tasksToRemainingStandbys,
+                                                                                                                   clientsByTaskLoad,
+                                                                                                                   taskId,
+                                                                                                                   logMock));
 
         assertTrue(ACTIVE_TASKS.stream().allMatch(activeTask -> tasksToRemainingStandbys.get(activeTask) == 1));
         assertTrue(areStandbyTasksPresentForAllActiveTasks(2));
+        verify(logMock, times(ACTIVE_TASKS.size())).warn(anyString(), anyInt(), anyInt(), any());
     }
 
     @Test
     public void shouldReturnZeroWhenAllStandbyTasksWereSuccessfullyAssigned() {
-        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(1, ACTIVE_TASKS);
-
-        assertTrue(tasksToRemainingStandbys.keySet()
-                                           .stream()
-                                           .map(taskId -> pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(
-                                               clients,
-                                               tasksToRemainingStandbys,
-                                               clientsByTaskLoad,
-                                               taskId
-                                           ))
-                                           .allMatch(numRemainingStandbys -> numRemainingStandbys == 0));
+        final Logger logMock = mock(Logger.class);
+        final int numStandbyReplicas = 1;
+        final Map<TaskId, Integer> tasksToRemainingStandbys = computeTasksToRemainingStandbys(numStandbyReplicas, ACTIVE_TASKS);
+
+        tasksToRemainingStandbys.keySet().forEach(taskId -> pollClientAndMaybeAssignAndUpdateRemainingStandbyTasks(numStandbyReplicas,
+                                                                                                                   clients,
+                                                                                                                   tasksToRemainingStandbys,
+                                                                                                                   clientsByTaskLoad,
+                                                                                                                   taskId,
+                                                                                                                   logMock));
 
         assertTrue(ACTIVE_TASKS.stream().allMatch(activeTask -> tasksToRemainingStandbys.get(activeTask) == 0));
         assertTrue(areStandbyTasksPresentForAllActiveTasks(1));
+        verifyNoInteractions(logMock);
     }
 
     @Test
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactoryTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactoryTest.java
new file mode 100644
index 0000000000000..fdd7fa1d47372
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/StandbyTaskAssignorFactoryTest.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals.assignment;
+
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+import static org.junit.Assert.assertTrue;
+
+public class StandbyTaskAssignorFactoryTest {
+    private static final long ACCEPTABLE_RECOVERY_LAG = 0L;
+    private static final int MAX_WARMUP_REPLICAS = 1;
+    private static final int NUMBER_OF_STANDBY_REPLICAS = 1;
+    private static final long PROBING_REBALANCE_INTERVAL_MS = 60000L;
+
+    @Test
+    public void shouldReturnClientTagAwareStandbyTaskAssignorWhenRackAwareAssignmentTagsIsSet() {
+        final StandbyTaskAssignor standbyTaskAssignor = StandbyTaskAssignorFactory.create(newAssignmentConfigs(singletonList("az")));
+        assertTrue(standbyTaskAssignor instanceof ClientTagAwareStandbyTaskAssignor);
+    }
+
+    @Test
+    public void shouldReturnDefaultStandbyTaskAssignorWhenRackAwareAssignmentTagsIsEmpty() {
+        final StandbyTaskAssignor standbyTaskAssignor = StandbyTaskAssignorFactory.create(newAssignmentConfigs(Collections.emptyList()));
+        assertTrue(standbyTaskAssignor instanceof DefaultStandbyTaskAssignor);
+    }
+
+    private static AssignorConfiguration.AssignmentConfigs newAssignmentConfigs(final List<String> rackAwareAssignmentTags) {
+        return new AssignorConfiguration.AssignmentConfigs(ACCEPTABLE_RECOVERY_LAG,
+                                                           MAX_WARMUP_REPLICAS,
+                                                           NUMBER_OF_STANDBY_REPLICAS,
+                                                           PROBING_REBALANCE_INTERVAL_MS,
+                                                           rackAwareAssignmentTags);
+    }
+}
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovementTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovementTest.java
index 9b58d18c88f60..baf6d18496c1e 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovementTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/assignment/TaskMovementTest.java
@@ -19,19 +19,21 @@
 import org.apache.kafka.streams.processor.TaskId;
 import org.junit.Test;
 
-import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeMap;
+import java.util.TreeSet;
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static java.util.Arrays.asList;
-import static java.util.Collections.emptyList;
+import static java.util.Collections.emptyMap;
+import static java.util.Collections.emptySet;
 import static java.util.Collections.emptySortedSet;
-import static java.util.Collections.singletonList;
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.common.utils.Utils.mkSet;
@@ -58,17 +60,20 @@ public void shouldAssignTasksToClientsAndReturnFalseWhenAllClientsCaughtUp() {
         final Set<TaskId> allTasks = mkSet(TASK_0_0, TASK_0_1, TASK_0_2, TASK_1_0, TASK_1_1, TASK_1_2);
 
         final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = new HashMap<>();
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = new HashMap<>();
         for (final TaskId task : allTasks) {
             tasksToCaughtUpClients.put(task, mkSortedSet(UUID_1, UUID_2, UUID_3));
+            tasksToClientByLag.put(task, mkOrderedSet(UUID_1, UUID_2, UUID_3));
         }
 
-        final ClientState client1 = getClientStateWithActiveAssignment(asList(TASK_0_0, TASK_1_0));
-        final ClientState client2 = getClientStateWithActiveAssignment(asList(TASK_0_1, TASK_1_1));
-        final ClientState client3 = getClientStateWithActiveAssignment(asList(TASK_0_2, TASK_1_2));
+        final ClientState client1 = getClientStateWithActiveAssignment(mkSet(TASK_0_0, TASK_1_0), allTasks, allTasks);
+        final ClientState client2 = getClientStateWithActiveAssignment(mkSet(TASK_0_1, TASK_1_1), allTasks, allTasks);
+        final ClientState client3 = getClientStateWithActiveAssignment(mkSet(TASK_0_2, TASK_1_2), allTasks, allTasks);
 
         assertThat(
             assignActiveTaskMovements(
                 tasksToCaughtUpClients,
+                tasksToClientByLag,
                 getClientStatesMap(client1, client2, client3),
                 new TreeMap<>(),
                 new AtomicInteger(maxWarmupReplicas)
@@ -80,10 +85,11 @@ public void shouldAssignTasksToClientsAndReturnFalseWhenAllClientsCaughtUp() {
     @Test
     public void shouldAssignAllTasksToClientsAndReturnFalseIfNoClientsAreCaughtUp() {
         final int maxWarmupReplicas = Integer.MAX_VALUE;
+        final Set<TaskId> allTasks = mkSet(TASK_0_0, TASK_0_1, TASK_0_2, TASK_1_0, TASK_1_1, TASK_1_2);
 
-        final ClientState client1 = getClientStateWithActiveAssignment(asList(TASK_0_0, TASK_1_0));
-        final ClientState client2 = getClientStateWithActiveAssignment(asList(TASK_0_1, TASK_1_1));
-        final ClientState client3 = getClientStateWithActiveAssignment(asList(TASK_0_2, TASK_1_2));
+        final ClientState client1 = getClientStateWithActiveAssignment(mkSet(TASK_0_0, TASK_1_0), mkSet(), allTasks);
+        final ClientState client2 = getClientStateWithActiveAssignment(mkSet(TASK_0_1, TASK_1_1), mkSet(), allTasks);
+        final ClientState client3 = getClientStateWithActiveAssignment(mkSet(TASK_0_2, TASK_1_2), mkSet(), allTasks);
 
         final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = mkMap(
             mkEntry(TASK_0_0, emptySortedSet()),
@@ -93,9 +99,18 @@ public void shouldAssignAllTasksToClientsAndReturnFalseIfNoClientsAreCaughtUp()
             mkEntry(TASK_1_1, emptySortedSet()),
             mkEntry(TASK_1_2, emptySortedSet())
         );
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = mkMap(
+            mkEntry(TASK_0_0, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_0_1, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_0_2, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_1_0, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_1_1, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_1_2, mkOrderedSet(UUID_1, UUID_2, UUID_3))
+        );
         assertThat(
             assignActiveTaskMovements(
                 tasksToCaughtUpClients,
+                tasksToClientByLag,
                 getClientStatesMap(client1, client2, client3),
                 new TreeMap<>(),
                 new AtomicInteger(maxWarmupReplicas)
@@ -107,9 +122,10 @@ public void shouldAssignAllTasksToClientsAndReturnFalseIfNoClientsAreCaughtUp()
     @Test
     public void shouldMoveTasksToCaughtUpClientsAndAssignWarmupReplicasInTheirPlace() {
         final int maxWarmupReplicas = Integer.MAX_VALUE;
-        final ClientState client1 = getClientStateWithActiveAssignment(singletonList(TASK_0_0));
-        final ClientState client2 = getClientStateWithActiveAssignment(singletonList(TASK_0_1));
-        final ClientState client3 = getClientStateWithActiveAssignment(singletonList(TASK_0_2));
+        final Set<TaskId> allTasks = mkSet(TASK_0_0, TASK_0_1, TASK_0_2);
+        final ClientState client1 = getClientStateWithActiveAssignment(mkSet(TASK_0_0), mkSet(TASK_0_0), allTasks);
+        final ClientState client2 = getClientStateWithActiveAssignment(mkSet(TASK_0_1), mkSet(TASK_0_2), allTasks);
+        final ClientState client3 = getClientStateWithActiveAssignment(mkSet(TASK_0_2), mkSet(TASK_0_1), allTasks);
         final Map<UUID, ClientState> clientStates = getClientStatesMap(client1, client2, client3);
 
         final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = mkMap(
@@ -117,11 +133,17 @@ public void shouldMoveTasksToCaughtUpClientsAndAssignWarmupReplicasInTheirPlace(
             mkEntry(TASK_0_1, mkSortedSet(UUID_3)),
             mkEntry(TASK_0_2, mkSortedSet(UUID_2))
         );
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = mkMap(
+            mkEntry(TASK_0_0, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_0_1, mkOrderedSet(UUID_3, UUID_1, UUID_2)),
+            mkEntry(TASK_0_2, mkOrderedSet(UUID_2, UUID_1, UUID_3))
+        );
 
         assertThat(
             "should have assigned movements",
             assignActiveTaskMovements(
                 tasksToCaughtUpClients,
+                tasksToClientByLag,
                 clientStates,
                 new TreeMap<>(),
                 new AtomicInteger(maxWarmupReplicas)
@@ -139,12 +161,60 @@ public void shouldMoveTasksToCaughtUpClientsAndAssignWarmupReplicasInTheirPlace(
         assertThat(client3, hasProperty("standbyTasks", ClientState::standbyTasks, mkSet(TASK_0_2)));
     }
 
+    @Test
+    public void shouldMoveTasksToMostCaughtUpClientsAndAssignWarmupReplicasInTheirPlace() {
+        final int maxWarmupReplicas = Integer.MAX_VALUE;
+        final Map<TaskId, Long> client1Lags = mkMap(mkEntry(TASK_0_0, 10000L), mkEntry(TASK_0_1, 20000L), mkEntry(TASK_0_2, 30000L));
+        final Map<TaskId, Long> client2Lags = mkMap(mkEntry(TASK_0_2, 10000L), mkEntry(TASK_0_0, 20000L), mkEntry(TASK_0_1, 30000L));
+        final Map<TaskId, Long> client3Lags = mkMap(mkEntry(TASK_0_1, 10000L), mkEntry(TASK_0_2, 20000L), mkEntry(TASK_0_0, 30000L));
+
+        final ClientState client1 = getClientStateWithLags(mkSet(TASK_0_0), client1Lags);
+        final ClientState client2 = getClientStateWithLags(mkSet(TASK_0_1), client2Lags);
+        final ClientState client3 = getClientStateWithLags(mkSet(TASK_0_2), client3Lags);
+        // To test when the task is already a standby on the most caught up node
+        client3.assignStandby(TASK_0_1);
+        final Map<UUID, ClientState> clientStates = getClientStatesMap(client1, client2, client3);
+
+        final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = mkMap(
+                mkEntry(TASK_0_0, mkSortedSet()),
+                mkEntry(TASK_0_1, mkSortedSet()),
+                mkEntry(TASK_0_2, mkSortedSet())
+        );
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = mkMap(
+                mkEntry(TASK_0_0, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+                mkEntry(TASK_0_1, mkOrderedSet(UUID_3, UUID_1, UUID_2)),
+                mkEntry(TASK_0_2, mkOrderedSet(UUID_2, UUID_3, UUID_1))
+        );
+
+        assertThat(
+                "should have assigned movements",
+                assignActiveTaskMovements(
+                        tasksToCaughtUpClients,
+                        tasksToClientByLag,
+                        clientStates,
+                        new TreeMap<>(),
+                        new AtomicInteger(maxWarmupReplicas)
+                ),
+                is(2)
+        );
+        // The active tasks have changed to the ones that each client is most caught up on
+        assertThat(client1, hasProperty("activeTasks", ClientState::activeTasks, mkSet(TASK_0_0)));
+        assertThat(client2, hasProperty("activeTasks", ClientState::activeTasks, mkSet(TASK_0_2)));
+        assertThat(client3, hasProperty("activeTasks", ClientState::activeTasks, mkSet(TASK_0_1)));
+
+        // we assigned warmups to migrate to the input active assignment
+        assertThat(client1, hasProperty("standbyTasks", ClientState::standbyTasks, mkSet()));
+        assertThat(client2, hasProperty("standbyTasks", ClientState::standbyTasks, mkSet(TASK_0_1)));
+        assertThat(client3, hasProperty("standbyTasks", ClientState::standbyTasks, mkSet(TASK_0_2)));
+    }
+
     @Test
     public void shouldOnlyGetUpToMaxWarmupReplicasAndReturnTrue() {
         final int maxWarmupReplicas = 1;
-        final ClientState client1 = getClientStateWithActiveAssignment(singletonList(TASK_0_0));
-        final ClientState client2 = getClientStateWithActiveAssignment(singletonList(TASK_0_1));
-        final ClientState client3 = getClientStateWithActiveAssignment(singletonList(TASK_0_2));
+        final Set<TaskId> allTasks = mkSet(TASK_0_0, TASK_0_1, TASK_0_2);
+        final ClientState client1 = getClientStateWithActiveAssignment(mkSet(TASK_0_0), mkSet(TASK_0_0), allTasks);
+        final ClientState client2 = getClientStateWithActiveAssignment(mkSet(TASK_0_1), mkSet(TASK_0_2), allTasks);
+        final ClientState client3 = getClientStateWithActiveAssignment(mkSet(TASK_0_2), mkSet(TASK_0_1), allTasks);
         final Map<UUID, ClientState> clientStates = getClientStatesMap(client1, client2, client3);
 
         final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = mkMap(
@@ -152,11 +222,17 @@ public void shouldOnlyGetUpToMaxWarmupReplicasAndReturnTrue() {
             mkEntry(TASK_0_1, mkSortedSet(UUID_3)),
             mkEntry(TASK_0_2, mkSortedSet(UUID_2))
         );
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = mkMap(
+            mkEntry(TASK_0_0, mkOrderedSet(UUID_1, UUID_2, UUID_3)),
+            mkEntry(TASK_0_1, mkOrderedSet(UUID_3, UUID_1, UUID_2)),
+            mkEntry(TASK_0_2, mkOrderedSet(UUID_2, UUID_1, UUID_3))
+        );
 
         assertThat(
             "should have assigned movements",
             assignActiveTaskMovements(
                 tasksToCaughtUpClients,
+                tasksToClientByLag,
                 clientStates,
                 new TreeMap<>(),
                 new AtomicInteger(maxWarmupReplicas)
@@ -182,19 +258,24 @@ public void shouldOnlyGetUpToMaxWarmupReplicasAndReturnTrue() {
     @Test
     public void shouldNotCountPreviousStandbyTasksTowardsMaxWarmupReplicas() {
         final int maxWarmupReplicas = 0;
-        final ClientState client1 = getClientStateWithActiveAssignment(emptyList());
+        final Set<TaskId> allTasks = mkSet(TASK_0_0);
+        final ClientState client1 = getClientStateWithActiveAssignment(mkSet(), mkSet(TASK_0_0), allTasks);
         client1.assignStandby(TASK_0_0);
-        final ClientState client2 = getClientStateWithActiveAssignment(singletonList(TASK_0_0));
+        final ClientState client2 = getClientStateWithActiveAssignment(mkSet(TASK_0_0), mkSet(), allTasks);
         final Map<UUID, ClientState> clientStates = getClientStatesMap(client1, client2);
 
         final Map<TaskId, SortedSet<UUID>> tasksToCaughtUpClients = mkMap(
             mkEntry(TASK_0_0, mkSortedSet(UUID_1))
         );
+        final Map<TaskId, SortedSet<UUID>> tasksToClientByLag = mkMap(
+            mkEntry(TASK_0_0, mkOrderedSet(UUID_1, UUID_2))
+        );
 
         assertThat(
             "should have assigned movements",
             assignActiveTaskMovements(
                 tasksToCaughtUpClients,
+                tasksToClientByLag,
                 clientStates,
                 new TreeMap<>(),
                 new AtomicInteger(maxWarmupReplicas)
@@ -215,10 +296,35 @@ public void shouldNotCountPreviousStandbyTasksTowardsMaxWarmupReplicas() {
 
     }
 
-    private static ClientState getClientStateWithActiveAssignment(final Collection<TaskId> activeTasks) {
-        final ClientState client1 = new ClientState(1);
+    private static ClientState getClientStateWithActiveAssignment(final Set<TaskId> activeTasks,
+                                                                  final Set<TaskId> caughtUpTasks,
+                                                                  final Set<TaskId> allTasks) {
+        final Map<TaskId, Long> lags = new HashMap<>();
+        for (final TaskId task : allTasks) {
+            if (caughtUpTasks.contains(task)) {
+                lags.put(task, 0L);
+            } else {
+                lags.put(task, 10000L);
+            }
+        }
+        return getClientStateWithLags(activeTasks, lags);
+    }
+
+    private static ClientState getClientStateWithLags(final Set<TaskId> activeTasks,
+                                                      final Map<TaskId, Long> taskLags) {
+        final ClientState client1 = new ClientState(activeTasks, emptySet(), taskLags, emptyMap(), 1);
         client1.assignActiveTasks(activeTasks);
         return client1;
     }
 
+    /**
+     * Creates a SortedSet with the sort order being the order of elements in the parameter list
+     */
+    private static SortedSet<UUID> mkOrderedSet(final UUID... clients) {
+        final List<UUID> clientList = asList(clients);
+        final SortedSet<UUID> set = new TreeSet<>(Comparator.comparing(clientList::indexOf));
+        set.addAll(clientList);
+        return set;
+    }
+
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetricsTest.java
index 0ae1a99e6f53a..738d83683e1e7 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetricsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ProcessorNodeMetricsTest.java
@@ -18,15 +18,18 @@
 
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
-import org.junit.Test;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
 
+import org.junit.Test;
+import org.mockito.MockedStatic;
 
 import java.util.Collections;
 import java.util.Map;
 import java.util.function.Supplier;
 
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TASK_LEVEL_GROUP;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.PROCESSOR_NODE_LEVEL_GROUP;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
@@ -50,19 +53,15 @@ public void shouldGetSuppressionEmitSensor() {
         final String descriptionOfCount = "The total number of emitted records from the suppression buffer";
         final String descriptionOfRate = "The average number of emitted records from the suppression buffer per second";
         when(streamsMetrics.nodeLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, metricNamePrefix, RecordingLevel.DEBUG))
-                .thenReturn(expectedSensor);
+            .thenReturn(expectedSensor);
         when(streamsMetrics.nodeLevelTagMap(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-                expectedSensor,
-                PROCESSOR_NODE_LEVEL_GROUP,
-                tagMap,
-                metricNamePrefix,
-                descriptionOfRate,
-                descriptionOfCount
-        );
 
-        verifySensor(
-                () -> ProcessorNodeMetrics.suppressionEmitSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics));
+        getAndVerifySensor(
+            () -> ProcessorNodeMetrics.suppressionEmitSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics),
+            metricNamePrefix,
+            descriptionOfRate,
+            descriptionOfCount
+        );
     }
 
     @Test
@@ -71,18 +70,14 @@ public void shouldGetIdempotentUpdateSkipSensor() {
         final String descriptionOfCount = "The total number of skipped idempotent updates";
         final String descriptionOfRate = "The average number of skipped idempotent updates per second";
         when(streamsMetrics.nodeLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, metricNamePrefix, RecordingLevel.DEBUG))
-                .thenReturn(expectedSensor);
+            .thenReturn(expectedSensor);
         when(streamsMetrics.nodeLevelTagMap(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-                expectedSensor,
-                PROCESSOR_NODE_LEVEL_GROUP,
-                tagMap,
-                metricNamePrefix,
-                descriptionOfRate,
-                descriptionOfCount
-        );
-        verifySensor(
-                () -> ProcessorNodeMetrics.skippedIdempotentUpdatesSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics)
+
+        getAndVerifySensor(
+            () -> ProcessorNodeMetrics.skippedIdempotentUpdatesSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics),
+            metricNamePrefix,
+            descriptionOfRate,
+            descriptionOfCount
         );
     }
 
@@ -92,26 +87,35 @@ public void shouldGetProcessAtSourceSensor() {
         final String descriptionOfCount = "The total number of calls to process";
         final String descriptionOfRate = "The average number of calls to process per second";
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, metricNamePrefix, RecordingLevel.DEBUG))
-                .thenReturn(expectedParentSensor);
+            .thenReturn(expectedParentSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID))
-                .thenReturn(parentTagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-                expectedParentSensor,
-                StreamsMetricsImpl.TASK_LEVEL_GROUP,
-                parentTagMap,
-                metricNamePrefix,
-                descriptionOfRate,
-                descriptionOfCount
-        );
-        setUpThroughputSensor(
-                metricNamePrefix,
-                descriptionOfRate,
-                descriptionOfCount,
-                RecordingLevel.DEBUG,
-                expectedParentSensor
-        );
-
-        verifySensor(() -> ProcessorNodeMetrics.processAtSourceSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics));
+            .thenReturn(parentTagMap);
+        setUpThroughputSensor(metricNamePrefix, RecordingLevel.DEBUG, expectedParentSensor);
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ProcessorNodeMetrics.processAtSourceSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    PROCESSOR_NODE_LEVEL_GROUP,
+                    tagMap,
+                    metricNamePrefix,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedParentSensor,
+                    TASK_LEVEL_GROUP,
+                    parentTagMap,
+                    metricNamePrefix,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -119,65 +123,69 @@ public void shouldGetForwardSensor() {
         final String metricNamePrefix = "forward";
         final String descriptionOfCount = "The total number of calls to forward";
         final String descriptionOfRate = "The average number of calls to forward per second";
-        setUpThroughputParentSensor(
-            metricNamePrefix,
-            descriptionOfRate,
-            descriptionOfCount
-        );
-        setUpThroughputSensor(
-            metricNamePrefix,
-            descriptionOfRate,
-            descriptionOfCount,
-            RecordingLevel.DEBUG,
-            expectedParentSensor
-        );
-
-        verifySensor(() -> ProcessorNodeMetrics.forwardSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics));
-    }
-
-    private void setUpThroughputParentSensor(final String metricNamePrefix,
-                                             final String descriptionOfRate,
-                                             final String descriptionOfCount) {
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, metricNamePrefix, RecordingLevel.DEBUG))
-                .thenReturn(expectedParentSensor);
+            .thenReturn(expectedParentSensor);
         when(streamsMetrics.nodeLevelTagMap(THREAD_ID, TASK_ID, StreamsMetricsImpl.ROLLUP_VALUE))
-                .thenReturn(parentTagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-                expectedParentSensor,
-                PROCESSOR_NODE_LEVEL_GROUP,
-                parentTagMap,
-                metricNamePrefix,
-                descriptionOfRate,
-                descriptionOfCount
-        );
+            .thenReturn(parentTagMap);
+        setUpThroughputSensor(metricNamePrefix, RecordingLevel.DEBUG, expectedParentSensor);
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ProcessorNodeMetrics.forwardSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    PROCESSOR_NODE_LEVEL_GROUP,
+                    tagMap,
+                    metricNamePrefix,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedParentSensor,
+                    PROCESSOR_NODE_LEVEL_GROUP,
+                    parentTagMap,
+                    metricNamePrefix,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     private void setUpThroughputSensor(final String metricNamePrefix,
-                                           final String descriptionOfRate,
-                                           final String descriptionOfCount,
-                                           final RecordingLevel recordingLevel,
-                                           final Sensor... parentSensors) {
+                                       final RecordingLevel recordingLevel,
+                                       final Sensor... parentSensors) {
         when(streamsMetrics.nodeLevelSensor(
-                THREAD_ID,
-                TASK_ID,
-                PROCESSOR_NODE_ID,
-                metricNamePrefix,
-                recordingLevel,
-                parentSensors
+            THREAD_ID,
+            TASK_ID,
+            PROCESSOR_NODE_ID,
+            metricNamePrefix,
+            recordingLevel,
+            parentSensors
         )).thenReturn(expectedSensor);
         when(streamsMetrics.nodeLevelTagMap(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            PROCESSOR_NODE_LEVEL_GROUP,
-            tagMap,
-            metricNamePrefix,
-            descriptionOfRate,
-            descriptionOfCount
-        );
     }
 
-    private void verifySensor(final Supplier<Sensor> sensorSupplier) {
-        final Sensor sensor = sensorSupplier.get();
-        assertThat(sensor, is(expectedSensor));
+    private void getAndVerifySensor(final Supplier<Sensor> sensorSupplier,
+                                    final String metricNamePrefix,
+                                    final String descriptionOfRate,
+                                    final String descriptionOfCount) {
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = sensorSupplier.get();
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    PROCESSOR_NODE_LEVEL_GROUP,
+                    tagMap,
+                    metricNamePrefix,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImplTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImplTest.java
index 24cf8c7f1cc65..176966f827d3c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImplTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/StreamsMetricsImplTest.java
@@ -45,6 +45,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 import static org.apache.kafka.common.utils.Utils.mkMap;
@@ -58,6 +59,7 @@
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.ROLLUP_VALUE;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.STATE_STORE_LEVEL_GROUP;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.THREAD_LEVEL_GROUP;
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOPIC_LEVEL_GROUP;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOTAL_SUFFIX;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addAvgAndMaxLatencyToSensor;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.addInvocationRateAndCountToSensor;
@@ -83,6 +85,8 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertNotNull;
 import static org.powermock.api.easymock.PowerMock.createMock;
 
 @RunWith(PowerMockRunner.class)
@@ -99,6 +103,10 @@ public class StreamsMetricsImplTest {
     private final static String THREAD_ID1 = "test-thread-1";
     private final static String TASK_ID1 = "test-task-1";
     private final static String TASK_ID2 = "test-task-2";
+    private final static String NODE_ID1 = "test-node-1";
+    private final static String NODE_ID2 = "test-node-2";
+    private final static String TOPIC_ID1 = "test-topic-1";
+    private final static String TOPIC_ID2 = "test-topic-2";
     private final static String METRIC_NAME1 = "test-metric1";
     private final static String METRIC_NAME2 = "test-metric2";
     private final static String THREAD_ID_TAG = "thread-id";
@@ -319,6 +327,46 @@ public void shouldGetExistingTaskLevelSensor() {
         assertThat(actualSensor, is(equalToObject(sensor)));
     }
 
+    @Test
+    public void shouldGetNewTopicLevelSensor() {
+        final Metrics metrics = mock(Metrics.class);
+        final RecordingLevel recordingLevel = RecordingLevel.INFO;
+        setupGetNewSensorTest(metrics, recordingLevel);
+        final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, CLIENT_ID, VERSION, time);
+
+        final Sensor actualSensor = streamsMetrics.topicLevelSensor(
+            THREAD_ID1,
+            TASK_ID1,
+            NODE_ID1,
+            TOPIC_ID1,
+            SENSOR_NAME_1,
+            recordingLevel
+        );
+
+        verify(metrics);
+        assertThat(actualSensor, is(equalToObject(sensor)));
+    }
+
+    @Test
+    public void shouldGetExistingTopicLevelSensor() {
+        final Metrics metrics = mock(Metrics.class);
+        final RecordingLevel recordingLevel = RecordingLevel.INFO;
+        setupGetExistingSensorTest(metrics);
+        final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, CLIENT_ID, VERSION, time);
+
+        final Sensor actualSensor = streamsMetrics.topicLevelSensor(
+            THREAD_ID1,
+            TASK_ID1,
+            NODE_ID1,
+            TOPIC_ID1,
+            SENSOR_NAME_1,
+            recordingLevel
+        );
+
+        verify(metrics);
+        assertThat(actualSensor, is(equalToObject(sensor)));
+    }
+
     @Test
     public void shouldGetNewStoreLevelSensorIfNoneExists() {
         final Metrics metrics = mock(Metrics.class);
@@ -435,7 +483,7 @@ public void shouldAddNewStoreLevelMutableMetric() {
         expect(metrics.metricName(METRIC_NAME1, STATE_STORE_LEVEL_GROUP, DESCRIPTION1, STORE_LEVEL_TAG_MAP))
             .andReturn(metricName);
         expect(metrics.metric(metricName)).andReturn(null);
-        metrics.addMetric(eq(metricName), eqMetricConfig(metricConfig), eq(VALUE_PROVIDER));
+        expect(metrics.addMetricIfAbsent(eq(metricName), eqMetricConfig(metricConfig), eq(VALUE_PROVIDER))).andReturn(null);
         replay(metrics);
         final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, CLIENT_ID, VERSION, time);
 
@@ -452,6 +500,17 @@ public void shouldAddNewStoreLevelMutableMetric() {
         verify(metrics);
     }
 
+    @Test
+    public void shouldCreateNewStoreLevelMutableMetric() {
+        final MetricName metricName =
+                new MetricName(METRIC_NAME1, STATE_STORE_LEVEL_GROUP, DESCRIPTION1, STORE_LEVEL_TAG_MAP);
+        final MetricConfig metricConfig = new MetricConfig().recordLevel(INFO_RECORDING_LEVEL);
+        final Metrics metrics = new Metrics(metricConfig);
+        assertNull(metrics.metric(metricName));
+        metrics.addMetricIfAbsent(metricName, metricConfig, VALUE_PROVIDER);
+        assertNotNull(metrics.metric(metricName));
+    }
+
     @Test
     public void shouldNotAddStoreLevelMutableMetricIfAlreadyExists() {
         final Metrics metrics = mock(Metrics.class);
@@ -476,6 +535,38 @@ public void shouldNotAddStoreLevelMutableMetricIfAlreadyExists() {
         verify(metrics);
     }
 
+    @Test
+    public void shouldReturnSameMetricIfAlreadyCreated() {
+        final MetricName metricName =
+                new MetricName(METRIC_NAME1, STATE_STORE_LEVEL_GROUP, DESCRIPTION1, STORE_LEVEL_TAG_MAP);
+        final MetricConfig metricConfig = new MetricConfig().recordLevel(INFO_RECORDING_LEVEL);
+        final Metrics metrics = new Metrics(metricConfig);
+        assertNull(metrics.metric(metricName));
+        final KafkaMetric kafkaMetric = metrics.addMetricIfAbsent(metricName, metricConfig, VALUE_PROVIDER);
+        assertEquals(kafkaMetric, metrics.addMetricIfAbsent(metricName, metricConfig, VALUE_PROVIDER));
+    }
+
+    @Test
+    public void shouldCreateMetricOnceDuringConcurrentMetricCreationRequest() throws InterruptedException {
+        final MetricName metricName =
+                new MetricName(METRIC_NAME1, STATE_STORE_LEVEL_GROUP, DESCRIPTION1, STORE_LEVEL_TAG_MAP);
+        final MetricConfig metricConfig = new MetricConfig().recordLevel(INFO_RECORDING_LEVEL);
+        final Metrics metrics = new Metrics(metricConfig);
+        assertNull(metrics.metric(metricName));
+        final AtomicReference<KafkaMetric> metricCreatedViaThread1 = new AtomicReference<>();
+        final AtomicReference<KafkaMetric> metricCreatedViaThread2 = new AtomicReference<>();
+
+        final Thread thread1 = new Thread(() -> metricCreatedViaThread1.set(metrics.addMetricIfAbsent(metricName, metricConfig, VALUE_PROVIDER)));
+        final Thread thread2 = new Thread(() -> metricCreatedViaThread2.set(metrics.addMetricIfAbsent(metricName, metricConfig, VALUE_PROVIDER)));
+
+        thread1.start();
+        thread2.start();
+
+        thread1.join();
+        thread2.join();
+        assertEquals(metricCreatedViaThread1.get(), metricCreatedViaThread2.get());
+    }
+
     @Test
     public void shouldRemoveStateStoreLevelSensors() {
         final Metrics metrics = niceMock(Metrics.class);
@@ -505,14 +596,13 @@ public void shouldRemoveStateStoreLevelSensors() {
     public void shouldGetNewNodeLevelSensor() {
         final Metrics metrics = mock(Metrics.class);
         final RecordingLevel recordingLevel = RecordingLevel.INFO;
-        final String processorNodeName = "processorNodeName";
         setupGetNewSensorTest(metrics, recordingLevel);
         final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, CLIENT_ID, VERSION, time);
 
         final Sensor actualSensor = streamsMetrics.nodeLevelSensor(
             THREAD_ID1,
             TASK_ID1,
-            processorNodeName,
+            NODE_ID1,
             SENSOR_NAME_1,
             recordingLevel
         );
@@ -525,14 +615,13 @@ public void shouldGetNewNodeLevelSensor() {
     public void shouldGetExistingNodeLevelSensor() {
         final Metrics metrics = mock(Metrics.class);
         final RecordingLevel recordingLevel = RecordingLevel.INFO;
-        final String processorNodeName = "processorNodeName";
         setupGetExistingSensorTest(metrics);
         final StreamsMetricsImpl streamsMetrics = new StreamsMetricsImpl(metrics, CLIENT_ID, VERSION, time);
 
         final Sensor actualSensor = streamsMetrics.nodeLevelSensor(
             THREAD_ID1,
             TASK_ID1,
-            processorNodeName,
+            NODE_ID1,
             SENSOR_NAME_1,
             recordingLevel
         );
@@ -732,6 +821,9 @@ public void testMultiLevelSensorRemoval() {
         final String processorNodeName = "processorNodeName";
         final Map<String, String> nodeTags = mkMap(mkEntry("nkey", "value"));
 
+        final String topicName = "topicName";
+        final Map<String, String> topicTags = mkMap(mkEntry("tkey", "value"));
+
         final Sensor parent1 = metrics.taskLevelSensor(THREAD_ID1, taskName, operation, RecordingLevel.DEBUG);
         addAvgAndMaxLatencyToSensor(parent1, PROCESSOR_NODE_LEVEL_GROUP, taskTags, operation);
         addInvocationRateAndCountToSensor(parent1, PROCESSOR_NODE_LEVEL_GROUP, taskTags, operation, "", "");
@@ -744,6 +836,18 @@ public void testMultiLevelSensorRemoval() {
 
         assertThat(registry.metrics().size(), greaterThan(numberOfTaskMetrics));
 
+        final int numberOfNodeMetrics = registry.metrics().size();
+
+        final Sensor child1 = metrics.topicLevelSensor(THREAD_ID1, taskName, processorNodeName, topicName, operation, RecordingLevel.DEBUG, sensor1);
+        addAvgAndMaxLatencyToSensor(child1, TOPIC_LEVEL_GROUP, topicTags, operation);
+        addInvocationRateAndCountToSensor(child1, TOPIC_LEVEL_GROUP, topicTags, operation, "", "");
+
+        assertThat(registry.metrics().size(), greaterThan(numberOfNodeMetrics));
+
+        metrics.removeAllTopicLevelSensors(THREAD_ID1, taskName, processorNodeName, topicName);
+
+        assertThat(registry.metrics().size(), equalTo(numberOfNodeMetrics));
+
         metrics.removeAllNodeLevelSensors(THREAD_ID1, taskName, processorNodeName);
 
         assertThat(registry.metrics().size(), equalTo(numberOfTaskMetrics));
@@ -1104,6 +1208,22 @@ public void shouldAddValue() {
         assertThat(metrics.metrics().size(), equalTo(1 + 1)); // one metric is added automatically in the constructor of Metrics
     }
 
+    @Test
+    public void shouldAddTotalCountAndSumMetricsToSensor() {
+        final String totalMetricNamePrefix = "total";
+        final String sumMetricNamePrefix = "count";
+        StreamsMetricsImpl
+            .addTotalCountAndSumMetricsToSensor(sensor, group, tags, totalMetricNamePrefix, sumMetricNamePrefix, DESCRIPTION1, DESCRIPTION2);
+
+        final double valueToRecord1 = 18.0;
+        final double valueToRecord2 = 42.0;
+        final double expectedCountMetricValue = 2;
+        verifyMetric(totalMetricNamePrefix + "-total", DESCRIPTION1, valueToRecord1, valueToRecord2, expectedCountMetricValue);
+        final double expectedSumMetricValue = 2 * valueToRecord1 + 2 * valueToRecord2; // values are recorded once for each metric verification
+        verifyMetric(sumMetricNamePrefix + "-total", DESCRIPTION2, valueToRecord1, valueToRecord2, expectedSumMetricValue);
+        assertThat(metrics.metrics().size(), equalTo(2 + 1)); // one metric is added automatically in the constructor of Metrics
+    }
+
     @Test
     public void shouldAddAvgAndTotalMetricsToSensor() {
         StreamsMetricsImpl
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TaskMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TaskMetricsTest.java
index 1d33fea1ecc0a..a38fb322bc24d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TaskMetricsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TaskMetricsTest.java
@@ -19,7 +19,10 @@
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
 import org.junit.Test;
+import org.mockito.MockedStatic;
+
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
 import static org.mockito.Mockito.when;
 
 import java.util.Collections;
@@ -48,18 +51,20 @@ public void shouldGetActiveProcessRatioSensor() {
         final String ratioDescription = "The fraction of time the thread spent " +
             "on processing this task among all assigned active tasks";
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            ratioDescription
-        );
-
-
-        final Sensor sensor = TaskMetrics.activeProcessRatioSensor(THREAD_ID, TASK_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.activeProcessRatioSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    ratioDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -70,18 +75,20 @@ public void shouldGetActiveBufferCountSensor() {
         final String countDescription = "The count of buffered records that are polled " +
             "from consumer and not yet processed for this active task";
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            countDescription
-        );
 
-
-        final Sensor sensor = TaskMetrics.activeBufferedRecordsSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.activeBufferedRecordsSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    countDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -92,18 +99,21 @@ public void shouldGetProcessLatencySensor() {
         final String avgLatencyDescription = "The average latency of calls to process";
         final String maxLatencyDescription = "The maximum latency of calls to process";
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            avgLatencyDescription,
-            maxLatencyDescription
-        );
-
-        final Sensor sensor = TaskMetrics.processLatencySensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.processLatencySensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -117,26 +127,31 @@ public void shouldGetPunctuateSensor() {
         final String avgLatencyDescription = "The average latency of calls to punctuate";
         final String maxLatencyDescription = "The maximum latency of calls to punctuate";
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operationLatency,
-            avgLatencyDescription,
-            maxLatencyDescription
-        );
-
-        final Sensor sensor = TaskMetrics.punctuateSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.punctuateSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operationLatency,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -146,18 +161,21 @@ public void shouldGetCommitSensor() {
         final String rateDescription = "The average number of calls to commit per second";
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, operation, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = TaskMetrics.commitSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.commitSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -167,18 +185,21 @@ public void shouldGetEnforcedProcessingSensor() {
         final String rateDescription = "The average number of occurrences of enforced-processing operations per second";
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, operation, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = TaskMetrics.enforcedProcessingSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.enforcedProcessingSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -192,18 +213,21 @@ public void shouldGetRecordLatenessSensor() {
                 + "the current stream time";
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, operation, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            avgDescription,
-            maxDescription
-        );
-
-        final Sensor sensor = TaskMetrics.recordLatenessSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.recordLatenessSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    avgDescription,
+                    maxDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -213,17 +237,20 @@ public void shouldGetDroppedRecordsSensor() {
         final String rateDescription = "The average number of dropped records per second";
         when(streamsMetrics.taskLevelSensor(THREAD_ID, TASK_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, TASK_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = TaskMetrics.droppedRecordsSensor(THREAD_ID, TASK_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = TaskMetrics.droppedRecordsSensor(THREAD_ID, TASK_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
-}
\ No newline at end of file
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetricsTest.java
index 6ed97ebf7cbeb..2bbb6acb2a84c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetricsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/ThreadMetricsTest.java
@@ -24,12 +24,14 @@
 
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 import java.util.Collections;
 import java.util.Map;
 import org.mockito.ArgumentCaptor;
+import org.mockito.MockedStatic;
 
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.LATENCY_SUFFIX;
 import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.RATE_SUFFIX;
@@ -54,17 +56,20 @@ public void shouldGetProcessRatioSensor() {
         final String ratioDescription = "The fraction of time the thread spent on processing active tasks";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            ratioDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.processRatioSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.processRatioSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    ratioDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -74,18 +79,21 @@ public void shouldGetProcessRecordsSensor() {
         final String maxDescription = "The maximum number of records processed within an iteration";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            avgDescription,
-            maxDescription
-        );
 
-        final Sensor sensor = ThreadMetrics.processRecordsSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.processRecordsSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    avgDescription,
+                    maxDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -95,18 +103,21 @@ public void shouldGetProcessLatencySensor() {
         final String maxLatencyDescription = "The maximum process latency";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operationLatency, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operationLatency,
-            avgLatencyDescription,
-            maxLatencyDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.processLatencySensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.processLatencySensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operationLatency,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -117,18 +128,21 @@ public void shouldGetProcessRateSensor() {
         final String rateDescription = "The average per-second number of calls to process";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operationRate, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addRateOfSumAndSumMetricsToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.processRateSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.processRateSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addRateOfSumAndSumMetricsToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -137,17 +151,20 @@ public void shouldGetPollRatioSensor() {
         final String ratioDescription = "The fraction of time the thread spent on polling records from consumer";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            ratioDescription
-        );
 
-        final Sensor sensor = ThreadMetrics.pollRatioSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.pollRatioSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    ratioDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -157,18 +174,21 @@ public void shouldGetPollRecordsSensor() {
         final String maxDescription = "The maximum number of records polled from consumer within an iteration";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            avgDescription,
-            maxDescription
-        );
 
-        final Sensor sensor = ThreadMetrics.pollRecordsSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.pollRecordsSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    avgDescription,
+                    maxDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -181,26 +201,31 @@ public void shouldGetPollSensor() {
         final String maxLatencyDescription = "The maximum poll latency";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operationLatency,
-            avgLatencyDescription,
-            maxLatencyDescription
-        );
 
-        final Sensor sensor = ThreadMetrics.pollSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.pollSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operationLatency,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -213,25 +238,31 @@ public void shouldGetCommitSensor() {
         final String maxLatencyDescription = "The maximum commit latency";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operationLatency,
-            avgLatencyDescription,
-            maxLatencyDescription);
 
-        final Sensor sensor = ThreadMetrics.commitSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.commitSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operationLatency,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -240,17 +271,20 @@ public void shouldGetCommitRatioSensor() {
         final String ratioDescription = "The fraction of time the thread spent on committing all tasks";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            ratioDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.commitRatioSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.commitRatioSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    ratioDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -262,18 +296,21 @@ public void shouldGetCommitOverTasksSensor() {
             "The average per-second number of calls to commit over all tasks assigned to one stream thread";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
         when(streamsMetrics.taskLevelTagMap(THREAD_ID, ROLLUP_VALUE)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            TASK_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.commitOverTasksSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.commitOverTasksSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    TASK_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -286,26 +323,31 @@ public void shouldGetPunctuateSensor() {
         final String maxLatencyDescription = "The maximum punctuate latency";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operationLatency,
-            avgLatencyDescription,
-            maxLatencyDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.punctuateSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.punctuateSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operationLatency,
+                    avgLatencyDescription,
+                    maxLatencyDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -314,42 +356,21 @@ public void shouldGetPunctuateRatioSensor() {
         final String ratioDescription = "The fraction of time the thread spent on punctuating active tasks";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addValueMetricToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            ratioDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.punctuateRatioSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.punctuateRatioSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addValueMetricToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    ratioDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
-
-    @Test
-    public void shouldGetSkipRecordSensor() {
-        final String operation = "skipped-records";
-        final String totalDescription = "The total number of skipped records";
-        final String rateDescription = "The average per-second number of skipped records";
-        when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO))
-                .thenReturn(expectedSensor);
-        when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-        final Sensor sensor = ThreadMetrics.skipRecordSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
-    }
-
-    @Test
     public void shouldGetCreateTaskSensor() {
         final String operation = "task-created";
         final String totalDescription = "The total number of newly created tasks";
@@ -357,19 +378,20 @@ public void shouldGetCreateTaskSensor() {
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
 
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-
-        final Sensor sensor = ThreadMetrics.createTaskSensor(THREAD_ID, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.createTaskSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
@@ -379,19 +401,21 @@ public void shouldGetCloseTaskSensor() {
         final String rateDescription = "The average per-second number of closed tasks";
         when(streamsMetrics.threadLevelSensor(THREAD_ID, operation, RecordingLevel.INFO)).thenReturn(expectedSensor);
         when(streamsMetrics.threadLevelTagMap(THREAD_ID)).thenReturn(tagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            THREAD_LEVEL_GROUP,
-            tagMap,
-            operation,
-            rateDescription,
-            totalDescription
-        );
-
-
-        final Sensor sensor = ThreadMetrics.closeTaskSensor(THREAD_ID, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = ThreadMetrics.closeTaskSensor(THREAD_ID, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    THREAD_LEVEL_GROUP,
+                    tagMap,
+                    operation,
+                    rateDescription,
+                    totalDescription
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
diff --git a/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetricsTest.java
new file mode 100644
index 0000000000000..b698b26192a08
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/processor/internals/metrics/TopicMetricsTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.processor.internals.metrics;
+
+import org.apache.kafka.common.metrics.Sensor;
+import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
+
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.mockito.MockedStatic;
+import java.util.Collections;
+import java.util.Map;
+import java.util.function.Supplier;
+
+import static org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl.TOPIC_LEVEL_GROUP;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
+
+public class TopicMetricsTest {
+
+    private static final String THREAD_ID = "test-thread";
+    private static final String TASK_ID = "test-task";
+    private static final String PROCESSOR_NODE_ID = "test-processor";
+    private static final String TOPIC = "topic";
+
+    private final Map<String, String> tagMap = Collections.singletonMap("hello", "world");
+
+    private final Sensor expectedSensor = mock(Sensor.class);
+    private static final MockedStatic<StreamsMetricsImpl> STREAMS_METRICS_STATIC_MOCK = mockStatic(StreamsMetricsImpl.class);
+    private final StreamsMetricsImpl streamsMetrics = mock(StreamsMetricsImpl.class);
+
+    @AfterClass
+    public static void cleanUp() {
+        STREAMS_METRICS_STATIC_MOCK.close();
+    }
+
+    @Test
+    public void shouldGetRecordsAndBytesConsumedSensor() {
+        final String recordsMetricNamePrefix = "records-consumed";
+        final String bytesMetricNamePrefix = "bytes-consumed";
+        final String descriptionOfRecordsTotal = "The total number of records consumed from this topic";
+        final String descriptionOfBytesTotal = "The total number of bytes consumed from this topic";
+
+        when(streamsMetrics.topicLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, "consumed", RecordingLevel.INFO))
+            .thenReturn(expectedSensor);
+        when(streamsMetrics.topicLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, "consumed", RecordingLevel.INFO))
+            .thenReturn(expectedSensor);
+        when(streamsMetrics.topicLevelTagMap(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC)).thenReturn(tagMap);
+
+        verifySensor(
+            () -> TopicMetrics.consumedSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, streamsMetrics)
+        );
+
+        STREAMS_METRICS_STATIC_MOCK.verify(
+            () -> StreamsMetricsImpl.addTotalCountAndSumMetricsToSensor(
+                expectedSensor,
+                TOPIC_LEVEL_GROUP,
+                tagMap,
+                recordsMetricNamePrefix,
+                bytesMetricNamePrefix,
+                descriptionOfRecordsTotal,
+                descriptionOfBytesTotal
+            )
+        );
+    }
+
+    @Test
+    public void shouldGetRecordsAndBytesProducedSensor() {
+        final String recordsMetricNamePrefix = "records-produced";
+        final String bytesMetricNamePrefix = "bytes-produced";
+        final String descriptionOfRecordsTotal = "The total number of records produced to this topic";
+        final String descriptionOfBytesTotal = "The total number of bytes produced to this topic";
+
+        when(streamsMetrics.topicLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, "produced", RecordingLevel.INFO))
+            .thenReturn(expectedSensor);
+        when(streamsMetrics.topicLevelSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, "produced", RecordingLevel.INFO))
+            .thenReturn(expectedSensor);
+        when(streamsMetrics.topicLevelTagMap(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC)).thenReturn(tagMap);
+
+        verifySensor(() -> TopicMetrics.producedSensor(THREAD_ID, TASK_ID, PROCESSOR_NODE_ID, TOPIC, streamsMetrics));
+
+        STREAMS_METRICS_STATIC_MOCK.verify(
+            () -> StreamsMetricsImpl.addTotalCountAndSumMetricsToSensor(
+                expectedSensor,
+                TOPIC_LEVEL_GROUP,
+                tagMap,
+                recordsMetricNamePrefix,
+                bytesMetricNamePrefix,
+                descriptionOfRecordsTotal,
+                descriptionOfBytesTotal
+            )
+        );
+    }
+
+    private void verifySensor(final Supplier<Sensor> sensorSupplier) {
+        final Sensor sensor = sensorSupplier.get();
+        assertThat(sensor, is(expectedSensor));
+    }
+
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java b/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java
index 6a95ccbd08a2a..178399426076f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/KeyValueStoreTestDriver.java
@@ -32,7 +32,9 @@
 import org.apache.kafka.streams.processor.StateStoreContext;
 import org.apache.kafka.streams.processor.StreamPartitioner;
 import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
+import org.apache.kafka.streams.processor.internals.ProcessorTopology;
 import org.apache.kafka.streams.processor.internals.RecordCollector;
 import org.apache.kafka.streams.processor.internals.RecordCollectorImpl;
 import org.apache.kafka.streams.processor.internals.StreamsProducer;
@@ -45,6 +47,7 @@
 import org.apache.kafka.test.TestUtils;
 
 import java.io.File;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
@@ -54,6 +57,9 @@
 import java.util.Properties;
 import java.util.Set;
 
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
 /**
  * A component that provides a {@link #context() ProcessingContext} that can be supplied to a {@link KeyValueStore} so that
  * all entries written to the Kafka topic by the store during {@link KeyValueStore#flush()} are captured for testing purposes.
@@ -199,6 +205,9 @@ private KeyValueStoreTestDriver(final StateSerdes<K, V> serdes) {
         props.put(StreamsConfig.ROCKSDB_CONFIG_SETTER_CLASS_CONFIG, MockRocksDbConfigSetter.class);
         props.put(StreamsConfig.METRICS_RECORDING_LEVEL_CONFIG, "DEBUG");
 
+        final ProcessorTopology topology = mock(ProcessorTopology.class);
+        when(topology.sinkTopics()).thenReturn(Collections.emptySet());
+
         final LogContext logContext = new LogContext("KeyValueStoreTestDriver ");
         final RecordCollector recordCollector = new RecordCollectorImpl(
             logContext,
@@ -212,7 +221,8 @@ private KeyValueStoreTestDriver(final StateSerdes<K, V> serdes) {
                 logContext,
                 Time.SYSTEM),
             new DefaultProductionExceptionHandler(),
-            new MockStreamsMetrics(new Metrics())
+            new MockStreamsMetrics(new Metrics()),
+            topology
         ) {
             @Override
             public <K1, V1> void send(final String topic,
@@ -222,11 +232,16 @@ public <K1, V1> void send(final String topic,
                                       final Integer partition,
                                       final Long timestamp,
                                       final Serializer<K1> keySerializer,
-                                      final Serializer<V1> valueSerializer) {
+                                      final Serializer<V1> valueSerializer,
+                                      final String processorNodeId,
+                                      final InternalProcessorContext<Void, Void> context) {
                 // for byte arrays we need to wrap it for comparison
 
-                final K keyTest = serdes.keyFrom(keySerializer.serialize(topic, headers, key));
-                final V valueTest = serdes.valueFrom(valueSerializer.serialize(topic, headers, value));
+                final byte[] keyBytes = keySerializer.serialize(topic, headers, key);
+                final byte[] valueBytes = valueSerializer.serialize(topic, headers, value);
+
+                final K keyTest = serdes.keyFrom(keyBytes);
+                final V valueTest = serdes.valueFrom(valueBytes);
 
                 recordFlushed(keyTest, valueTest);
             }
@@ -239,6 +254,8 @@ public <K1, V1> void send(final String topic,
                                       final Long timestamp,
                                       final Serializer<K1> keySerializer,
                                       final Serializer<V1> valueSerializer,
+                                      final String processorNodeId,
+                                      final InternalProcessorContext<Void, Void> context,
                                       final StreamPartitioner<? super K1, ? super V1> partitioner) {
                 throw new UnsupportedOperationException();
             }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStoreTest.java
index e8d578d017b0b..3644e8eaa6d9f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractDualSchemaRocksDBSegmentedBytesStoreTest.java
@@ -37,6 +37,7 @@
 import org.apache.kafka.streams.StreamsConfig.InternalConfig;
 import org.apache.kafka.streams.kstream.Window;
 import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.internals.SessionWindow;
 import org.apache.kafka.streams.kstream.internals.TimeWindow;
 import org.apache.kafka.streams.processor.StateStoreContext;
 import org.apache.kafka.streams.processor.internals.ChangelogRecordDeserializationHelper;
@@ -48,6 +49,8 @@
 import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.StateSerdes;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.KeyFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.TimeFirstSessionKeySchema;
 import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
 import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
 import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
@@ -88,6 +91,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
@@ -98,7 +102,9 @@ public abstract class AbstractDualSchemaRocksDBSegmentedBytesStoreTest<S extends
     private AbstractDualSchemaRocksDBSegmentedBytesStore<S> bytesStore;
     private File stateDir;
     private final Window[] windows = new Window[4];
-    private Window nextSegmentWindow;
+    private Window nextSegmentWindow, startEdgeWindow, endEdgeWindow;
+    private final long startEdgeTime = Long.MAX_VALUE - 700L;
+    private final long endEdgeTime = Long.MAX_VALUE - 600L;
 
     final long retention = 1000;
     final long segmentInterval = 60_000L;
@@ -106,6 +112,20 @@ public abstract class AbstractDualSchemaRocksDBSegmentedBytesStoreTest<S extends
 
     @Before
     public void before() {
+        if (getBaseSchema() instanceof TimeFirstSessionKeySchema) {
+            windows[0] = new SessionWindow(10L, 10L);
+            windows[1] = new SessionWindow(500L, 1000L);
+            windows[2] = new SessionWindow(1_000L, 1_500L);
+            windows[3] = new SessionWindow(30_000L, 60_000L);
+            // All four of the previous windows will go into segment 1.
+            // The nextSegmentWindow is computed be a high enough time that when it gets written
+            // to the segment store, it will advance stream time past the first segment's retention time and
+            // expire it.
+            nextSegmentWindow = new SessionWindow(segmentInterval + retention, segmentInterval + retention);
+
+            startEdgeWindow = new SessionWindow(0L, startEdgeTime);
+            endEdgeWindow = new SessionWindow(endEdgeTime, Long.MAX_VALUE);
+        }
         if (getBaseSchema() instanceof TimeFirstWindowKeySchema) {
             windows[0] = timeWindowForSize(10L, windowSizeForTimeWindow);
             windows[1] = timeWindowForSize(500L, windowSizeForTimeWindow);
@@ -116,6 +136,9 @@ public void before() {
             // to the segment store, it will advance stream time past the first segment's retention time and
             // expire it.
             nextSegmentWindow = timeWindowForSize(segmentInterval + retention, windowSizeForTimeWindow);
+
+            startEdgeWindow = timeWindowForSize(startEdgeTime, windowSizeForTimeWindow);
+            endEdgeWindow = timeWindowForSize(endEdgeTime, windowSizeForTimeWindow);
         }
 
         bytesStore = getBytesStore();
@@ -285,8 +308,370 @@ public void shouldPutAndBackwardFetch() {
         }
     }
 
+    @Test
+    public void shouldPutAndFetchEdgeSingleKey() {
+        final String keyA = "a";
+        final String keyB = "b";
+
+        final Bytes serializedKeyAStart = serializeKey(new Windowed<>(keyA, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyAEnd = serializeKey(new Windowed<>(keyA, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBStart = serializeKey(new Windowed<>(keyB, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBEnd = serializeKey(new Windowed<>(keyB, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+
+        bytesStore.put(serializedKeyAStart, serializeValue(10));
+        bytesStore.put(serializedKeyAEnd, serializeValue(50));
+        bytesStore.put(serializedKeyBStart, serializeValue(100));
+        bytesStore.put(serializedKeyBEnd, serializeValue(150));
+
+        // Can fetch start/end edge for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyA.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch start/end edge for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyB.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyA.getBytes()), 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyB.getBytes()), 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+    }
+
+    @Test
+    public void shouldPutAndFetchEdgeKeyRange() {
+        final String keyA = "a";
+        final String keyB = "b";
+
+        final Bytes serializedKeyAStart = serializeKey(new Windowed<>(keyA, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyAEnd = serializeKey(new Windowed<>(keyA, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBStart = serializeKey(new Windowed<>(keyB, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBEnd = serializeKey(new Windowed<>(keyB, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+
+        bytesStore.put(serializedKeyAStart, serializeValue(10));
+        bytesStore.put(serializedKeyAEnd, serializeValue(50));
+        bytesStore.put(serializedKeyBStart, serializeValue(100));
+        bytesStore.put(serializedKeyBEnd, serializeValue(150));
+        // Can fetch from start/end for key range
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyA.getBytes()), Bytes.wrap(keyB.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for key range
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyA.getBytes()), Bytes.wrap(keyB.getBytes()), 0L, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // KeyB should be ignored and KeyA should be included even in storage
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            null, Bytes.wrap(keyA.getBytes()), startEdgeTime, endEdgeTime - 1L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            Bytes.wrap(keyB.getBytes()), null, startEdgeTime + 1, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            null, null, 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.fetch(
+            null, null, startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+    }
+
+    @Test
+    public void shouldPutAndBackwardFetchEdgeSingleKey() {
+        final String keyA = "a";
+        final String keyB = "b";
+
+        final Bytes serializedKeyAStart = serializeKey(new Windowed<>(keyA, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyAEnd = serializeKey(new Windowed<>(keyA, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBStart = serializeKey(new Windowed<>(keyB, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBEnd = serializeKey(new Windowed<>(keyB, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+
+        bytesStore.put(serializedKeyAStart, serializeValue(10));
+        bytesStore.put(serializedKeyAEnd, serializeValue(50));
+        bytesStore.put(serializedKeyBStart, serializeValue(100));
+        bytesStore.put(serializedKeyBEnd, serializeValue(150));
+
+        // Can fetch start/end edge for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyA.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch start/end edge for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyB.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyA.getBytes()), 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for single key
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyB.getBytes()), 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+    }
+
+    @Test
+    public void shouldPutAndBackwardFetchEdgeKeyRange() {
+        final String keyA = "a";
+        final String keyB = "b";
+
+        final Bytes serializedKeyAStart = serializeKey(new Windowed<>(keyA, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyAEnd = serializeKey(new Windowed<>(keyA, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBStart = serializeKey(new Windowed<>(keyB, startEdgeWindow), false,
+            Integer.MAX_VALUE);
+        final Bytes serializedKeyBEnd = serializeKey(new Windowed<>(keyB, endEdgeWindow), false,
+            Integer.MAX_VALUE);
+
+        bytesStore.put(serializedKeyAStart, serializeValue(10));
+        bytesStore.put(serializedKeyAEnd, serializeValue(50));
+        bytesStore.put(serializedKeyBStart, serializeValue(100));
+        bytesStore.put(serializedKeyBEnd, serializeValue(150));
+
+        // Can fetch from start/end for key range
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyA.getBytes()), Bytes.wrap(keyB.getBytes()), startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Can fetch from 0 to max for key range
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyA.getBytes()), Bytes.wrap(keyB.getBytes()), 0L, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // KeyB should be ignored and KeyA should be included even in storage
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            null, Bytes.wrap(keyA.getBytes()), startEdgeTime, endEdgeTime - 1L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            Bytes.wrap(keyB.getBytes()), null, startEdgeTime + 1, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            null, null, 0, Long.MAX_VALUE)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        try (final KeyValueIterator<Bytes, byte[]> values = bytesStore.backwardFetch(
+            null, null, startEdgeTime, endEdgeTime)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = getIndexSchema() == null ? asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            ) : asList(
+                KeyValue.pair(new Windowed<>(keyB, endEdgeWindow), 150L),
+                KeyValue.pair(new Windowed<>(keyB, startEdgeWindow), 100L),
+                KeyValue.pair(new Windowed<>(keyA, endEdgeWindow), 50L),
+                KeyValue.pair(new Windowed<>(keyA, startEdgeWindow), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+    }
+
     @Test
     public void shouldPutAndFetchWithPrefixKey() {
+        // Only for TimeFirstWindowKeySchema schema
+        if (!(getBaseSchema() instanceof TimeFirstWindowKeySchema)) {
+            return;
+        }
         final String keyA = "a";
         final String keyB = "aa";
         final String keyC = "aaa";
@@ -365,6 +750,11 @@ public void shouldPutAndFetchWithPrefixKey() {
 
     @Test
     public void shouldPutAndBackwardFetchWithPrefix() {
+        // Only for TimeFirstWindowKeySchema schema
+        if (!(getBaseSchema() instanceof TimeFirstWindowKeySchema)) {
+            return;
+        }
+
         final String keyA = "a";
         final String keyB = "aa";
         final String keyC = "aaa";
@@ -438,6 +828,143 @@ public void shouldPutAndBackwardFetchWithPrefix() {
         }
     }
 
+    @Test
+    public void shouldFetchSessionForSingleKey() {
+        // Only for TimeFirstSessionKeySchema schema
+        if (!(getBaseSchema() instanceof TimeFirstSessionKeySchema)) {
+            return;
+        }
+
+        final String keyA = "a";
+        final String keyB = "b";
+        final String keyC = "c";
+
+        final StateSerdes<String, Long> stateSerdes = StateSerdes.withBuiltinTypes("dummy", String.class, Long.class);
+        final Bytes key1 = Bytes.wrap(stateSerdes.keySerializer().serialize("dummy", keyA));
+        final Bytes key2 = Bytes.wrap(stateSerdes.keySerializer().serialize("dummy", keyB));
+        final Bytes key3 = Bytes.wrap(stateSerdes.keySerializer().serialize("dummy", keyC));
+
+        final byte[] expectedValue1 = serializeValue(10);
+        final byte[] expectedValue2 = serializeValue(50);
+        final byte[] expectedValue3 = serializeValue(100);
+        final byte[] expectedValue4 = serializeValue(200);
+
+        bytesStore.put(serializeKey(new Windowed<>(keyA, windows[0])), expectedValue1);
+        bytesStore.put(serializeKey(new Windowed<>(keyA, windows[1])), expectedValue2);
+        bytesStore.put(serializeKey(new Windowed<>(keyB, windows[2])), expectedValue3);
+        bytesStore.put(serializeKey(new Windowed<>(keyC, windows[3])), expectedValue4);
+
+        final byte[] value1 = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSession(
+            key1, windows[0].start(), windows[0].end());
+        assertEquals(Bytes.wrap(value1), Bytes.wrap(expectedValue1));
+
+        final byte[] value2 = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSession(
+            key1, windows[1].start(), windows[1].end());
+        assertEquals(Bytes.wrap(value2), Bytes.wrap(expectedValue2));
+
+        final byte[] value3 = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSession(
+            key2, windows[2].start(), windows[2].end());
+        assertEquals(Bytes.wrap(value3), Bytes.wrap(expectedValue3));
+
+        final byte[] value4 = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSession(
+            key3, windows[3].start(), windows[3].end());
+        assertEquals(Bytes.wrap(value4), Bytes.wrap(expectedValue4));
+
+        final byte[] noValue = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSession(
+            key3, 2000, 3000);
+        assertNull(noValue);
+    }
+
+    @Test
+    public void shouldFetchSessionForTimeRange() {
+        // Only for TimeFirstSessionKeySchema schema
+        if (!(getBaseSchema() instanceof TimeFirstSessionKeySchema)) {
+            return;
+        }
+        final String keyA = "a";
+        final String keyB = "b";
+        final String keyC = "c";
+
+        final Window[] sessionWindows = new Window[4];
+        sessionWindows[0] = new SessionWindow(100L, 100L);
+        sessionWindows[1] = new SessionWindow(50L, 200L);
+        sessionWindows[2] = new SessionWindow(200L, 300L);
+        bytesStore.put(serializeKey(new Windowed<>(keyA, sessionWindows[0])), serializeValue(10));
+        bytesStore.put(serializeKey(new Windowed<>(keyB, sessionWindows[1])), serializeValue(100));
+        bytesStore.put(serializeKey(new Windowed<>(keyC, sessionWindows[2])), serializeValue(200));
+
+
+        // Fetch point
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(100L, 100L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = Collections.singletonList(
+                KeyValue.pair(new Windowed<>(keyA, sessionWindows[0]), 10L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Fetch partial boundary
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(100L, 200L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, sessionWindows[0]), 10L),
+                KeyValue.pair(new Windowed<>(keyB, sessionWindows[1]), 100L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Fetch partial
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(99L, 201L)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, sessionWindows[0]), 10L),
+                KeyValue.pair(new Windowed<>(keyB, sessionWindows[1]), 100L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Fetch partial
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(101L, 199L)) {
+            assertTrue(toList(values).isEmpty());
+        }
+
+        // Fetch all boundary
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(100L, 300L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, sessionWindows[0]), 10L),
+                KeyValue.pair(new Windowed<>(keyB, sessionWindows[1]), 100L),
+                KeyValue.pair(new Windowed<>(keyC, sessionWindows[2]), 200L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Fetch all
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(99L, 301L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>(keyA, sessionWindows[0]), 10L),
+                KeyValue.pair(new Windowed<>(keyB, sessionWindows[1]), 100L),
+                KeyValue.pair(new Windowed<>(keyC, sessionWindows[2]), 200L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+
+        // Fetch all
+        try (final KeyValueIterator<Bytes, byte[]> values = ((RocksDBTimeOrderedSessionSegmentedBytesStore) bytesStore).fetchSessions(101L, 299L)) {
+
+            final List<KeyValue<Windowed<String>, Long>> expected = Collections.singletonList(
+                KeyValue.pair(new Windowed<>(keyB, sessionWindows[1]), 100L)
+            );
+
+            assertEquals(expected, toList(values));
+        }
+    }
+
     @Test
     public void shouldSkipAndRemoveDanglingIndex() {
         final String keyA = "a";
@@ -1081,10 +1608,16 @@ private Bytes serializeKey(final Windowed<String> key, final boolean changeLog)
 
     private Bytes serializeKey(final Windowed<String> key, final boolean changeLog, final int seq) {
         final StateSerdes<String, Long> stateSerdes = StateSerdes.withBuiltinTypes("dummy", String.class, Long.class);
-        if (changeLog) {
-            return WindowKeySchema.toStoreKeyBinary(key, seq, stateSerdes);
-        } else if (getBaseSchema() instanceof TimeFirstWindowKeySchema) {
+        if (getBaseSchema() instanceof TimeFirstWindowKeySchema) {
+            if (changeLog) {
+                return WindowKeySchema.toStoreKeyBinary(key, seq, stateSerdes);
+            }
             return TimeFirstWindowKeySchema.toStoreKeyBinary(key, seq, stateSerdes);
+        } else if (getBaseSchema() instanceof TimeFirstSessionKeySchema) {
+            if (changeLog) {
+                return Bytes.wrap(SessionKeySchema.toBinary(key, stateSerdes.keySerializer(), "dummy"));
+            }
+            return Bytes.wrap(TimeFirstSessionKeySchema.toBinary(key, stateSerdes.keySerializer(), "dummy"));
         } else {
             throw new IllegalStateException("Unrecognized serde schema");
         }
@@ -1094,6 +1627,8 @@ private Bytes serializeKeyForIndex(final Windowed<String> key) {
         final StateSerdes<String, Long> stateSerdes = StateSerdes.withBuiltinTypes("dummy", String.class, Long.class);
         if (getIndexSchema() instanceof KeyFirstWindowKeySchema) {
             return KeyFirstWindowKeySchema.toStoreKeyBinary(key, 0, stateSerdes);
+        } else if (getIndexSchema() instanceof KeyFirstSessionKeySchema) {
+            return Bytes.wrap(KeyFirstSessionKeySchema.toBinary(key, stateSerdes.keySerializer(), "dummy"));
         } else {
             throw new IllegalStateException("Unrecognized serde schema");
         }
@@ -1119,6 +1654,12 @@ private List<KeyValue<Windowed<String>, Long>> toList(final KeyValueIterator<Byt
                     stateSerdes.valueDeserializer().deserialize("dummy", next.value)
                 );
                 results.add(deserialized);
+            } else if (getBaseSchema() instanceof TimeFirstSessionKeySchema) {
+                final KeyValue<Windowed<String>, Long> deserialized = KeyValue.pair(
+                    TimeFirstSessionKeySchema.from(next.key.get(), stateSerdes.keyDeserializer(), "dummy"),
+                    stateSerdes.valueDeserializer().deserialize("dummy", next.value)
+                );
+                results.add(deserialized);
             } else {
                 throw new IllegalStateException("Unrecognized serde schema");
             }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractSessionBytesStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractSessionBytesStoreTest.java
index 6e93f6a7ba1ad..78d7f08ee84ee 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractSessionBytesStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/AbstractSessionBytesStoreTest.java
@@ -19,6 +19,7 @@
 import org.apache.kafka.clients.producer.ProducerRecord;
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.Serdes;
@@ -32,7 +33,9 @@
 import org.apache.kafka.streams.kstream.internals.SessionWindow;
 import org.apache.kafka.streams.processor.StateStoreContext;
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.test.InternalMockProcessorContext;
@@ -60,6 +63,7 @@
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.hasItem;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
@@ -73,6 +77,13 @@ public abstract class AbstractSessionBytesStoreTest {
     static final long SEGMENT_INTERVAL = 60_000L;
     static final long RETENTION_PERIOD = 10_000L;
 
+    enum StoreType {
+        RocksDBSessionStore,
+        RocksDBTimeOrderedSessionStoreWithIndex,
+        RocksDBTimeOrderedSessionStoreWithoutIndex,
+        InMemoryStore
+    }
+
     SessionStore<String, Long> sessionStore;
 
     private MockRecordCollector recordCollector;
@@ -83,6 +94,8 @@ abstract <K, V> SessionStore<K, V> buildSessionStore(final long retentionPeriod,
                                                          final Serde<K> keySerde,
                                                          final Serde<V> valueSerde);
 
+    abstract StoreType getStoreType();
+
     @Before
     public void setUp() {
         sessionStore = buildSessionStore(RETENTION_PERIOD, Serdes.String(), Serdes.Long());
@@ -179,6 +192,75 @@ public void shouldFetchAllSessionsWithSameRecordKey() {
         }
     }
 
+    @Test
+    public void shouldFindSessionsForTimeRange() {
+        sessionStore.put(new Windowed<>("a", new SessionWindow(0, 0)), 5L);
+
+        if (getStoreType() == StoreType.RocksDBSessionStore) {
+            assertThrows(
+                "This API is not supported by this implementation of SessionStore.",
+                UnsupportedOperationException.class,
+                () -> sessionStore.findSessions(0, 0)
+            );
+            return;
+        }
+
+        // Find point
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(0, 0)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = Collections.singletonList(
+                KeyValue.pair(new Windowed<>("a", new SessionWindow(0, 0)), 5L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        sessionStore.put(new Windowed<>("b", new SessionWindow(10, 20)), 10L);
+        sessionStore.put(new Windowed<>("c", new SessionWindow(30, 40)), 20L);
+
+        // Find boundary
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(0, 20)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>("a", new SessionWindow(0, 0)), 5L),
+                KeyValue.pair(new Windowed<>("b", new SessionWindow(10, 20)), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Find left boundary
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(0, 19)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = Collections.singletonList(
+                KeyValue.pair(new Windowed<>("a", new SessionWindow(0, 0)), 5L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Find right boundary
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(1, 20)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = Collections.singletonList(
+                KeyValue.pair(new Windowed<>("b", new SessionWindow(10, 20)), 10L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Find partial off by 1
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(19, 41)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>("b", new SessionWindow(10, 20)), 10L),
+                KeyValue.pair(new Windowed<>("c", new SessionWindow(30, 40)), 20L)
+            );
+            assertEquals(expected, toList(values));
+        }
+
+        // Find all boundary
+        try (final KeyValueIterator<Windowed<String>, Long> values = sessionStore.findSessions(0, 40)) {
+            final List<KeyValue<Windowed<String>, Long>> expected = asList(
+                KeyValue.pair(new Windowed<>("a", new SessionWindow(0, 0)), 5L),
+                KeyValue.pair(new Windowed<>("b", new SessionWindow(10, 20)), 10L),
+                KeyValue.pair(new Windowed<>("c", new SessionWindow(30, 40)), 20L)
+            );
+            assertEquals(expected, toList(values));
+        }
+    }
+
     @Test
     public void shouldBackwardFetchAllSessionsWithSameRecordKey() {
         final LinkedList<KeyValue<Windowed<String>, Long>> expected = new LinkedList<>();
@@ -810,4 +892,46 @@ public void shouldNotThrowInvalidRangeExceptionWithNegativeFromKey() {
             );
         }
     }
+
+    @Test
+    public void shouldRemoveExpired() {
+        sessionStore.put(new Windowed<>("a", new SessionWindow(0, 0)), 1L);
+        if (getStoreType() == StoreType.InMemoryStore) {
+            sessionStore.put(new Windowed<>("aa", new SessionWindow(0, 10)), 2L);
+            sessionStore.put(new Windowed<>("a", new SessionWindow(10, 20)), 3L);
+
+            // Advance stream time to expire the first record
+            sessionStore.put(new Windowed<>("aa", new SessionWindow(10, RETENTION_PERIOD)), 4L);
+        } else {
+            sessionStore.put(new Windowed<>("aa", new SessionWindow(0, SEGMENT_INTERVAL)), 2L);
+            sessionStore.put(new Windowed<>("a", new SessionWindow(10, SEGMENT_INTERVAL)), 3L);
+
+            // Advance stream time to expire the first record
+            sessionStore.put(new Windowed<>("aa", new SessionWindow(10, 2 * SEGMENT_INTERVAL)), 4L);
+        }
+
+        try (final KeyValueIterator<Windowed<String>, Long> iterator =
+            sessionStore.findSessions("a", "b", 0L, Long.MAX_VALUE)
+        ) {
+            assertEquals(valuesToSet(iterator), new HashSet<>(Arrays.asList(2L, 3L, 4L)));
+        }
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPut() {
+        final MeteredSessionStore<String, Long> meteredSessionStore = (MeteredSessionStore<String, Long>) sessionStore;
+        final ChangeLoggingSessionBytesStore changeLoggingSessionBytesStore = (ChangeLoggingSessionBytesStore) meteredSessionStore.wrapped();
+        final SessionStore wrapped = (SessionStore) changeLoggingSessionBytesStore.wrapped();
+
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        sessionStore.put(new Windowed<String>("a", new SessionWindow(0, 0)), 1L);
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        sessionStore.put(new Windowed<String>("aa", new SessionWindow(0, 10)), 2L);
+        context.setRecordContext(new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders()));
+        sessionStore.put(new Windowed<String>("a", new SessionWindow(10, 20)), 3L);
+
+        final Position expected = Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 3L)))));
+        final Position actual = sessionStore.getPosition();
+        assertThat(expected, is(actual));
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemoryKeyValueStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemoryKeyValueStoreTest.java
index 13d78ec49b01d..f11f85477c108 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemoryKeyValueStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemoryKeyValueStoreTest.java
@@ -221,17 +221,44 @@ public void shouldPutGetToFromCache() {
     }
 
     @Test
-    public void shouldMatchPositionAfterPut() {
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        store.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        store.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
         context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
         store.put(bytesKey("key1"), bytesValue("value1"));
         context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
         store.put(bytesKey("key2"), bytesValue("value2"));
-        context.setRecordContext(new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders()));
-        store.put(bytesKey("key3"), bytesValue("value3"));
 
-        final Position expected = Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 3L)))));
-        final Position actual = store.getPosition();
-        assertEquals(expected, actual);
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            store.getPosition()
+        );
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        store.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            store.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
     }
 
     private byte[] bytesValue(final String value) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemorySessionStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemorySessionStoreTest.java
index 0de2321e7085b..d5aa667c0c560 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemorySessionStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingInMemorySessionStoreTest.java
@@ -36,6 +36,7 @@
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.test.InternalMockProcessorContext;
@@ -53,6 +54,8 @@
 import java.util.Random;
 
 import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.test.StreamsTestUtils.toList;
 import static org.apache.kafka.test.StreamsTestUtils.verifyKeyValueList;
 import static org.apache.kafka.test.StreamsTestUtils.verifyWindowedKeyValue;
@@ -142,6 +145,46 @@ public void shouldPutFetchFromCache() {
         }
     }
 
+    @Test
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        cachingStore.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        cachingStore.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        cachingStore.put(new Windowed<>(keyA, new SessionWindow(0, 0)), "1".getBytes());
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        cachingStore.put(new Windowed<>(keyA, new SessionWindow(0, 0)), "1".getBytes());
+
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        // the caching session store doesn't maintain a separate
+        // position because it never serves queries from the cache
+        assertEquals(Position.emptyPosition(), cachingStore.getPosition());
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        cachingStore.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            cachingStore.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
+    }
+
     @Test
     public void shouldPutFetchAllKeysFromCache() {
         cachingStore.put(new Windowed<>(keyA, new SessionWindow(0, 0)), "1".getBytes());
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentSessionStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentSessionStoreTest.java
index 6a622dcf0d4c2..50fd88a276954 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentSessionStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentSessionStoreTest.java
@@ -35,6 +35,7 @@
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.test.InternalMockProcessorContext;
@@ -52,6 +53,8 @@
 import java.util.Random;
 
 import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.test.StreamsTestUtils.toList;
 import static org.apache.kafka.test.StreamsTestUtils.verifyKeyValueList;
 import static org.apache.kafka.test.StreamsTestUtils.verifyWindowedKeyValue;
@@ -80,6 +83,7 @@ public class CachingPersistentSessionStoreTest {
     private SessionStore<Bytes, byte[]> underlyingStore;
     private CachingSessionStore cachingStore;
     private ThreadCache cache;
+    private InternalMockProcessorContext<Object, Object> context;
 
     @Before
     public void before() {
@@ -93,7 +97,7 @@ public void before() {
         underlyingStore = new RocksDBSessionStore(segmented);
         cachingStore = new CachingSessionStore(underlyingStore, SEGMENT_INTERVAL);
         cache = new ThreadCache(new LogContext("testCache "), MAX_CACHE_SIZE_BYTES, new MockStreamsMetrics(new Metrics()));
-        final InternalMockProcessorContext context =
+        this.context =
             new InternalMockProcessorContext<>(TestUtils.tempDirectory(), null, null, null, cache);
         context.setRecordContext(new ProcessorRecordContext(DEFAULT_TIMESTAMP, 0, 0, TOPIC, new RecordHeaders()));
         cachingStore.init((StateStoreContext) context, cachingStore);
@@ -123,6 +127,45 @@ public void shouldPutFetchFromCache() {
             assertFalse(b.hasNext());
         }
     }
+    @Test
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        cachingStore.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        cachingStore.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        cachingStore.put(new Windowed<>(keyA, new SessionWindow(0, 0)), "1".getBytes());
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        cachingStore.put(new Windowed<>(keyA, new SessionWindow(0, 0)), "1".getBytes());
+
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        // the caching session store doesn't maintain a separate
+        // position because it never serves queries from the cache
+        assertEquals(Position.emptyPosition(), cachingStore.getPosition());
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        cachingStore.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            cachingStore.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
+    }
 
     @Test
     public void shouldPutFetchAllKeysFromCache() {
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentWindowStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentWindowStoreTest.java
index 2d64a44aa063a..83136c33e8175 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentWindowStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/CachingPersistentWindowStoreTest.java
@@ -39,6 +39,7 @@
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.StoreBuilder;
 import org.apache.kafka.streams.state.Stores;
@@ -63,6 +64,8 @@
 import static java.time.Duration.ofMinutes;
 import static java.time.Instant.ofEpochMilli;
 import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.streams.state.internals.ThreadCacheTest.memoryCacheEntrySize;
 import static org.apache.kafka.test.StreamsTestUtils.toList;
 import static org.apache.kafka.test.StreamsTestUtils.verifyAllWindowedKeyValues;
@@ -142,6 +145,7 @@ public void shouldDelegateInit() {
     }
 
     @Test
+    @SuppressWarnings("deprecation")
     public void shouldNotReturnDuplicatesInRanges() {
         final StreamsBuilder builder = new StreamsBuilder();
 
@@ -260,6 +264,46 @@ public void shouldPutFetchFromCache() {
         }
     }
 
+    @Test
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        cachingStore.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        cachingStore.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key1"), bytesValue("value1"), DEFAULT_TIMESTAMP);
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key2"), bytesValue("value2"), DEFAULT_TIMESTAMP);
+
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        // the caching window store doesn't maintain a separate
+        // position because it never serves queries from the cache
+        assertEquals(Position.emptyPosition(), cachingStore.getPosition());
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        cachingStore.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            cachingStore.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
+    }
+
     private void verifyKeyValue(final KeyValue<Long, byte[]> next,
                                 final long expectedKey,
                                 final String expectedValue) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/InMemorySessionStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/InMemorySessionStoreTest.java
index 7821e2c0216ca..8546c546716da 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/InMemorySessionStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/InMemorySessionStoreTest.java
@@ -16,12 +16,9 @@
  */
 package org.apache.kafka.streams.state.internals;
 
-import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.internals.SessionWindow;
-import org.apache.kafka.streams.query.Position;
-import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.streams.state.Stores;
@@ -31,13 +28,9 @@
 import java.util.HashSet;
 
 import static java.time.Duration.ofMillis;
-import static org.apache.kafka.common.utils.Utils.mkEntry;
-import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.apache.kafka.test.StreamsTestUtils.valuesToSet;
-import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.hamcrest.Matchers.is;
 
 public class InMemorySessionStoreTest extends AbstractSessionBytesStoreTest {
 
@@ -55,20 +48,8 @@ <K, V> SessionStore<K, V> buildSessionStore(final long retentionPeriod,
             valueSerde).build();
     }
 
-    @Test
-    public void shouldRemoveExpired() {
-        sessionStore.put(new Windowed<>("a", new SessionWindow(0, 0)), 1L);
-        sessionStore.put(new Windowed<>("aa", new SessionWindow(0, 10)), 2L);
-        sessionStore.put(new Windowed<>("a", new SessionWindow(10, 20)), 3L);
-
-        // Advance stream time to expire the first record
-        sessionStore.put(new Windowed<>("aa", new SessionWindow(10, RETENTION_PERIOD)), 4L);
-
-        try (final KeyValueIterator<Windowed<String>, Long> iterator =
-            sessionStore.findSessions("a", "b", 0L, Long.MAX_VALUE)
-        ) {
-            assertEquals(valuesToSet(iterator), new HashSet<>(Arrays.asList(2L, 3L, 4L)));
-        }
+    StoreType getStoreType() {
+        return StoreType.InMemoryStore;
     }
 
     @Test
@@ -90,22 +71,4 @@ public void shouldNotExpireFromOpenIterator() {
         assertFalse(sessionStore.findSessions("a", "b", 0L, 20L).hasNext());
     }
 
-    @Test
-    public void shouldMatchPositionAfterPut() {
-        final MeteredSessionStore<String, Long> meteredSessionStore = (MeteredSessionStore<String, Long>) sessionStore;
-        final ChangeLoggingSessionBytesStore changeLoggingSessionBytesStore = (ChangeLoggingSessionBytesStore) meteredSessionStore.wrapped();
-        final InMemorySessionStore inMemorySessionStore = (InMemorySessionStore) changeLoggingSessionBytesStore.wrapped();
-
-        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("a", new SessionWindow(0, 0)), 1L);
-        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("aa", new SessionWindow(0, 10)), 2L);
-        context.setRecordContext(new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("a", new SessionWindow(10, 20)), 3L);
-
-        final Position expected = Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 3L)))));
-        final Position actual = inMemorySessionStore.getPosition();
-        assertThat(expected, is(actual));
-    }
-
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/KeyValueSegmentsTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/KeyValueSegmentsTest.java
index c8f1a0e061883..e8fe877b1093d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/KeyValueSegmentsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/KeyValueSegmentsTest.java
@@ -28,6 +28,7 @@
 import org.junit.Test;
 
 import java.io.File;
+import java.nio.file.Files;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.List;
@@ -303,7 +304,7 @@ public void shouldUpdateSegmentFileNameFromOldDateFormatToNewFormat() throws Exc
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final File oldSegment = new File(storeDirectoryPath + File.separator + storeName + "-" + formatter.format(new Date(segmentId * segmentInterval)));
             //noinspection ResultOfMethodCallIgnored
-            oldSegment.createNewFile();
+            Files.createFile(oldSegment.toPath());
         }
 
         segments.openExisting(context, -1L);
@@ -325,7 +326,7 @@ public void shouldUpdateSegmentFileNameFromOldColonFormatToNewFormat() throws Ex
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final File oldSegment = new File(storeDirectoryPath + File.separator + storeName + ":" + segmentId * (RETENTION_PERIOD / (NUM_SEGMENTS - 1)));
             //noinspection ResultOfMethodCallIgnored
-            oldSegment.createNewFile();
+            Files.createFile(oldSegment.toPath());
         }
 
         segments.openExisting(context, -1L);
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreIteratorTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreIteratorTest.java
index 0d69d933cf9f6..98134ad30e15d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreIteratorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreIteratorTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.kafka.streams.state.internals;
 
+import java.util.Collection;
+import java.util.function.Function;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
@@ -24,18 +26,26 @@
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.StateSerdes;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
 import org.apache.kafka.test.KeyValueIteratorStub;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
+import static java.util.Arrays.asList;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
+@RunWith(Parameterized.class)
 public class MergedSortedCacheWrappedWindowStoreIteratorTest {
 
     private static final SegmentedCacheFunction SINGLE_SEGMENT_CACHE_FUNCTION = new SegmentedCacheFunction(null, -1) {
@@ -45,10 +55,55 @@ public long segmentId(final Bytes key) {
         }
     };
 
+    @FunctionalInterface
+    private interface StoreKeySerializer<K> {
+        Bytes serialize(final K key, final long ts, final int seq, final StateSerdes<K, ?> serdes);
+    }
+
     private final List<KeyValue<Long, byte[]>> windowStoreKvPairs = new ArrayList<>();
     private final ThreadCache cache = new ThreadCache(new LogContext("testCache "), 1000000L,  new MockStreamsMetrics(new Metrics()));
     private final String namespace = "0.0-one";
     private final StateSerdes<String, String> stateSerdes = new StateSerdes<>("foo", Serdes.String(), Serdes.String());
+    private Function<byte[], Long> tsExtractor;
+    private StoreKeySerializer<String> storeKeySerializer;
+
+    private enum SchemaType {
+        WINDOW_KEY_SCHEMA,
+        KEY_FIRST_SCHEMA,
+        TIME_FIRST_SCHEMA
+    }
+
+    @Parameter
+    public SchemaType schemaType;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {SchemaType.WINDOW_KEY_SCHEMA},
+            {SchemaType.KEY_FIRST_SCHEMA},
+            {SchemaType.TIME_FIRST_SCHEMA},
+        });
+    }
+
+    @Before
+    public void setUp() {
+        switch (schemaType) {
+            case KEY_FIRST_SCHEMA:
+                tsExtractor = KeyFirstWindowKeySchema::extractStoreTimestamp;
+                storeKeySerializer = KeyFirstWindowKeySchema::toStoreKeyBinary;
+                break;
+            case WINDOW_KEY_SCHEMA:
+                tsExtractor = WindowKeySchema::extractStoreTimestamp;
+                storeKeySerializer = WindowKeySchema::toStoreKeyBinary;
+                break;
+            case TIME_FIRST_SCHEMA:
+                tsExtractor = TimeFirstWindowKeySchema::extractStoreTimestamp;
+                storeKeySerializer = TimeFirstWindowKeySchema::toStoreKeyBinary;
+                break;
+            default:
+                throw new IllegalStateException("Unknown schemaType: " + schemaType);
+        }
+    }
 
     @Test
     public void shouldIterateOverValueFromBothIterators() {
@@ -58,14 +113,14 @@ public void shouldIterateOverValueFromBothIterators() {
             final KeyValue<Long, byte[]> v1 = KeyValue.pair(t, v1Bytes);
             windowStoreKvPairs.add(v1);
             expectedKvPairs.add(KeyValue.pair(t, v1Bytes));
-            final Bytes keyBytes = WindowKeySchema.toStoreKeyBinary("a", t + 10, 0, stateSerdes);
+            final Bytes keyBytes = storeKeySerializer.serialize("a", t + 10, 0, stateSerdes);
             final byte[] valBytes = String.valueOf(t + 10).getBytes();
             expectedKvPairs.add(KeyValue.pair(t + 10, valBytes));
             cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(keyBytes), new LRUCacheEntry(valBytes));
         }
 
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         final KeyValueIterator<Long, byte[]> storeIterator = new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
 
         final ThreadCache.MemoryLRUCacheBytesIterator cacheIterator = cache.range(
@@ -73,7 +128,7 @@ public void shouldIterateOverValueFromBothIterators() {
         );
 
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
-            cacheIterator, storeIterator, true
+            cacheIterator, storeIterator, true, tsExtractor
         );
         int index = 0;
         while (iterator.hasNext()) {
@@ -94,14 +149,14 @@ public void shouldReverseIterateOverValueFromBothIterators() {
             final KeyValue<Long, byte[]> v1 = KeyValue.pair(t, v1Bytes);
             windowStoreKvPairs.add(v1);
             expectedKvPairs.add(KeyValue.pair(t, v1Bytes));
-            final Bytes keyBytes = WindowKeySchema.toStoreKeyBinary("a", t + 10, 0, stateSerdes);
+            final Bytes keyBytes = storeKeySerializer.serialize("a", t + 10, 0, stateSerdes);
             final byte[] valBytes = String.valueOf(t + 10).getBytes();
             expectedKvPairs.add(KeyValue.pair(t + 10, valBytes));
             cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(keyBytes), new LRUCacheEntry(valBytes));
         }
 
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         Collections.reverse(windowStoreKvPairs);
         final KeyValueIterator<Long, byte[]> storeIterator =
             new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
@@ -111,7 +166,7 @@ public void shouldReverseIterateOverValueFromBothIterators() {
         );
 
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
-            cacheIterator, storeIterator, false
+            cacheIterator, storeIterator, false, tsExtractor
         );
         int index = 0;
         Collections.reverse(expectedKvPairs);
@@ -127,15 +182,15 @@ public void shouldReverseIterateOverValueFromBothIterators() {
     @Test
     public void shouldPeekNextStoreKey() {
         windowStoreKvPairs.add(KeyValue.pair(10L, "a".getBytes()));
-        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(storeKeySerializer.serialize("a", 0, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         final KeyValueIterator<Long, byte[]> storeIterator = new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
         final ThreadCache.MemoryLRUCacheBytesIterator cacheIterator = cache.range(
             namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(fromBytes), SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(toBytes)
         );
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
-            cacheIterator, storeIterator, true
+            cacheIterator, storeIterator, true, tsExtractor
         );
         assertThat(iterator.peekNextKey(), equalTo(0L));
         iterator.next();
@@ -146,9 +201,9 @@ public void shouldPeekNextStoreKey() {
     @Test
     public void shouldPeekNextStoreKeyReverse() {
         windowStoreKvPairs.add(KeyValue.pair(10L, "a".getBytes()));
-        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(storeKeySerializer.serialize("a", 0, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         final KeyValueIterator<Long, byte[]> storeIterator =
             new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
         final ThreadCache.MemoryLRUCacheBytesIterator cacheIterator = cache.reverseRange(
@@ -156,7 +211,7 @@ public void shouldPeekNextStoreKeyReverse() {
             SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(toBytes)
         );
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
-            cacheIterator, storeIterator, false
+            cacheIterator, storeIterator, false, tsExtractor
         );
         assertThat(iterator.peekNextKey(), equalTo(10L));
         iterator.next();
@@ -167,9 +222,9 @@ public void shouldPeekNextStoreKeyReverse() {
     @Test
     public void shouldPeekNextCacheKey() {
         windowStoreKvPairs.add(KeyValue.pair(0L, "a".getBytes()));
-        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(WindowKeySchema.toStoreKeyBinary("a", 10L, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(storeKeySerializer.serialize("a", 10L, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         final KeyValueIterator<Long, byte[]> storeIterator =
             new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
         final ThreadCache.MemoryLRUCacheBytesIterator cacheIterator = cache.range(
@@ -180,7 +235,8 @@ public void shouldPeekNextCacheKey() {
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
             cacheIterator,
             storeIterator,
-            true
+            true,
+            tsExtractor
         );
         assertThat(iterator.peekNextKey(), equalTo(0L));
         iterator.next();
@@ -191,9 +247,9 @@ public void shouldPeekNextCacheKey() {
     @Test
     public void shouldPeekNextCacheKeyReverse() {
         windowStoreKvPairs.add(KeyValue.pair(0L, "a".getBytes()));
-        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(WindowKeySchema.toStoreKeyBinary("a", 10L, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
-        final Bytes fromBytes = WindowKeySchema.toStoreKeyBinary("a", 0, 0, stateSerdes);
-        final Bytes toBytes = WindowKeySchema.toStoreKeyBinary("a", 100, 0, stateSerdes);
+        cache.put(namespace, SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(storeKeySerializer.serialize("a", 10L, 0, stateSerdes)), new LRUCacheEntry("b".getBytes()));
+        final Bytes fromBytes = storeKeySerializer.serialize("a", 0, 0, stateSerdes);
+        final Bytes toBytes = storeKeySerializer.serialize("a", 100, 0, stateSerdes);
         final KeyValueIterator<Long, byte[]> storeIterator =
             new DelegatingPeekingKeyValueIterator<>("store", new KeyValueIteratorStub<>(windowStoreKvPairs.iterator()));
         final ThreadCache.MemoryLRUCacheBytesIterator cacheIterator = cache.reverseRange(
@@ -204,7 +260,8 @@ public void shouldPeekNextCacheKeyReverse() {
         final MergedSortedCacheWindowStoreIterator iterator = new MergedSortedCacheWindowStoreIterator(
             cacheIterator,
             storeIterator,
-            false
+            false,
+            tsExtractor
         );
         assertThat(iterator.peekNextKey(), equalTo(10L));
         iterator.next();
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreKeyValueIteratorTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreKeyValueIteratorTest.java
index 8cfe9b87e7e8f..0e0f184f6f680 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreKeyValueIteratorTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/MergedSortedCacheWrappedWindowStoreKeyValueIteratorTest.java
@@ -17,6 +17,7 @@
 
 package org.apache.kafka.streams.state.internals;
 
+import java.util.Collection;
 import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
@@ -24,24 +25,41 @@
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.internals.TimeWindow;
 import org.apache.kafka.streams.state.StateSerdes;
+import org.apache.kafka.streams.state.internals.MergedSortedCacheWindowStoreKeyValueIterator.StoreKeyToWindowKey;
+import org.apache.kafka.streams.state.internals.MergedSortedCacheWindowStoreKeyValueIterator.WindowKeyToBytes;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
 import org.apache.kafka.test.KeyValueIteratorStub;
+import org.junit.Before;
 import org.junit.Test;
 
 import java.util.Collections;
 import java.util.Iterator;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
+import static java.util.Arrays.asList;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+@RunWith(Parameterized.class)
 public class MergedSortedCacheWrappedWindowStoreKeyValueIteratorTest {
+
+    @FunctionalInterface
+    private interface StoreKeySerializer<K> {
+        Bytes serialize(final Windowed<K> key, final int seq, final StateSerdes<K, ?> serdes);
+    }
+
     private static final SegmentedCacheFunction SINGLE_SEGMENT_CACHE_FUNCTION = new SegmentedCacheFunction(null, -1) {
         @Override
         public long segmentId(final Bytes key) {
             return 0;
         }
     };
+
     private static final int WINDOW_SIZE = 10;
 
     private final String storeKey = "a";
@@ -51,15 +69,62 @@ public long segmentId(final Bytes key) {
     private final Iterator<KeyValue<Windowed<Bytes>, byte[]>> storeKvs = Collections.singleton(
         KeyValue.pair(new Windowed<>(Bytes.wrap(storeKey.getBytes()), storeWindow), storeKey.getBytes())).iterator();
     private final TimeWindow cacheWindow = new TimeWindow(10, 20);
-    private final Iterator<KeyValue<Bytes, LRUCacheEntry>> cacheKvs = Collections.singleton(
-        KeyValue.pair(
-            SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(WindowKeySchema.toStoreKeyBinary(
-                    new Windowed<>(cacheKey, cacheWindow), 0, new StateSerdes<>("dummy", Serdes.String(), Serdes.ByteArray()))
-            ),
-            new LRUCacheEntry(cacheKey.getBytes())
-        )).iterator();
+    private Iterator<KeyValue<Bytes, LRUCacheEntry>> cacheKvs;
     final private Deserializer<String> deserializer = Serdes.String().deserializer();
 
+    private StoreKeySerializer<String> storeKeySerializer;
+    private StoreKeyToWindowKey storeKeyToWindowKey;
+    private WindowKeyToBytes windowKeyToBytes;
+
+    private enum SchemaType {
+        WINDOW_KEY_SCHEMA,
+        KEY_FIRST_SCHEMA,
+        TIME_FIRST_SCHEMA
+    }
+
+    @Parameter
+    public SchemaType schemaType;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {SchemaType.WINDOW_KEY_SCHEMA},
+            {SchemaType.KEY_FIRST_SCHEMA},
+            {SchemaType.TIME_FIRST_SCHEMA},
+        });
+    }
+
+    @Before
+    public void setUp() {
+        switch (schemaType) {
+            case KEY_FIRST_SCHEMA:
+                storeKeySerializer = KeyFirstWindowKeySchema::toStoreKeyBinary;
+                storeKeyToWindowKey = KeyFirstWindowKeySchema::fromStoreKey;
+                windowKeyToBytes = KeyFirstWindowKeySchema::toStoreKeyBinary;
+                break;
+            case WINDOW_KEY_SCHEMA:
+                storeKeySerializer = WindowKeySchema::toStoreKeyBinary;
+                storeKeyToWindowKey = WindowKeySchema::fromStoreKey;
+                windowKeyToBytes = WindowKeySchema::toStoreKeyBinary;
+                break;
+            case TIME_FIRST_SCHEMA:
+                storeKeySerializer = TimeFirstWindowKeySchema::toStoreKeyBinary;
+                storeKeyToWindowKey = TimeFirstWindowKeySchema::fromStoreKey;
+                windowKeyToBytes = TimeFirstWindowKeySchema::toStoreKeyBinary;
+                break;
+            default:
+                throw new IllegalStateException("Unknown schemaType: " + schemaType);
+        }
+        cacheKvs = Collections.singleton(
+            KeyValue.pair(
+                SINGLE_SEGMENT_CACHE_FUNCTION.cacheKey(storeKeySerializer.serialize(
+                    new Windowed<>(cacheKey, cacheWindow), 0, new StateSerdes<>("dummy", Serdes.String(), Serdes.ByteArray()))
+                ),
+                new LRUCacheEntry(cacheKey.getBytes())
+            )
+        ).iterator();
+    }
+
     @Test
     public void shouldHaveNextFromStore() {
         final MergedSortedCacheWindowStoreKeyValueIterator mergeIterator =
@@ -185,7 +250,9 @@ private MergedSortedCacheWindowStoreKeyValueIterator createIterator(final Iterat
             new StateSerdes<>("name", Serdes.Bytes(), Serdes.ByteArray()),
             WINDOW_SIZE,
             SINGLE_SEGMENT_CACHE_FUNCTION,
-            forward
+            forward,
+            storeKeyToWindowKey,
+            windowKeyToBytes
         );
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBSessionStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBSessionStoreTest.java
index deabea85960d3..8a849d86bcbbc 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBSessionStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBSessionStoreTest.java
@@ -16,73 +16,78 @@
  */
 package org.apache.kafka.streams.state.internals;
 
-import org.apache.kafka.common.header.internals.RecordHeaders;
+import java.util.Collection;
 import org.apache.kafka.common.serialization.Serde;
-import org.apache.kafka.streams.kstream.Windowed;
-import org.apache.kafka.streams.kstream.internals.SessionWindow;
-import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
-import org.apache.kafka.streams.query.Position;
-import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.SessionStore;
 import org.apache.kafka.streams.state.Stores;
-import org.junit.Test;
 
-import java.util.Arrays;
-import java.util.HashSet;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
 import static java.time.Duration.ofMillis;
-import static org.apache.kafka.common.utils.Utils.mkEntry;
-import static org.apache.kafka.common.utils.Utils.mkMap;
-import static org.apache.kafka.test.StreamsTestUtils.valuesToSet;
-import static org.junit.Assert.assertEquals;
+import static java.util.Arrays.asList;
 
+@RunWith(Parameterized.class)
 public class RocksDBSessionStoreTest extends AbstractSessionBytesStoreTest {
 
     private static final String STORE_NAME = "rocksDB session store";
 
+    @Parameter
+    public StoreType storeType;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> getParamStoreType() {
+        return asList(new Object[][] {
+            {StoreType.RocksDBSessionStore},
+            {StoreType.RocksDBTimeOrderedSessionStoreWithIndex},
+            {StoreType.RocksDBTimeOrderedSessionStoreWithoutIndex}
+        });
+    }
+
+    @Override
+    StoreType getStoreType() {
+        return storeType;
+    }
+
     @Override
     <K, V> SessionStore<K, V> buildSessionStore(final long retentionPeriod,
                                                  final Serde<K> keySerde,
                                                  final Serde<V> valueSerde) {
-        return Stores.sessionStoreBuilder(
-            Stores.persistentSessionStore(
-                STORE_NAME,
-                ofMillis(retentionPeriod)),
-            keySerde,
-            valueSerde).build();
-    }
-
-    @Test
-    public void shouldRemoveExpired() {
-        sessionStore.put(new Windowed<>("a", new SessionWindow(0, 0)), 1L);
-        sessionStore.put(new Windowed<>("aa", new SessionWindow(0, SEGMENT_INTERVAL)), 2L);
-        sessionStore.put(new Windowed<>("a", new SessionWindow(10, SEGMENT_INTERVAL)), 3L);
-
-        // Advance stream time to expire the first record
-        sessionStore.put(new Windowed<>("aa", new SessionWindow(10, 2 * SEGMENT_INTERVAL)), 4L);
-
-        try (final KeyValueIterator<Windowed<String>, Long> iterator =
-            sessionStore.findSessions("a", "b", 0L, Long.MAX_VALUE)
-        ) {
-            assertEquals(valuesToSet(iterator), new HashSet<>(Arrays.asList(2L, 3L, 4L)));
+        switch (storeType) {
+            case RocksDBSessionStore: {
+                return Stores.sessionStoreBuilder(
+                    Stores.persistentSessionStore(
+                        STORE_NAME,
+                        ofMillis(retentionPeriod)),
+                    keySerde,
+                    valueSerde).build();
+            }
+            case RocksDBTimeOrderedSessionStoreWithIndex: {
+                return Stores.sessionStoreBuilder(
+                    new RocksDbTimeOrderedSessionBytesStoreSupplier(
+                        STORE_NAME,
+                        retentionPeriod,
+                        true
+                    ),
+                    keySerde,
+                    valueSerde
+                ).build();
+            }
+            case RocksDBTimeOrderedSessionStoreWithoutIndex: {
+                return Stores.sessionStoreBuilder(
+                    new RocksDbTimeOrderedSessionBytesStoreSupplier(
+                        STORE_NAME,
+                        retentionPeriod,
+                       false
+                    ),
+                    keySerde,
+                    valueSerde
+                ).build();
+            }
+            default:
+                throw new IllegalStateException("Unknown StoreType: " + storeType);
         }
     }
 
-    @Test
-    public void shouldMatchPositionAfterPut() {
-        final MeteredSessionStore<String, Long> meteredSessionStore = (MeteredSessionStore<String, Long>) sessionStore;
-        final ChangeLoggingSessionBytesStore changeLoggingSessionBytesStore = (ChangeLoggingSessionBytesStore) meteredSessionStore.wrapped();
-        final RocksDBSessionStore rocksDBSessionStore = (RocksDBSessionStore) changeLoggingSessionBytesStore.wrapped();
-
-        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("a", new SessionWindow(0, 0)), 1L);
-        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("aa", new SessionWindow(0, SEGMENT_INTERVAL)), 2L);
-        context.setRecordContext(new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders()));
-        sessionStore.put(new Windowed<String>("a", new SessionWindow(10, SEGMENT_INTERVAL)), 3L);
-
-        final Position expected = Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 3L)))));
-        final Position actual = rocksDBSessionStore.getPosition();
-        assertEquals(expected, actual);
-    }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStoreTest.java
deleted file mode 100644
index 0d5b016a9ef13..0000000000000
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedSegmentedBytesStoreTest.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.kafka.streams.state.internals;
-
-import static java.util.Arrays.asList;
-
-import java.util.Collection;
-import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
-import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
-import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameter;
-
-@RunWith(Parameterized.class)
-public class RocksDBTimeOrderedSegmentedBytesStoreTest
-    extends AbstractDualSchemaRocksDBSegmentedBytesStoreTest<KeyValueSegment> {
-
-    private final static String METRICS_SCOPE = "metrics-scope";
-
-    @Parameter
-    public String name;
-
-    @Parameter(1)
-    public boolean hasIndex;
-
-    @Parameterized.Parameters(name = "{0}")
-    public static Collection<Object[]> getKeySchema() {
-        return asList(new Object[][] {
-            {"WindowSchemaWithIndex", true},
-            {"WindowSchemaWithoutIndex", false}
-        });
-    }
-
-    AbstractDualSchemaRocksDBSegmentedBytesStore<KeyValueSegment> getBytesStore() {
-        return new RocksDBTimeOrderedSegmentedBytesStore(
-            storeName,
-            METRICS_SCOPE,
-            retention,
-            segmentInterval,
-            hasIndex
-        );
-    }
-
-    @Override
-    KeyValueSegments newSegments() {
-        return new KeyValueSegments(storeName, METRICS_SCOPE, retention, segmentInterval);
-    }
-
-    @Override
-    KeySchema getBaseSchema() {
-        return new TimeFirstWindowKeySchema();
-    }
-
-    @Override
-    KeySchema getIndexSchema() {
-        return hasIndex ? new KeyFirstWindowKeySchema() : null;
-    }
-
-}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStoreTest.java
new file mode 100644
index 0000000000000..db02f5b6ff0d0
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedWindowSegmentedBytesStoreTest.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import static java.util.Arrays.asList;
+
+import java.util.Collection;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.KeyFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.TimeFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class RocksDBTimeOrderedWindowSegmentedBytesStoreTest
+    extends AbstractDualSchemaRocksDBSegmentedBytesStoreTest<KeyValueSegment> {
+
+    private final static String METRICS_SCOPE = "metrics-scope";
+
+    private enum SchemaType {
+        WindowSchemaWithIndex,
+        WindowSchemaWithoutIndex,
+        SessionSchemaWithIndex,
+        SessionSchemaWithoutIndex
+    }
+
+    private boolean hasIndex;
+    private SchemaType schemaType;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> getKeySchema() {
+        return asList(new Object[][] {
+            {SchemaType.WindowSchemaWithIndex, true},
+            {SchemaType.WindowSchemaWithoutIndex, false},
+            {SchemaType.SessionSchemaWithIndex, true},
+            {SchemaType.SessionSchemaWithoutIndex, false}
+        });
+    }
+
+    public RocksDBTimeOrderedWindowSegmentedBytesStoreTest(final SchemaType schemaType, final boolean hasIndex) {
+        this.schemaType = schemaType;
+        this.hasIndex = hasIndex;
+    }
+
+
+    AbstractDualSchemaRocksDBSegmentedBytesStore<KeyValueSegment> getBytesStore() {
+        switch (schemaType) {
+            case WindowSchemaWithIndex:
+            case WindowSchemaWithoutIndex:
+                return new RocksDBTimeOrderedWindowSegmentedBytesStore(
+                    storeName,
+                    METRICS_SCOPE,
+                    retention,
+                    segmentInterval,
+                    hasIndex
+                );
+            case SessionSchemaWithIndex:
+            case SessionSchemaWithoutIndex:
+                return new RocksDBTimeOrderedSessionSegmentedBytesStore(
+                    storeName,
+                    METRICS_SCOPE,
+                    retention,
+                    segmentInterval,
+                    hasIndex
+                );
+            default:
+                throw new IllegalStateException("Unknown SchemaType: " + schemaType);
+        }
+    }
+
+    @Override
+    KeyValueSegments newSegments() {
+        return new KeyValueSegments(storeName, METRICS_SCOPE, retention, segmentInterval);
+    }
+
+    @Override
+    KeySchema getBaseSchema() {
+        switch (schemaType) {
+            case WindowSchemaWithIndex:
+            case WindowSchemaWithoutIndex:
+                return new TimeFirstWindowKeySchema();
+            case SessionSchemaWithIndex:
+            case SessionSchemaWithoutIndex:
+                return new TimeFirstSessionKeySchema();
+            default:
+                throw new IllegalStateException("Unknown SchemaType: " + schemaType);
+        }
+    }
+
+    @Override
+    KeySchema getIndexSchema() {
+        if (!hasIndex) {
+            return null;
+        }
+        switch (schemaType) {
+            case WindowSchemaWithIndex:
+                return new KeyFirstWindowKeySchema();
+            case SessionSchemaWithIndex:
+                return new KeyFirstSessionKeySchema();
+            default:
+                throw new IllegalStateException("Unknown SchemaType: " + schemaType);
+        }
+    }
+
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java
index 5abfd0667d2db..c0c7e963e6e2c 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDBWindowStoreTest.java
@@ -68,17 +68,14 @@ enum StoreType {
     }
 
     @Parameter
-    public String name;
-
-    @Parameter(1)
     public StoreType storeType;
 
     @Parameterized.Parameters(name = "{0}")
     public static Collection<Object[]> getKeySchema() {
         return asList(new Object[][] {
-            {"RocksDBWindowStore", StoreType.RocksDBWindowStore},
-            {"RocksDBTimeOrderedWindowStoreWithIndex", StoreType.RocksDBTimeOrderedWindowStoreWithIndex},
-            {"RocksDBTimeOrderedWindowStoreWithoutIndex", StoreType.RocksDBTimeOrderedWindowStoreWithoutIndex}
+            {StoreType.RocksDBWindowStore},
+            {StoreType.RocksDBTimeOrderedWindowStoreWithIndex},
+            {StoreType.RocksDBTimeOrderedWindowStoreWithoutIndex}
         });
     }
 
@@ -88,32 +85,41 @@ <K, V> WindowStore<K, V> buildWindowStore(final long retentionPeriod,
                                               final boolean retainDuplicates,
                                               final Serde<K> keySerde,
                                               final Serde<V> valueSerde) {
-        if (storeType == StoreType.RocksDBWindowStore) {
-            return Stores.windowStoreBuilder(
-                    Stores.persistentWindowStore(
-                        STORE_NAME,
-                        ofMillis(retentionPeriod),
-                        ofMillis(windowSize),
-                        retainDuplicates),
+
+        switch (storeType) {
+            case RocksDBWindowStore: {
+                return Stores.windowStoreBuilder(
+                        Stores.persistentWindowStore(
+                            STORE_NAME,
+                            ofMillis(retentionPeriod),
+                            ofMillis(windowSize),
+                            retainDuplicates),
+                        keySerde,
+                        valueSerde)
+                    .build();
+            }
+            case RocksDBTimeOrderedWindowStoreWithIndex: {
+                final long defaultSegmentInterval = Math.max(retentionPeriod / 2, 60_000L);
+                return Stores.windowStoreBuilder(
+                    new RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(STORE_NAME,
+                        retentionPeriod, defaultSegmentInterval, windowSize, retainDuplicates,
+                        true),
                     keySerde,
-                    valueSerde)
-                .build();
-        } else if (storeType == StoreType.RocksDBTimeOrderedWindowStoreWithIndex) {
-            final long defaultSegmentInterval = Math.max(retentionPeriod / 2, 60_000L);
-            return Stores.windowStoreBuilder(
-                new RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(STORE_NAME,
-                    retentionPeriod, defaultSegmentInterval, windowSize, retainDuplicates, true),
-                keySerde,
-                valueSerde
-            ).build();
-        } else {
-            final long defaultSegmentInterval = Math.max(retentionPeriod / 2, 60_000L);
-            return Stores.windowStoreBuilder(
-                new RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(STORE_NAME,
-                    retentionPeriod, defaultSegmentInterval, windowSize, retainDuplicates, false),
-                keySerde,
-                valueSerde
-            ).build();
+                    valueSerde
+                ).build();
+            }
+            case RocksDBTimeOrderedWindowStoreWithoutIndex: {
+                final long defaultSegmentInterval = Math.max(retentionPeriod / 2, 60_000L);
+                return Stores.windowStoreBuilder(
+                    new RocksDbIndexedTimeOrderedWindowBytesStoreSupplier(STORE_NAME,
+                        retentionPeriod, defaultSegmentInterval, windowSize, retainDuplicates,
+                        false),
+                    keySerde,
+                    valueSerde
+                ).build();
+            }
+            default:
+                throw new IllegalStateException("Unknown StoreType: " + storeType);
         }
     }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplierTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplierTest.java
new file mode 100644
index 0000000000000..fad4cc5a47177
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/RocksDbIndexedTimeOrderedWindowBytesStoreSupplierTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.state.WindowStore;
+import org.junit.Test;
+
+import static java.time.Duration.ZERO;
+import static java.time.Duration.ofMillis;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.core.IsInstanceOf.instanceOf;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+public class RocksDbIndexedTimeOrderedWindowBytesStoreSupplierTest {
+
+    @Test
+    public void shouldThrowIfStoreNameIsNull() {
+        final Exception e = assertThrows(NullPointerException.class, () -> RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(null, ZERO, ZERO, false, false));
+        assertEquals("name cannot be null", e.getMessage());
+    }
+
+    @Test
+    public void shouldThrowIfRetentionPeriodIsNegative() {
+        final Exception e = assertThrows(IllegalArgumentException.class, () -> RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create("anyName", ofMillis(-1L), ZERO, false, false));
+        assertEquals("retentionPeriod cannot be negative", e.getMessage());
+    }
+
+    @Test
+    public void shouldThrowIfWindowSizeIsNegative() {
+        final Exception e = assertThrows(IllegalArgumentException.class, () -> RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create("anyName", ofMillis(0L), ofMillis(-1L), false, false));
+        assertEquals("windowSize cannot be negative", e.getMessage());
+    }
+
+    @Test
+    public void shouldThrowIfWindowSizeIsLargerThanRetention() {
+        final Exception e = assertThrows(IllegalArgumentException.class, () -> RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create("anyName", ofMillis(1L), ofMillis(2L), false, false));
+        assertEquals("The retention period of the window store anyName must be no smaller than its window size. Got size=[2], retention=[1]", e.getMessage());
+    }
+
+    @Test
+    public void shouldCreateRocksDbTimeOrderedWindowStoreWithIndex() {
+        final WindowStore store = RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create("store", ofMillis(1L), ofMillis(1L), false, true).get();
+        final StateStore wrapped = ((WrappedStateStore) store).wrapped();
+        assertThat(store, instanceOf(RocksDBTimeOrderedWindowStore.class));
+        assertThat(wrapped, instanceOf(RocksDBTimeOrderedWindowSegmentedBytesStore.class));
+        assertTrue(((RocksDBTimeOrderedWindowSegmentedBytesStore) wrapped).hasIndex());
+    }
+
+    @Test
+    public void shouldCreateRocksDbTimeOrderedWindowStoreWithoutIndex() {
+        final WindowStore store = RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create("store", ofMillis(1L), ofMillis(1L), false, false).get();
+        final StateStore wrapped = ((WrappedStateStore) store).wrapped();
+        assertThat(store, instanceOf(RocksDBTimeOrderedWindowStore.class));
+        assertThat(wrapped, instanceOf(RocksDBTimeOrderedWindowSegmentedBytesStore.class));
+        assertFalse(((RocksDBTimeOrderedWindowSegmentedBytesStore) wrapped).hasIndex());
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/SessionKeySchemaTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/SessionKeySchemaTest.java
index 0482f01ba5e6a..8b5391a7cbf14 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/SessionKeySchemaTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/SessionKeySchemaTest.java
@@ -17,30 +17,101 @@
 
 package org.apache.kafka.streams.state.internals;
 
+import java.util.Collection;
+import java.util.Map;
+import java.util.function.Function;
+import org.apache.kafka.common.serialization.Deserializer;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.kstream.Window;
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.kstream.WindowedSerdes;
 import org.apache.kafka.streams.kstream.internals.SessionWindow;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.KeyFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedSessionKeySchemas.TimeFirstSessionKeySchema;
+import org.apache.kafka.streams.state.internals.SegmentedBytesStore.KeySchema;
 import org.apache.kafka.test.KeyValueIteratorStub;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
+import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.core.IsEqual.equalTo;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
+@RunWith(Parameterized.class)
 public class SessionKeySchemaTest {
+    private static final Map<SchemaType, KeySchema> SCHEMA_TYPE_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, new SessionKeySchema()),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, new KeyFirstSessionKeySchema()),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, new TimeFirstSessionKeySchema())
+    );
+
+    private static final Map<SchemaType, Function<Windowed<Bytes>, Bytes>> WINDOW_TO_STORE_BINARY_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::toBinary),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::toBinary),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::toBinary)
+    );
+
+    private static final Map<SchemaType, Function<byte[], Long>> EXTRACT_END_TS_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::extractEndTimestamp),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::extractEndTimestamp),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::extractEndTimestamp)
+    );
+
+    private static final Map<SchemaType, Function<byte[], Long>> EXTRACT_START_TS_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::extractStartTimestamp),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::extractStartTimestamp),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::extractStartTimestamp)
+    );
+
+    @FunctionalInterface
+    interface TriFunction<A, B, C, R> {
+        R apply(A a, B b, C c);
+    }
+
+    private static final Map<SchemaType, TriFunction<Windowed<String>, Serializer<String>, String, byte[]>> SERDE_TO_STORE_BINARY_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::toBinary),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::toBinary),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::toBinary)
+    );
+
+    private static final Map<SchemaType, TriFunction<byte[], Deserializer<String>, String, Windowed<String>>> SERDE_FROM_BYTES_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::from),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::from),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::from)
+    );
+
+    private static final Map<SchemaType, Function<Bytes, Windowed<Bytes>>> FROM_BYTES_MAP = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::from),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::from),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::from)
+    );
+
+    private static final Map<SchemaType, Function<byte[], Window>> EXTRACT_WINDOW = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::extractWindow),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::extractWindow),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::extractWindow)
+    );
+
+    private static final Map<SchemaType, Function<byte[], byte[]>> EXTRACT_KEY_BYTES = mkMap(
+        mkEntry(SchemaType.SessionKeySchema, SessionKeySchema::extractKeyBytes),
+        mkEntry(SchemaType.PrefixedKeyFirstSchema, KeyFirstSessionKeySchema::extractKeyBytes),
+        mkEntry(SchemaType.PrefixedTimeFirstSchema, TimeFirstSessionKeySchema::extractKeyBytes)
+    );
 
     private final String key = "key";
     private final String topic = "topic";
@@ -52,8 +123,45 @@ public class SessionKeySchemaTest {
     private final Windowed<String> windowedKey = new Windowed<>(key, window);
     private final Serde<Windowed<String>> keySerde = new WindowedSerdes.SessionWindowedSerde<>(serde);
 
-    private final SessionKeySchema sessionKeySchema = new SessionKeySchema();
+    private final KeySchema keySchema;
     private DelegatingPeekingKeyValueIterator<Bytes, Integer> iterator;
+    private final SchemaType schemaType;
+    private final Function<Windowed<Bytes>, Bytes> toBinary;
+    private final TriFunction<Windowed<String>, Serializer<String>, String, byte[]> serdeToBinary;
+    private final TriFunction<byte[], Deserializer<String>, String, Windowed<String>> serdeFromBytes;
+    private final Function<Bytes, Windowed<Bytes>> fromBytes;
+    private final Function<byte[], Long> extractStartTS;
+    private final Function<byte[], Long> extractEndTS;
+    private final Function<byte[], byte[]> extractKeyBytes;
+    private final Function<byte[], Window> extractWindow;
+
+    private enum SchemaType {
+        SessionKeySchema,
+        PrefixedTimeFirstSchema,
+        PrefixedKeyFirstSchema
+    }
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {SchemaType.SessionKeySchema},
+            {SchemaType.PrefixedTimeFirstSchema},
+            {SchemaType.PrefixedKeyFirstSchema}
+        });
+    }
+
+    public SessionKeySchemaTest(final SchemaType type) {
+        schemaType = type;
+        keySchema = SCHEMA_TYPE_MAP.get(type);
+        toBinary = WINDOW_TO_STORE_BINARY_MAP.get(schemaType);
+        serdeToBinary = SERDE_TO_STORE_BINARY_MAP.get(schemaType);
+        serdeFromBytes = SERDE_FROM_BYTES_MAP.get(schemaType);
+        fromBytes = FROM_BYTES_MAP.get(schemaType);
+        extractStartTS = EXTRACT_START_TS_MAP.get(schemaType);
+        extractEndTS = EXTRACT_END_TS_MAP.get(schemaType);
+        extractKeyBytes = EXTRACT_KEY_BYTES.get(schemaType);
+        extractWindow = EXTRACT_WINDOW.get(schemaType);
+    }
 
     @After
     public void after() {
@@ -64,44 +172,44 @@ public void after() {
 
     @Before
     public void before() {
-        final List<KeyValue<Bytes, Integer>> keys = Arrays.asList(KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0, 0}), new SessionWindow(0, 0))), 1),
-                                                                  KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0}), new SessionWindow(0, 0))), 2),
-                                                                  KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0, 0, 0}), new SessionWindow(0, 0))), 3),
-                                                                  KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0}), new SessionWindow(10, 20))), 4),
-                                                                  KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0, 0}), new SessionWindow(10, 20))), 5),
-                                                                  KeyValue.pair(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0, 0, 0}), new SessionWindow(10, 20))), 6));
+        final List<KeyValue<Bytes, Integer>> keys = asList(KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0, 0}), new SessionWindow(0, 0))), 1),
+                                                                  KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0}), new SessionWindow(0, 0))), 2),
+                                                                  KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0, 0, 0}), new SessionWindow(0, 0))), 3),
+                                                                  KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0}), new SessionWindow(10, 20))), 4),
+                                                                  KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0, 0}), new SessionWindow(10, 20))), 5),
+                                                                  KeyValue.pair(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0, 0, 0}), new SessionWindow(10, 20))), 6));
         iterator = new DelegatingPeekingKeyValueIterator<>("foo", new KeyValueIteratorStub<>(keys.iterator()));
     }
 
     @Test
     public void shouldFetchExactKeysSkippingLongerKeys() {
         final Bytes key = Bytes.wrap(new byte[]{0});
-        final List<Integer> result = getValues(sessionKeySchema.hasNextCondition(key, key, 0, Long.MAX_VALUE, true));
-        assertThat(result, equalTo(Arrays.asList(2, 4)));
+        final List<Integer> result = getValues(keySchema.hasNextCondition(key, key, 0, Long.MAX_VALUE, true));
+        assertThat(result, equalTo(asList(2, 4)));
     }
 
     @Test
     public void shouldFetchExactKeySkippingShorterKeys() {
         final Bytes key = Bytes.wrap(new byte[]{0, 0});
-        final HasNextCondition hasNextCondition = sessionKeySchema.hasNextCondition(key, key, 0, Long.MAX_VALUE, true);
+        final HasNextCondition hasNextCondition = keySchema.hasNextCondition(key, key, 0, Long.MAX_VALUE, true);
         final List<Integer> results = getValues(hasNextCondition);
-        assertThat(results, equalTo(Arrays.asList(1, 5)));
+        assertThat(results, equalTo(asList(1, 5)));
     }
 
     @Test
     public void shouldFetchAllKeysUsingNullKeys() {
-        final HasNextCondition hasNextCondition = sessionKeySchema.hasNextCondition(null, null, 0, Long.MAX_VALUE, true);
+        final HasNextCondition hasNextCondition = keySchema.hasNextCondition(null, null, 0, Long.MAX_VALUE, true);
         final List<Integer> results = getValues(hasNextCondition);
-        assertThat(results, equalTo(Arrays.asList(1, 2, 3, 4, 5, 6)));
+        assertThat(results, equalTo(asList(1, 2, 3, 4, 5, 6)));
     }
     
     @Test
     public void testUpperBoundWithLargeTimestamps() {
-        final Bytes upper = sessionKeySchema.upperRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), Long.MAX_VALUE);
+        final Bytes upper = keySchema.upperRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), Long.MAX_VALUE);
 
         assertThat(
             "shorter key with max timestamp should be in range",
-            upper.compareTo(SessionKeySchema.toBinary(
+            upper.compareTo(toBinary.apply(
                 new Windowed<>(
                     Bytes.wrap(new byte[]{0xA}),
                     new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))
@@ -110,7 +218,7 @@ public void testUpperBoundWithLargeTimestamps() {
 
         assertThat(
             "shorter key with max timestamp should be in range",
-            upper.compareTo(SessionKeySchema.toBinary(
+            upper.compareTo(toBinary.apply(
                 new Windowed<>(
                     Bytes.wrap(new byte[]{0xA, 0xB}),
                     new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))
@@ -118,18 +226,26 @@ public void testUpperBoundWithLargeTimestamps() {
             )) >= 0
         );
 
-        assertThat(upper, equalTo(SessionKeySchema.toBinary(
-            new Windowed<>(Bytes.wrap(new byte[]{0xA}), new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))))
-        );
+        if (schemaType == SchemaType.PrefixedTimeFirstSchema) {
+            assertThat(upper, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}),
+                    new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))))
+            );
+        } else {
+            assertThat(upper, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA}),
+                    new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))))
+            );
+        }
     }
 
     @Test
     public void testUpperBoundWithKeyBytesLargerThanFirstTimestampByte() {
-        final Bytes upper = sessionKeySchema.upperRange(Bytes.wrap(new byte[]{0xA, (byte) 0x8F, (byte) 0x9F}), Long.MAX_VALUE);
+        final Bytes upper = keySchema.upperRange(Bytes.wrap(new byte[]{0xA, (byte) 0x8F, (byte) 0x9F}), Long.MAX_VALUE);
 
         assertThat(
             "shorter key with max timestamp should be in range",
-            upper.compareTo(SessionKeySchema.toBinary(
+            upper.compareTo(toBinary.apply(
                 new Windowed<>(
                     Bytes.wrap(new byte[]{0xA, (byte) 0x8F}),
                     new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))
@@ -137,40 +253,53 @@ public void testUpperBoundWithKeyBytesLargerThanFirstTimestampByte() {
             ) >= 0
         );
 
-        assertThat(upper, equalTo(SessionKeySchema.toBinary(
+        assertThat(upper, equalTo(toBinary.apply(
             new Windowed<>(Bytes.wrap(new byte[]{0xA, (byte) 0x8F, (byte) 0x9F}), new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))))
         );
     }
 
     @Test
     public void testUpperBoundWithZeroTimestamp() {
-        final Bytes upper = sessionKeySchema.upperRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), 0);
-
-        assertThat(upper, equalTo(SessionKeySchema.toBinary(
-            new Windowed<>(Bytes.wrap(new byte[]{0xA}), new SessionWindow(0, Long.MAX_VALUE))))
-        );
+        final Bytes upper = keySchema.upperRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), 0);
+        final Function<Windowed<Bytes>, Bytes> toBinary = WINDOW_TO_STORE_BINARY_MAP.get(schemaType);
+
+        if (schemaType == SchemaType.PrefixedTimeFirstSchema) {
+            assertThat(upper, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, Long.MAX_VALUE))))
+            );
+        } else {
+            assertThat(upper, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA}), new SessionWindow(0, Long.MAX_VALUE))))
+            );
+        }
     }
 
     @Test
     public void testLowerBoundWithZeroTimestamp() {
-        final Bytes lower = sessionKeySchema.lowerRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), 0);
-        assertThat(lower, equalTo(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, 0)))));
+        final Bytes lower = keySchema.lowerRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), 0);
+        assertThat(lower, equalTo(toBinary.apply(new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, 0)))));
     }
 
     @Test
     public void testLowerBoundMatchesTrailingZeros() {
-        final Bytes lower = sessionKeySchema.lowerRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), Long.MAX_VALUE);
+        final Bytes lower = keySchema.lowerRange(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), Long.MAX_VALUE);
 
         assertThat(
             "appending zeros to key should still be in range",
-            lower.compareTo(SessionKeySchema.toBinary(
+            lower.compareTo(toBinary.apply(
                 new Windowed<>(
                     Bytes.wrap(new byte[]{0xA, 0xB, 0xC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
                     new SessionWindow(Long.MAX_VALUE, Long.MAX_VALUE))
             )) < 0
         );
 
-        assertThat(lower, equalTo(SessionKeySchema.toBinary(new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, 0)))));
+        if (schemaType == SchemaType.PrefixedTimeFirstSchema) {
+            assertThat(lower, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, Long.MAX_VALUE)))));
+        } else {
+            assertThat(lower, equalTo(toBinary.apply(
+                new Windowed<>(Bytes.wrap(new byte[]{0xA, 0xB, 0xC}), new SessionWindow(0, 0)))));
+        }
     }
 
     @Test
@@ -197,47 +326,47 @@ public void shouldDeSerializeNullToNull() {
 
     @Test
     public void shouldConvertToBinaryAndBack() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        final Windowed<String> result = SessionKeySchema.from(serialized, Serdes.String().deserializer(), "dummy");
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        final Windowed<String> result = serdeFromBytes.apply(serialized, Serdes.String().deserializer(), "dummy");
         assertEquals(windowedKey, result);
     }
 
     @Test
     public void shouldExtractEndTimeFromBinary() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        assertEquals(endTime, SessionKeySchema.extractEndTimestamp(serialized));
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        assertEquals(endTime, (long) extractEndTS.apply(serialized));
     }
 
     @Test
     public void shouldExtractStartTimeFromBinary() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        assertEquals(startTime, SessionKeySchema.extractStartTimestamp(serialized));
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        assertEquals(startTime, (long) extractStartTS.apply(serialized));
     }
 
     @Test
     public void shouldExtractWindowFromBindary() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        assertEquals(window, SessionKeySchema.extractWindow(serialized));
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        assertEquals(window, extractWindow.apply(serialized));
     }
 
     @Test
     public void shouldExtractKeyBytesFromBinary() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        assertArrayEquals(key.getBytes(), SessionKeySchema.extractKeyBytes(serialized));
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        assertArrayEquals(key.getBytes(), extractKeyBytes.apply(serialized));
     }
 
     @Test
     public void shouldExtractKeyFromBinary() {
-        final byte[] serialized = SessionKeySchema.toBinary(windowedKey, serde.serializer(), "dummy");
-        assertEquals(windowedKey, SessionKeySchema.from(serialized, serde.deserializer(), "dummy"));
+        final byte[] serialized = serdeToBinary.apply(windowedKey, serde.serializer(), "dummy");
+        assertEquals(windowedKey, serdeFromBytes.apply(serialized, serde.deserializer(), "dummy"));
     }
 
     @Test
     public void shouldExtractBytesKeyFromBinary() {
         final Bytes bytesKey = Bytes.wrap(key.getBytes());
         final Windowed<Bytes> windowedBytesKey = new Windowed<>(bytesKey, window);
-        final Bytes serialized = SessionKeySchema.toBinary(windowedBytesKey);
-        assertEquals(windowedBytesKey, SessionKeySchema.from(serialized));
+        final Bytes serialized = toBinary.apply(windowedBytesKey);
+        assertEquals(windowedBytesKey, fromBytes.apply(serialized));
     }
 
     private List<Integer> getValues(final HasNextCondition hasNextCondition) {
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java
index 722e71eed899b..a70bf57814819 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/StreamThreadStateStoreProviderTest.java
@@ -46,7 +46,7 @@
 import org.apache.kafka.streams.processor.internals.StreamsProducer;
 import org.apache.kafka.streams.processor.internals.Task;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig;
+import org.apache.kafka.streams.TopologyConfig;
 import org.apache.kafka.streams.state.QueryableStoreTypes;
 import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
 import org.apache.kafka.streams.state.ReadOnlySessionStore;
@@ -430,7 +430,9 @@ private StreamTask createStreamsTask(final StreamsConfig streamsConfig,
                 Time.SYSTEM
             ),
             streamsConfig.defaultProductionExceptionHandler(),
-            new MockStreamsMetrics(metrics));
+            new MockStreamsMetrics(metrics),
+            topology
+        );
         final StreamsMetricsImpl streamsMetrics = new MockStreamsMetrics(metrics);
         final InternalProcessorContext context = new ProcessorContextImpl(
             taskId,
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingPersistentWindowStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingPersistentWindowStoreTest.java
new file mode 100644
index 0000000000000..1ee797d35ec5a
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedCachingPersistentWindowStoreTest.java
@@ -0,0 +1,1238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.Collection;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.TestInputTopic;
+import org.apache.kafka.streams.TopologyTestDriver;
+import org.apache.kafka.streams.errors.InvalidStateStoreException;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.internals.TimeWindow;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.StoreBuilder;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.TimestampedWindowStore;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+import org.apache.kafka.streams.state.WindowStore;
+import org.apache.kafka.streams.state.WindowStoreIterator;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
+import org.apache.kafka.test.InternalMockProcessorContext;
+import org.apache.kafka.test.TestUtils;
+import org.easymock.EasyMock;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.UUID;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+
+import static java.time.Duration.ofHours;
+import static java.time.Duration.ofMinutes;
+import static java.time.Instant.ofEpochMilli;
+import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.streams.state.internals.ThreadCacheTest.memoryCacheEntrySize;
+import static org.apache.kafka.test.StreamsTestUtils.toList;
+import static org.apache.kafka.test.StreamsTestUtils.verifyAllWindowedKeyValues;
+import static org.apache.kafka.test.StreamsTestUtils.verifyKeyValueList;
+import static org.apache.kafka.test.StreamsTestUtils.verifyWindowedKeyValue;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.hasItem;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class TimeOrderedCachingPersistentWindowStoreTest {
+
+    private static final int MAX_CACHE_SIZE_BYTES = 300;
+    private static final long DEFAULT_TIMESTAMP = 10L;
+    private static final Long WINDOW_SIZE = 10L;
+    private static final long SEGMENT_INTERVAL = 100L;
+    private final static String TOPIC = "topic";
+    private static final String CACHE_NAMESPACE = "0_0-store-name";
+
+    private ThreadCache cache;
+    private InternalMockProcessorContext context;
+    private TimeFirstWindowKeySchema baseKeySchema;
+    private WindowStore<Bytes, byte[]> underlyingStore;
+    private TimeOrderedCachingWindowStore cachingStore;
+    private RocksDBTimeOrderedWindowSegmentedBytesStore bytesStore;
+    private CacheFlushListenerStub<Windowed<String>, String> cacheListener;
+
+    @Parameter
+    public boolean hasIndex;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {true},
+            {false}
+        });
+    }
+
+    @Before
+    public void setUp() {
+        baseKeySchema = new TimeFirstWindowKeySchema();
+        bytesStore = new RocksDBTimeOrderedWindowSegmentedBytesStore("test", "metrics-scope", 100, SEGMENT_INTERVAL, hasIndex);
+        underlyingStore = new RocksDBTimeOrderedWindowStore(bytesStore, false, WINDOW_SIZE);
+        final TimeWindowedDeserializer<String> keyDeserializer = new TimeWindowedDeserializer<>(new StringDeserializer(), WINDOW_SIZE);
+        keyDeserializer.setIsChangelogTopic(true);
+        cacheListener = new CacheFlushListenerStub<>(keyDeserializer, new StringDeserializer());
+        cachingStore = new TimeOrderedCachingWindowStore(underlyingStore, WINDOW_SIZE, SEGMENT_INTERVAL);
+        cachingStore.setFlushListener(cacheListener, false);
+        cache = new ThreadCache(new LogContext("testCache "), MAX_CACHE_SIZE_BYTES, new MockStreamsMetrics(new Metrics()));
+        context = new InternalMockProcessorContext<>(TestUtils.tempDirectory(), null, null, null, cache);
+        context.setRecordContext(new ProcessorRecordContext(DEFAULT_TIMESTAMP, 0, 0, TOPIC, new RecordHeaders()));
+        cachingStore.init((StateStoreContext) context, cachingStore);
+    }
+
+    @After
+    public void closeStore() {
+        cachingStore.close();
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void shouldDelegateDeprecatedInit() {
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(inner.hasIndex()).andReturn(hasIndex);
+        EasyMock.replay(inner);
+        final TimeOrderedCachingWindowStore outer = new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+
+        EasyMock.reset(inner);
+        EasyMock.expect(inner.name()).andStubReturn("store");
+        inner.init((org.apache.kafka.streams.processor.ProcessorContext) context, outer);
+        EasyMock.expectLastCall();
+        EasyMock.replay(inner);
+        outer.init((org.apache.kafka.streams.processor.ProcessorContext) context, outer);
+        EasyMock.verify(inner);
+    }
+
+    @Test
+    public void shouldDelegateInit() {
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(inner.hasIndex()).andReturn(hasIndex);
+        EasyMock.replay(inner);
+        final TimeOrderedCachingWindowStore outer = new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+
+        EasyMock.reset(inner);
+        EasyMock.expect(inner.name()).andStubReturn("store");
+        inner.init((StateStoreContext) context, outer);
+        EasyMock.expectLastCall();
+        EasyMock.replay(inner);
+        outer.init((StateStoreContext) context, outer);
+        EasyMock.verify(inner);
+    }
+
+    @Test
+    public void shouldThrowIfWrongStore() {
+        final RocksDBTimestampedWindowStore innerWrong = EasyMock.mock(RocksDBTimestampedWindowStore.class);
+        final Exception e = assertThrows(IllegalArgumentException.class,
+            () -> new TimeOrderedCachingWindowStore(innerWrong, WINDOW_SIZE, SEGMENT_INTERVAL));
+        assertThat(e.getMessage(),
+            containsString("TimeOrderedCachingWindowStore only supports RocksDBTimeOrderedWindowStore backed store"));
+
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        // Nothing happens
+        new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+    }
+
+    @Test
+    public void shouldNotReturnDuplicatesInRanges() {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final StoreBuilder<TimestampedWindowStore<String, String>> storeBuilder = Stores.timestampedWindowStoreBuilder(
+            RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(
+                "store-name",
+                ofHours(1L),
+                ofMinutes(1),
+                false,
+                hasIndex
+            ), Serdes.String(), Serdes.String())
+            .withCachingEnabled();
+
+        builder.addStateStore(storeBuilder);
+
+        builder.stream(TOPIC,
+            Consumed.with(Serdes.String(), Serdes.String()))
+            .process(() -> new Processor<String, String, String, String>() {
+                private int numRecordsProcessed;
+                private WindowStore<String, ValueAndTimestamp<String>> store;
+
+                @Override
+                public void init(final ProcessorContext<String, String> processorContext) {
+                    this.store = processorContext.getStateStore("store-name");
+                    int count = 0;
+
+                    try (final KeyValueIterator<Windowed<String>, ValueAndTimestamp<String>> all = store.all()) {
+                        while (all.hasNext()) {
+                            count++;
+                            all.next();
+                        }
+                    }
+
+                    assertThat(count, equalTo(0));
+                }
+
+                @Override
+                public void process(final Record<String, String> record) {
+                    int count = 0;
+
+                    try (final KeyValueIterator<Windowed<String>, ValueAndTimestamp<String>> all = store.all()) {
+                        while (all.hasNext()) {
+                            count++;
+                            all.next();
+                        }
+                    }
+
+                    assertThat(count, equalTo(numRecordsProcessed));
+
+                    store.put(record.value(), ValueAndTimestamp.make(record.value(), record.timestamp()), record.timestamp());
+
+                    numRecordsProcessed++;
+                }
+
+            }, "store-name");
+
+        final Properties streamsConfiguration = new Properties();
+        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
+        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000L);
+
+        final Instant initialWallClockTime = Instant.ofEpochMilli(0L);
+        final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), streamsConfiguration, initialWallClockTime);
+
+        final TestInputTopic<String, String> inputTopic = driver.createInputTopic(TOPIC,
+            Serdes.String().serializer(),
+            Serdes.String().serializer(),
+            initialWallClockTime,
+            Duration.ZERO);
+
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+
+        driver.close();
+    }
+
+    @Test
+    public void shouldPutFetchFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        assertThat(cachingStore.fetch(bytesKey("a"), 10), equalTo(bytesValue("a")));
+        assertThat(cachingStore.fetch(bytesKey("b"), 10), equalTo(bytesValue("b")));
+        assertThat(cachingStore.fetch(bytesKey("c"), 10), equalTo(null));
+        assertThat(cachingStore.fetch(bytesKey("a"), 0), equalTo(null));
+
+        try (final WindowStoreIterator<byte[]> a = cachingStore.fetch(bytesKey("a"), ofEpochMilli(10), ofEpochMilli(10));
+             final WindowStoreIterator<byte[]> b = cachingStore.fetch(bytesKey("b"), ofEpochMilli(10), ofEpochMilli(10))) {
+            verifyKeyValue(a.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(b.next(), DEFAULT_TIMESTAMP, "b");
+            assertFalse(a.hasNext());
+            assertFalse(b.hasNext());
+            final int expectedSize = hasIndex ? 4 : 2;
+            assertEquals(expectedSize, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        cachingStore.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        cachingStore.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key1"), bytesValue("value1"), DEFAULT_TIMESTAMP);
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key2"), bytesValue("value2"), DEFAULT_TIMESTAMP);
+
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        // the caching window store doesn't maintain a separate
+        // position because it never serves queries from the cache
+        assertEquals(Position.emptyPosition(), cachingStore.getPosition());
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        cachingStore.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            cachingStore.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
+    }
+
+    private void verifyKeyValue(final KeyValue<Long, byte[]> next,
+                                final long expectedKey,
+                                final String expectedValue) {
+        assertThat(next.key, equalTo(expectedKey));
+        assertThat(next.value, equalTo(bytesValue(expectedValue)));
+    }
+
+    private static byte[] bytesValue(final String value) {
+        return value.getBytes();
+    }
+
+    private static Bytes bytesKey(final String key) {
+        return Bytes.wrap(key.getBytes());
+    }
+
+    private String stringFrom(final byte[] from) {
+        return Serdes.String().deserializer().deserialize("", from);
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(bytesKey("a"), bytesKey("b"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+            final int expectedSize = hasIndex ? 4 : 2;
+            assertEquals(expectedSize, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyFrom() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(null, bytesKey("d"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b", "c", "d");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(bytesKey("b"), null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("b", "c", "d", "e");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyFromKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(null, null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b", "c", "d", "e");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyFrom() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(null, bytesKey("c"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("c", "b", "a");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(bytesKey("c"), null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("e", "d", "c");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyFromKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(null, null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("e", "d", "c", "b", "a");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldGetAllFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("f"), bytesValue("f"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("g"), bytesValue("g"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("h"), bytesValue("h"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.all()) {
+            final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+            for (final String s : array) {
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(s), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                    s);
+            }
+            assertFalse(iterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldGetAllBackwardFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("f"), bytesValue("f"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("g"), bytesValue("g"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("h"), bytesValue("h"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.backwardAll()) {
+            final String[] array = {"h", "g", "f", "e", "d", "c", "b", "a"};
+            for (final String s : array) {
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(s), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                    s);
+            }
+            assertFalse(iterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFetchAllWithinTimestampRange() {
+        final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+        for (int i = 0; i < array.length; i++) {
+            cachingStore.put(bytesKey(array[i]), bytesValue(array[i]), i);
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetchAll(ofEpochMilli(0), ofEpochMilli(7))) {
+            for (int i = 0; i < array.length; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator1 =
+                 cachingStore.fetchAll(ofEpochMilli(2), ofEpochMilli(4))) {
+            for (int i = 2; i <= 4; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator1.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator1.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator2 =
+                 cachingStore.fetchAll(ofEpochMilli(5), ofEpochMilli(7))) {
+            for (int i = 5; i <= 7; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator2.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator2.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFetchAllBackwardWithinTimestampRange() {
+        final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+        for (int i = 0; i < array.length; i++) {
+            cachingStore.put(bytesKey(array[i]), bytesValue(array[i]), i);
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetchAll(ofEpochMilli(0), ofEpochMilli(7))) {
+            for (int i = array.length - 1; i >= 0; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator1 =
+                 cachingStore.backwardFetchAll(ofEpochMilli(2), ofEpochMilli(4))) {
+            for (int i = 4; i >= 2; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator1.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator1.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator2 =
+                 cachingStore.backwardFetchAll(ofEpochMilli(5), ofEpochMilli(7))) {
+            for (int i = 7; i >= 5; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator2.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator2.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFlushEvictedItemsIntoUnderlyingStore() {
+        final int added = addItemsToCache();
+        // all dirty entries should have been flushed
+        try (final KeyValueIterator<Bytes, byte[]> iter = bytesStore.fetch(
+            Bytes.wrap("0".getBytes(StandardCharsets.UTF_8)),
+            DEFAULT_TIMESTAMP,
+            DEFAULT_TIMESTAMP)) {
+            final KeyValue<Bytes, byte[]> next = iter.next();
+            assertEquals(DEFAULT_TIMESTAMP, baseKeySchema.segmentTimestamp(next.key));
+            assertArrayEquals("0".getBytes(), next.value);
+            assertFalse(iter.hasNext());
+            assertEquals(added - 1, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldForwardDirtyItemsWhenFlushCalled() {
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("a", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+    }
+
+    @Test
+    public void shouldSetFlushListener() {
+        assertTrue(cachingStore.setFlushListener(null, true));
+        assertTrue(cachingStore.setFlushListener(null, false));
+    }
+
+    @Test
+    public void shouldForwardOldValuesWhenEnabled() {
+        cachingStore.setFlushListener(cacheListener, true);
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).newValue);
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey).newValue);
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey));
+        cacheListener.forwarded.clear();
+    }
+
+    @Test
+    public void shouldNotForwardOldValuesWhenDisabled() {
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey));
+        cacheListener.forwarded.clear();
+    }
+
+    @Test
+    public void shouldForwardDirtyItemToListenerWhenEvicted() {
+        final int numRecords = addItemsToCache();
+        assertEquals(numRecords, cacheListener.forwarded.size());
+    }
+
+    @Test
+    public void shouldTakeValueFromCacheIfSameTimestampFlushedToRocks() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateAcrossWindows() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardAcrossWindows() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.backwardFetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateCacheAndStore() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardCacheAndStore() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.backwardFetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateCacheAndStoreKeyRange() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> fetchRange =
+                 cachingStore.fetch(key, bytesKey("2"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                "a");
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP + WINDOW_SIZE, DEFAULT_TIMESTAMP + WINDOW_SIZE + WINDOW_SIZE)),
+                "b");
+            assertFalse(fetchRange.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardCacheAndStoreKeyRange() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> fetchRange =
+                 cachingStore.backwardFetch(key, bytesKey("2"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP + WINDOW_SIZE, DEFAULT_TIMESTAMP + WINDOW_SIZE + WINDOW_SIZE)),
+                "b");
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                "a");
+            assertFalse(fetchRange.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldClearNamespaceCacheOnClose() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), 0L);
+        final int size = hasIndex ? 2 : 1;
+        assertEquals(size, cache.size());
+        cachingStore.close();
+        assertEquals(0, cache.size());
+    }
+
+    @Test
+    public void shouldThrowIfTryingToFetchFromClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.fetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(10)));
+    }
+
+    @Test
+    public void shouldThrowIfTryingToFetchRangeFromClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.fetch(bytesKey("a"), bytesKey("b"), ofEpochMilli(0), ofEpochMilli(10)));
+    }
+
+    @Test
+    public void shouldThrowIfTryingToWriteToClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.put(bytesKey("a"), bytesValue("a"), 0L));
+    }
+
+    @Test
+    public void shouldSkipNonExistBaseKeyInCache() {
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+
+        final SegmentedCacheFunction indexCacheFunction = new SegmentedCacheFunction(new KeyFirstWindowKeySchema(), SEGMENT_INTERVAL);
+
+        final Bytes key = bytesKey("a");
+        final byte[] value = bytesValue("0001");
+        final Bytes cacheIndexKey = indexCacheFunction.cacheKey(KeyFirstWindowKeySchema.toStoreKeyBinary(key, 1, 0));
+        final String cacheName = context.taskId() + "-test";
+
+        // Only put index to store
+        cache.put(cacheName,
+            cacheIndexKey,
+            new LRUCacheEntry(
+                new byte[0],
+                new RecordHeaders(),
+                true,
+                context.offset(),
+                context.timestamp(),
+                context.partition(),
+                "")
+        );
+
+        underlyingStore.put(key, value, 1);
+
+        if (hasIndex) {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 1),
+                    windowedPair("aa", "0002", 0)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0001", 1)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverExactKeys() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        final List<KeyValue<Long, byte[]>> expected = asList(
+            KeyValue.pair(0L, bytesValue("0001")),
+            KeyValue.pair(1L, bytesValue("0003")),
+            KeyValue.pair(SEGMENT_INTERVAL, bytesValue("0005"))
+        );
+        final List<KeyValue<Long, byte[]>> actual =
+            toList(cachingStore.fetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)));
+        verifyKeyValueList(expected, actual);
+    }
+
+    @Test
+    public void shouldBackwardFetchAndIterateOverExactKeys() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        final List<KeyValue<Long, byte[]>> expected = asList(
+            KeyValue.pair(SEGMENT_INTERVAL, bytesValue("0005")),
+            KeyValue.pair(1L, bytesValue("0003")),
+            KeyValue.pair(0L, bytesValue("0001"))
+        );
+        final List<KeyValue<Long, byte[]>> actual =
+            toList(cachingStore.backwardFetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)));
+        verifyKeyValueList(expected, actual);
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverKeyRange() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("a", "0001", 0),
+                windowedPair("a", "0003", 1),
+                windowedPair("a", "0005", SEGMENT_INTERVAL)
+            ),
+            toList(cachingStore.fetch(bytesKey("a"), bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("aa", "0002", 0),
+                windowedPair("aa", "0004", 1)),
+            toList(cachingStore.fetch(bytesKey("aa"), bytesKey("aa"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        if (hasIndex) {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0005", SEGMENT_INTERVAL)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 0),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0005", SEGMENT_INTERVAL)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverKeyBackwardRange() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("a", "0005", SEGMENT_INTERVAL),
+                windowedPair("a", "0003", 1),
+                windowedPair("a", "0001", 0)
+            ),
+            toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("aa", "0004", 1),
+                windowedPair("aa", "0002", 0)),
+            toList(cachingStore.backwardFetch(bytesKey("aa"), bytesKey("aa"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        if (!hasIndex) {
+            verifyKeyValueList(
+                // Ordered by timestamp if has no index
+                asList(
+                    windowedPair("a", "0005", SEGMENT_INTERVAL),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0001", 0)
+                ),
+                toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    // First because in larger segments
+                    windowedPair("a", "0005", SEGMENT_INTERVAL),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("a", "0001", 0)
+                ),
+                toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldReturnSameResultsForSingleKeyFetchAndEqualKeyRangeFetch() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0003"), 2);
+        cachingStore.put(bytesKey("aaa"), bytesValue("0004"), 3);
+
+        try (final WindowStoreIterator<byte[]> singleKeyIterator = cachingStore.fetch(bytesKey("aa"), 0L, 5L);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> keyRangeIterator = cachingStore.fetch(bytesKey("aa"), bytesKey("aa"), 0L, 5L)) {
+
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertFalse(singleKeyIterator.hasNext());
+            assertFalse(keyRangeIterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldReturnSameResultsForSingleKeyFetchAndEqualKeyRangeBackwardFetch() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0003"), 2);
+        cachingStore.put(bytesKey("aaa"), bytesValue("0004"), 3);
+
+        try (final WindowStoreIterator<byte[]> singleKeyIterator =
+                 cachingStore.backwardFetch(bytesKey("aa"), Instant.ofEpochMilli(0L), Instant.ofEpochMilli(5L));
+             final KeyValueIterator<Windowed<Bytes>, byte[]> keyRangeIterator =
+                 cachingStore.backwardFetch(bytesKey("aa"), bytesKey("aa"), Instant.ofEpochMilli(0L), Instant.ofEpochMilli(5L))) {
+
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertFalse(singleKeyIterator.hasNext());
+            assertFalse(keyRangeIterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldThrowNullPointerExceptionOnPutNullKey() {
+        assertThrows(NullPointerException.class, () -> cachingStore.put(null, bytesValue("anyValue"), 0L));
+    }
+
+    @Test
+    public void shouldNotThrowNullPointerExceptionOnPutNullValue() {
+        cachingStore.put(bytesKey("a"), null, 0L);
+    }
+
+    @Test
+    public void shouldThrowNullPointerExceptionOnFetchNullKey() {
+        assertThrows(NullPointerException.class, () -> cachingStore.fetch(null, ofEpochMilli(1L), ofEpochMilli(2L)));
+    }
+
+    @Test
+    public void shouldNotThrowInvalidRangeExceptionWithNegativeFromKey() {
+        final Bytes keyFrom = Bytes.wrap(Serdes.Integer().serializer().serialize("", -1));
+        final Bytes keyTo = Bytes.wrap(Serdes.Integer().serializer().serialize("", 1));
+
+        try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(TimeOrderedCachingWindowStore.class);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.fetch(keyFrom, keyTo, 0L, 10L)) {
+            assertFalse(iterator.hasNext());
+
+            final List<String> messages = appender.getMessages();
+            assertThat(
+                messages,
+                hasItem("Returning empty iterator for fetch with invalid key range: from > to." +
+                    " This may be due to range arguments set in the wrong order, " +
+                    "or serdes that don't preserve ordering when lexicographically comparing the serialized bytes." +
+                    " Note that the built-in numerical serdes do not follow this for negative numbers")
+            );
+        }
+    }
+
+    @Test
+    public void shouldNotThrowInvalidBackwardRangeExceptionWithNegativeFromKey() {
+        final Bytes keyFrom = Bytes.wrap(Serdes.Integer().serializer().serialize("", -1));
+        final Bytes keyTo = Bytes.wrap(Serdes.Integer().serializer().serialize("", 1));
+
+        try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(TimeOrderedCachingWindowStore.class);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(keyFrom, keyTo, Instant.ofEpochMilli(0L), Instant.ofEpochMilli(10L))) {
+            assertFalse(iterator.hasNext());
+
+            final List<String> messages = appender.getMessages();
+            assertThat(
+                messages,
+                hasItem("Returning empty iterator for fetch with invalid key range: from > to." +
+                    " This may be due to serdes that don't preserve ordering when lexicographically comparing the serialized bytes." +
+                    " Note that the built-in numerical serdes do not follow this for negative numbers")
+            );
+        }
+    }
+
+    @Test
+    public void shouldCloseCacheAndWrappedStoreAfterErrorDuringCacheFlush() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on flush"));
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    @Test
+    public void shouldCloseWrappedStoreAfterErrorDuringCacheClose() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on close"));
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    @Test
+    public void shouldCloseCacheAfterErrorDuringStateStoreClose() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on close"));
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    private void setUpCloseTests() {
+        underlyingStore = EasyMock.createNiceMock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(underlyingStore.name()).andStubReturn("store-name");
+        EasyMock.expect(underlyingStore.isOpen()).andStubReturn(true);
+        EasyMock.replay(underlyingStore);
+        cachingStore = new TimeOrderedCachingWindowStore(underlyingStore, WINDOW_SIZE, SEGMENT_INTERVAL);
+        cache = EasyMock.createNiceMock(ThreadCache.class);
+        context = new InternalMockProcessorContext<>(TestUtils.tempDirectory(), null, null, null, cache);
+        context.setRecordContext(new ProcessorRecordContext(10, 0, 0, TOPIC, new RecordHeaders()));
+        cachingStore.init((StateStoreContext) context, cachingStore);
+    }
+
+    private static KeyValue<Windowed<Bytes>, byte[]> windowedPair(final String key, final String value, final long timestamp) {
+        return KeyValue.pair(
+            new Windowed<>(bytesKey(key), new TimeWindow(timestamp, timestamp + WINDOW_SIZE)),
+            bytesValue(value));
+    }
+
+    private int addItemsToCache() {
+        int cachedSize = 0;
+        int i = 0;
+        while (cachedSize < MAX_CACHE_SIZE_BYTES) {
+            final String kv = String.valueOf(i++);
+            cachingStore.put(bytesKey(kv), bytesValue(kv), DEFAULT_TIMESTAMP);
+            cachedSize += memoryCacheEntrySize(kv.getBytes(), kv.getBytes(), TOPIC) +
+                8 + // timestamp
+                4; // sequenceNumber
+        }
+        return i;
+    }
+
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedWindowStoreTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedWindowStoreTest.java
new file mode 100644
index 0000000000000..bf597fb789bc6
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimeOrderedWindowStoreTest.java
@@ -0,0 +1,1245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import java.util.Collection;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.common.header.internals.RecordHeaders;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.common.utils.LogContext;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.TestInputTopic;
+import org.apache.kafka.streams.TopologyTestDriver;
+import org.apache.kafka.streams.errors.InvalidStateStoreException;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.kstream.internals.TimeWindow;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Processor;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.testutil.LogCaptureAppender;
+import org.apache.kafka.streams.query.Position;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.StoreBuilder;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.TimestampedWindowStore;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+import org.apache.kafka.streams.state.WindowStore;
+import org.apache.kafka.streams.state.WindowStoreIterator;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.KeyFirstWindowKeySchema;
+import org.apache.kafka.streams.state.internals.PrefixedWindowKeySchemas.TimeFirstWindowKeySchema;
+import org.apache.kafka.test.InternalMockProcessorContext;
+import org.apache.kafka.test.TestUtils;
+import org.easymock.EasyMock;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import java.util.UUID;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+
+import static java.time.Duration.ofHours;
+import static java.time.Duration.ofMinutes;
+import static java.time.Instant.ofEpochMilli;
+import static java.util.Arrays.asList;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+import static org.apache.kafka.common.utils.Utils.mkMap;
+import static org.apache.kafka.streams.state.internals.ThreadCacheTest.memoryCacheEntrySize;
+import static org.apache.kafka.test.StreamsTestUtils.toList;
+import static org.apache.kafka.test.StreamsTestUtils.verifyAllWindowedKeyValues;
+import static org.apache.kafka.test.StreamsTestUtils.verifyKeyValueList;
+import static org.apache.kafka.test.StreamsTestUtils.verifyWindowedKeyValue;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.hasItem;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class TimeOrderedWindowStoreTest {
+
+    private static final int MAX_CACHE_SIZE_BYTES = 300;
+    private static final long DEFAULT_TIMESTAMP = 10L;
+    private static final Long WINDOW_SIZE = 10L;
+    private static final long SEGMENT_INTERVAL = 100L;
+    private final static String TOPIC = "topic";
+    private static final String CACHE_NAMESPACE = "0_0-store-name";
+
+    private InternalMockProcessorContext context;
+    private RocksDBTimeOrderedWindowSegmentedBytesStore bytesStore;
+    private WindowStore<Bytes, byte[]> underlyingStore;
+    private TimeOrderedCachingWindowStore cachingStore;
+    private CacheFlushListenerStub<Windowed<String>, String> cacheListener;
+    private ThreadCache cache;
+    private TimeFirstWindowKeySchema baseKeySchema;
+
+    @Parameter
+    public boolean hasIndex;
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {true},
+            {false}
+        });
+    }
+
+    @Before
+    public void setUp() {
+        baseKeySchema = new TimeFirstWindowKeySchema();
+        bytesStore = new RocksDBTimeOrderedWindowSegmentedBytesStore("test", "metrics-scope", 100, SEGMENT_INTERVAL, hasIndex);
+        underlyingStore = new RocksDBTimeOrderedWindowStore(bytesStore, false, WINDOW_SIZE);
+        final TimeWindowedDeserializer<String> keyDeserializer = new TimeWindowedDeserializer<>(new StringDeserializer(), WINDOW_SIZE);
+        keyDeserializer.setIsChangelogTopic(true);
+        cacheListener = new CacheFlushListenerStub<>(keyDeserializer, new StringDeserializer());
+        cachingStore = new TimeOrderedCachingWindowStore(underlyingStore, WINDOW_SIZE, SEGMENT_INTERVAL);
+        cachingStore.setFlushListener(cacheListener, false);
+        cache = new ThreadCache(new LogContext("testCache "), MAX_CACHE_SIZE_BYTES, new MockStreamsMetrics(new Metrics()));
+        context = new InternalMockProcessorContext<>(TestUtils.tempDirectory(), null, null, null, cache);
+        context.setRecordContext(new ProcessorRecordContext(DEFAULT_TIMESTAMP, 0, 0, TOPIC, new RecordHeaders()));
+        cachingStore.init((StateStoreContext) context, cachingStore);
+    }
+
+    @After
+    public void closeStore() {
+        cachingStore.close();
+    }
+
+    @SuppressWarnings("deprecation")
+    @Test
+    public void shouldDelegateDeprecatedInit() {
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(inner.hasIndex()).andReturn(hasIndex);
+        EasyMock.replay(inner);
+        final TimeOrderedCachingWindowStore outer = new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+
+        EasyMock.reset(inner);
+        EasyMock.expect(inner.name()).andStubReturn("store");
+        inner.init((ProcessorContext) context, outer);
+        EasyMock.expectLastCall();
+        EasyMock.replay(inner);
+        outer.init((ProcessorContext) context, outer);
+        EasyMock.verify(inner);
+    }
+
+    @Test
+    public void shouldDelegateInit() {
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(inner.hasIndex()).andReturn(hasIndex);
+        EasyMock.replay(inner);
+        final TimeOrderedCachingWindowStore outer = new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+
+        EasyMock.reset(inner);
+        EasyMock.expect(inner.name()).andStubReturn("store");
+        inner.init((StateStoreContext) context, outer);
+        EasyMock.expectLastCall();
+        EasyMock.replay(inner);
+        outer.init((StateStoreContext) context, outer);
+        EasyMock.verify(inner);
+    }
+
+    @Test
+    public void shouldThrowIfWrongStore() {
+        final RocksDBTimestampedWindowStore innerWrong = EasyMock.mock(RocksDBTimestampedWindowStore.class);
+        final Exception e = assertThrows(IllegalArgumentException.class,
+            () -> new TimeOrderedCachingWindowStore(innerWrong, WINDOW_SIZE, SEGMENT_INTERVAL));
+        assertThat(e.getMessage(),
+            containsString("TimeOrderedCachingWindowStore only supports RocksDBTimeOrderedWindowStore backed store"));
+
+        final RocksDBTimeOrderedWindowStore inner = EasyMock.mock(RocksDBTimeOrderedWindowStore.class);
+        // Nothing happens
+        new TimeOrderedCachingWindowStore(inner, WINDOW_SIZE, SEGMENT_INTERVAL);
+    }
+
+    @Test
+    public void shouldNotReturnDuplicatesInRanges() {
+        final StreamsBuilder builder = new StreamsBuilder();
+
+        final StoreBuilder<TimestampedWindowStore<String, String>> storeBuilder = Stores.timestampedWindowStoreBuilder(
+            RocksDbIndexedTimeOrderedWindowBytesStoreSupplier.create(
+                "store-name",
+                ofHours(1L),
+                ofMinutes(1),
+                false,
+                hasIndex
+            ), Serdes.String(), Serdes.String())
+            .withCachingEnabled();
+
+        builder.addStateStore(storeBuilder);
+
+        builder.stream(TOPIC,
+            Consumed.with(Serdes.String(), Serdes.String()))
+            .process(() -> new Processor<String, String, String, String>() {
+                private WindowStore<String, ValueAndTimestamp<String>> store;
+                private int numRecordsProcessed;
+                private org.apache.kafka.streams.processor.api.ProcessorContext<String, String> context;
+
+                @Override
+                public void init(final org.apache.kafka.streams.processor.api.ProcessorContext<String, String> processorContext) {
+                    this.context = processorContext;
+                    this.store = processorContext.getStateStore("store-name");
+                    int count = 0;
+
+                    try (final KeyValueIterator<Windowed<String>, ValueAndTimestamp<String>> all = store.all()) {
+                        while (all.hasNext()) {
+                            count++;
+                            all.next();
+                        }
+                    }
+
+                    assertThat(count, equalTo(0));
+                }
+
+                @Override
+                public void process(final Record<String, String> record) {
+                    int count = 0;
+
+                    try (final KeyValueIterator<Windowed<String>, ValueAndTimestamp<String>> all = store.all()) {
+                        while (all.hasNext()) {
+                            count++;
+                            all.next();
+                        }
+                    }
+
+                    assertThat(count, equalTo(numRecordsProcessed));
+
+                    store.put(record.value(), ValueAndTimestamp.make(record.value(), record.timestamp()), record.timestamp());
+
+                    numRecordsProcessed++;
+
+                    context.forward(record);
+                }
+
+                @Override
+                public void close() {
+                }
+            }, "store-name");
+
+        final Properties streamsConfiguration = new Properties();
+        streamsConfiguration.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
+        streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
+        streamsConfiguration.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getPath());
+        streamsConfiguration.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10 * 1000L);
+
+        final Instant initialWallClockTime = Instant.ofEpochMilli(0L);
+        final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), streamsConfiguration, initialWallClockTime);
+
+        final TestInputTopic<String, String> inputTopic = driver.createInputTopic(TOPIC,
+            Serdes.String().serializer(),
+            Serdes.String().serializer(),
+            initialWallClockTime,
+            Duration.ZERO);
+
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+        driver.advanceWallClockTime(Duration.ofSeconds(10));
+        inputTopic.advanceTime(Duration.ofSeconds(10));
+        for (int i = 0; i < 5; i++) {
+            inputTopic.pipeInput(UUID.randomUUID().toString(), UUID.randomUUID().toString());
+        }
+
+        driver.close();
+    }
+
+    @Test
+    public void shouldPutFetchFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        assertThat(cachingStore.fetch(bytesKey("a"), 10), equalTo(bytesValue("a")));
+        assertThat(cachingStore.fetch(bytesKey("b"), 10), equalTo(bytesValue("b")));
+        assertThat(cachingStore.fetch(bytesKey("c"), 10), equalTo(null));
+        assertThat(cachingStore.fetch(bytesKey("a"), 0), equalTo(null));
+
+        try (final WindowStoreIterator<byte[]> a = cachingStore.fetch(bytesKey("a"), ofEpochMilli(10), ofEpochMilli(10));
+             final WindowStoreIterator<byte[]> b = cachingStore.fetch(bytesKey("b"), ofEpochMilli(10), ofEpochMilli(10))) {
+            verifyKeyValue(a.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(b.next(), DEFAULT_TIMESTAMP, "b");
+            assertFalse(a.hasNext());
+            assertFalse(b.hasNext());
+            final int expectedSize = hasIndex ? 4 : 2;
+            assertEquals(expectedSize, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithFlushListener() {
+        cachingStore.setFlushListener(record -> { }, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    @Test
+    public void shouldMatchPositionAfterPutWithoutFlushListener() {
+        cachingStore.setFlushListener(null, false);
+        shouldMatchPositionAfterPut();
+    }
+
+    private void shouldMatchPositionAfterPut() {
+        context.setRecordContext(new ProcessorRecordContext(0, 1, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key1"), bytesValue("value1"), DEFAULT_TIMESTAMP);
+        context.setRecordContext(new ProcessorRecordContext(0, 2, 0, "", new RecordHeaders()));
+        cachingStore.put(bytesKey("key2"), bytesValue("value2"), DEFAULT_TIMESTAMP);
+
+        // Position should correspond to the last record's context, not the current context.
+        context.setRecordContext(
+            new ProcessorRecordContext(0, 3, 0, "", new RecordHeaders())
+        );
+
+        // the caching window store doesn't maintain a separate
+        // position because it never serves queries from the cache
+        assertEquals(Position.emptyPosition(), cachingStore.getPosition());
+        assertEquals(Position.emptyPosition(), underlyingStore.getPosition());
+
+        cachingStore.flush();
+
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            cachingStore.getPosition()
+        );
+        assertEquals(
+            Position.fromMap(mkMap(mkEntry("", mkMap(mkEntry(0, 2L))))),
+            underlyingStore.getPosition()
+        );
+    }
+
+    private void verifyKeyValue(final KeyValue<Long, byte[]> next,
+                                final long expectedKey,
+                                final String expectedValue) {
+        assertThat(next.key, equalTo(expectedKey));
+        assertThat(next.value, equalTo(bytesValue(expectedValue)));
+    }
+
+    private static byte[] bytesValue(final String value) {
+        return value.getBytes();
+    }
+
+    private static Bytes bytesKey(final String key) {
+        return Bytes.wrap(key.getBytes());
+    }
+
+    private String stringFrom(final byte[] from) {
+        return Serdes.String().deserializer().deserialize("", from);
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(bytesKey("a"), bytesKey("b"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+            final int expectedSize = hasIndex ? 4 : 2;
+            assertEquals(expectedSize, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyFrom() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(null, bytesKey("d"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b", "c", "d");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(bytesKey("b"), null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("b", "c", "d", "e");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutFetchRangeFromCacheForNullKeyFromKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetch(null, null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("a", "b", "c", "d", "e");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyFrom() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(null, bytesKey("c"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("c", "b", "a");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(bytesKey("c"), null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("e", "d", "c");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldPutBackwardFetchRangeFromCacheForNullKeyFromKeyTo() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP + 10L);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP + 20L);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP + 20L);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(null, null, ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + 20L))) {
+            final List<Windowed<Bytes>> expectedKeys = Arrays.asList(
+                new Windowed<>(bytesKey("e"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("d"), new TimeWindow(DEFAULT_TIMESTAMP + 20L, DEFAULT_TIMESTAMP + 20L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("c"), new TimeWindow(DEFAULT_TIMESTAMP + 10L, DEFAULT_TIMESTAMP + 10L + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("b"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                new Windowed<>(bytesKey("a"), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE))
+            );
+
+            final List<String> expectedValues = Arrays.asList("e", "d", "c", "b", "a");
+
+            verifyAllWindowedKeyValues(iterator, expectedKeys, expectedValues);
+        }
+    }
+
+    @Test
+    public void shouldGetAllFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("f"), bytesValue("f"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("g"), bytesValue("g"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("h"), bytesValue("h"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.all()) {
+            final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+            for (final String s : array) {
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(s), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                    s);
+            }
+            assertFalse(iterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldGetAllBackwardFromCache() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("b"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("c"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("d"), bytesValue("d"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("e"), bytesValue("e"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("f"), bytesValue("f"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("g"), bytesValue("g"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("h"), bytesValue("h"), DEFAULT_TIMESTAMP);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.backwardAll()) {
+            final String[] array = {"h", "g", "f", "e", "d", "c", "b", "a"};
+            for (final String s : array) {
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(s), new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                    s);
+            }
+            assertFalse(iterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFetchAllWithinTimestampRange() {
+        final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+        for (int i = 0; i < array.length; i++) {
+            cachingStore.put(bytesKey(array[i]), bytesValue(array[i]), i);
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.fetchAll(ofEpochMilli(0), ofEpochMilli(7))) {
+            for (int i = 0; i < array.length; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator1 =
+                 cachingStore.fetchAll(ofEpochMilli(2), ofEpochMilli(4))) {
+            for (int i = 2; i <= 4; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator1.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator1.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator2 =
+                 cachingStore.fetchAll(ofEpochMilli(5), ofEpochMilli(7))) {
+            for (int i = 5; i <= 7; i++) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator2.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator2.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFetchAllBackwardWithinTimestampRange() {
+        final String[] array = {"a", "b", "c", "d", "e", "f", "g", "h"};
+        for (int i = 0; i < array.length; i++) {
+            cachingStore.put(bytesKey(array[i]), bytesValue(array[i]), i);
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetchAll(ofEpochMilli(0), ofEpochMilli(7))) {
+            for (int i = array.length - 1; i >= 0; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator1 =
+                 cachingStore.backwardFetchAll(ofEpochMilli(2), ofEpochMilli(4))) {
+            for (int i = 4; i >= 2; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator1.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator1.hasNext());
+        }
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> iterator2 =
+                 cachingStore.backwardFetchAll(ofEpochMilli(5), ofEpochMilli(7))) {
+            for (int i = 7; i >= 5; i--) {
+                final String str = array[i];
+                verifyWindowedKeyValue(
+                    iterator2.next(),
+                    new Windowed<>(bytesKey(str), new TimeWindow(i, i + WINDOW_SIZE)),
+                    str);
+            }
+            assertFalse(iterator2.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldFlushEvictedItemsIntoUnderlyingStore() {
+        final int added = addItemsToCache();
+        // all dirty entries should have been flushed
+        try (final KeyValueIterator<Bytes, byte[]> iter = bytesStore.fetch(
+            Bytes.wrap("0".getBytes(StandardCharsets.UTF_8)),
+            DEFAULT_TIMESTAMP,
+            DEFAULT_TIMESTAMP)) {
+            final KeyValue<Bytes, byte[]> next = iter.next();
+            assertEquals(DEFAULT_TIMESTAMP, baseKeySchema.segmentTimestamp(next.key));
+            assertArrayEquals("0".getBytes(), next.value);
+            assertFalse(iter.hasNext());
+            assertEquals(added - 1, cache.size());
+        }
+    }
+
+    @Test
+    public void shouldForwardDirtyItemsWhenFlushCalled() {
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("a", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+    }
+
+    @Test
+    public void shouldSetFlushListener() {
+        assertTrue(cachingStore.setFlushListener(null, true));
+        assertTrue(cachingStore.setFlushListener(null, false));
+    }
+
+    @Test
+    public void shouldForwardOldValuesWhenEnabled() {
+        cachingStore.setFlushListener(cacheListener, true);
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).newValue);
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey).newValue);
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey));
+        cacheListener.forwarded.clear();
+    }
+
+    @Test
+    public void shouldForwardOldValuesWhenDisabled() {
+        final Windowed<String> windowedKey =
+            new Windowed<>("1", new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE));
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("b", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), bytesValue("c"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertEquals("c", cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey).newValue);
+        assertNull(cacheListener.forwarded.get(windowedKey).oldValue);
+        cacheListener.forwarded.clear();
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), null, DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        assertNull(cacheListener.forwarded.get(windowedKey));
+        cacheListener.forwarded.clear();
+    }
+
+    @Test
+    public void shouldForwardDirtyItemToListenerWhenEvicted() {
+        final int numRecords = addItemsToCache();
+        assertEquals(numRecords, cacheListener.forwarded.size());
+    }
+
+    @Test
+    public void shouldTakeValueFromCacheIfSameTimestampFlushedToRocks() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.flush();
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateAcrossWindows() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardAcrossWindows() {
+        cachingStore.put(bytesKey("1"), bytesValue("a"), DEFAULT_TIMESTAMP);
+        cachingStore.put(bytesKey("1"), bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.backwardFetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateCacheAndStore() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.fetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardCacheAndStore() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+        try (final WindowStoreIterator<byte[]> fetch =
+                 cachingStore.backwardFetch(bytesKey("1"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP + WINDOW_SIZE, "b");
+            verifyKeyValue(fetch.next(), DEFAULT_TIMESTAMP, "a");
+            assertFalse(fetch.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateCacheAndStoreKeyRange() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> fetchRange =
+                 cachingStore.fetch(key, bytesKey("2"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                "a");
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP + WINDOW_SIZE, DEFAULT_TIMESTAMP + WINDOW_SIZE + WINDOW_SIZE)),
+                "b");
+            assertFalse(fetchRange.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldIterateBackwardCacheAndStoreKeyRange() {
+        final Bytes key = Bytes.wrap("1".getBytes());
+        bytesStore.put(TimeFirstWindowKeySchema.toStoreKeyBinary(key, DEFAULT_TIMESTAMP, 0), "a".getBytes());
+        cachingStore.put(key, bytesValue("b"), DEFAULT_TIMESTAMP + WINDOW_SIZE);
+
+        try (final KeyValueIterator<Windowed<Bytes>, byte[]> fetchRange =
+                 cachingStore.backwardFetch(key, bytesKey("2"), ofEpochMilli(DEFAULT_TIMESTAMP), ofEpochMilli(DEFAULT_TIMESTAMP + WINDOW_SIZE))) {
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP + WINDOW_SIZE, DEFAULT_TIMESTAMP + WINDOW_SIZE + WINDOW_SIZE)),
+                "b");
+            verifyWindowedKeyValue(
+                fetchRange.next(),
+                new Windowed<>(key, new TimeWindow(DEFAULT_TIMESTAMP, DEFAULT_TIMESTAMP + WINDOW_SIZE)),
+                "a");
+            assertFalse(fetchRange.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldClearNamespaceCacheOnClose() {
+        cachingStore.put(bytesKey("a"), bytesValue("a"), 0L);
+        final int size = hasIndex ? 2 : 1;
+        assertEquals(size, cache.size());
+        cachingStore.close();
+        assertEquals(0, cache.size());
+    }
+
+    @Test
+    public void shouldThrowIfTryingToFetchFromClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.fetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(10)));
+    }
+
+    @Test
+    public void shouldThrowIfTryingToFetchRangeFromClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.fetch(bytesKey("a"), bytesKey("b"), ofEpochMilli(0), ofEpochMilli(10)));
+    }
+
+    @Test
+    public void shouldThrowIfTryingToWriteToClosedCachingStore() {
+        cachingStore.close();
+        assertThrows(InvalidStateStoreException.class, () -> cachingStore.put(bytesKey("a"), bytesValue("a"), 0L));
+    }
+
+    @Test
+    public void shouldSkipNonExistBaseKeyInCache() {
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+
+        final SegmentedCacheFunction indexCacheFunction = new SegmentedCacheFunction(new KeyFirstWindowKeySchema(), SEGMENT_INTERVAL);
+
+        final Bytes key = bytesKey("a");
+        final byte[] value = bytesValue("0001");
+        final Bytes cacheIndexKey = indexCacheFunction.cacheKey(KeyFirstWindowKeySchema.toStoreKeyBinary(key, 1, 0));
+        final String cacheName = context.taskId() + "-test";
+
+        // Only put index to store
+        cache.put(cacheName,
+            cacheIndexKey,
+            new LRUCacheEntry(
+                new byte[0],
+                new RecordHeaders(),
+                true,
+                context.offset(),
+                context.timestamp(),
+                context.partition(),
+                "")
+        );
+
+        underlyingStore.put(key, value, 1);
+
+        if (hasIndex) {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 1),
+                    windowedPair("aa", "0002", 0)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0001", 1)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverExactKeys() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        final List<KeyValue<Long, byte[]>> expected = asList(
+            KeyValue.pair(0L, bytesValue("0001")),
+            KeyValue.pair(1L, bytesValue("0003")),
+            KeyValue.pair(SEGMENT_INTERVAL, bytesValue("0005"))
+        );
+        final List<KeyValue<Long, byte[]>> actual =
+            toList(cachingStore.fetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)));
+        verifyKeyValueList(expected, actual);
+    }
+
+    @Test
+    public void shouldBackwardFetchAndIterateOverExactKeys() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        final List<KeyValue<Long, byte[]>> expected = asList(
+            KeyValue.pair(SEGMENT_INTERVAL, bytesValue("0005")),
+            KeyValue.pair(1L, bytesValue("0003")),
+            KeyValue.pair(0L, bytesValue("0001"))
+        );
+        final List<KeyValue<Long, byte[]>> actual =
+            toList(cachingStore.backwardFetch(bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)));
+        verifyKeyValueList(expected, actual);
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverKeyRange() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("a", "0001", 0),
+                windowedPair("a", "0003", 1),
+                windowedPair("a", "0005", SEGMENT_INTERVAL)
+            ),
+            toList(cachingStore.fetch(bytesKey("a"), bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("aa", "0002", 0),
+                windowedPair("aa", "0004", 1)),
+            toList(cachingStore.fetch(bytesKey("aa"), bytesKey("aa"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        if (hasIndex) {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0005", SEGMENT_INTERVAL)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    windowedPair("a", "0001", 0),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0005", SEGMENT_INTERVAL)
+                ),
+                toList(cachingStore.fetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldFetchAndIterateOverKeyBackwardRange() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 0);
+        cachingStore.put(bytesKey("a"), bytesValue("0003"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0004"), 1);
+        cachingStore.put(bytesKey("a"), bytesValue("0005"), SEGMENT_INTERVAL);
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("a", "0005", SEGMENT_INTERVAL),
+                windowedPair("a", "0003", 1),
+                windowedPair("a", "0001", 0)
+            ),
+            toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("a"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        verifyKeyValueList(
+            asList(
+                windowedPair("aa", "0004", 1),
+                windowedPair("aa", "0002", 0)),
+            toList(cachingStore.backwardFetch(bytesKey("aa"), bytesKey("aa"), ofEpochMilli(0), ofEpochMilli(Long.MAX_VALUE)))
+        );
+
+        if (!hasIndex) {
+            verifyKeyValueList(
+                // Ordered by timestamp if has no index
+                asList(
+                    windowedPair("a", "0005", SEGMENT_INTERVAL),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0001", 0)
+                ),
+                toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        } else {
+            verifyKeyValueList(
+                asList(
+                    // First because in larger segments
+                    windowedPair("a", "0005", SEGMENT_INTERVAL),
+                    windowedPair("aa", "0004", 1),
+                    windowedPair("aa", "0002", 0),
+                    windowedPair("a", "0003", 1),
+                    windowedPair("a", "0001", 0)
+                ),
+                toList(cachingStore.backwardFetch(bytesKey("a"), bytesKey("aa"), ofEpochMilli(0),
+                    ofEpochMilli(Long.MAX_VALUE)))
+            );
+        }
+    }
+
+    @Test
+    public void shouldReturnSameResultsForSingleKeyFetchAndEqualKeyRangeFetch() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0003"), 2);
+        cachingStore.put(bytesKey("aaa"), bytesValue("0004"), 3);
+
+        try (final WindowStoreIterator<byte[]> singleKeyIterator = cachingStore.fetch(bytesKey("aa"), 0L, 5L);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> keyRangeIterator = cachingStore.fetch(bytesKey("aa"), bytesKey("aa"), 0L, 5L)) {
+
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertFalse(singleKeyIterator.hasNext());
+            assertFalse(keyRangeIterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldReturnSameResultsForSingleKeyFetchAndEqualKeyRangeBackwardFetch() {
+        cachingStore.put(bytesKey("a"), bytesValue("0001"), 0);
+        cachingStore.put(bytesKey("aa"), bytesValue("0002"), 1);
+        cachingStore.put(bytesKey("aa"), bytesValue("0003"), 2);
+        cachingStore.put(bytesKey("aaa"), bytesValue("0004"), 3);
+
+        try (final WindowStoreIterator<byte[]> singleKeyIterator =
+                 cachingStore.backwardFetch(bytesKey("aa"), Instant.ofEpochMilli(0L), Instant.ofEpochMilli(5L));
+             final KeyValueIterator<Windowed<Bytes>, byte[]> keyRangeIterator =
+                 cachingStore.backwardFetch(bytesKey("aa"), bytesKey("aa"), Instant.ofEpochMilli(0L), Instant.ofEpochMilli(5L))) {
+
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertEquals(stringFrom(singleKeyIterator.next().value), stringFrom(keyRangeIterator.next().value));
+            assertFalse(singleKeyIterator.hasNext());
+            assertFalse(keyRangeIterator.hasNext());
+        }
+    }
+
+    @Test
+    public void shouldThrowNullPointerExceptionOnPutNullKey() {
+        assertThrows(NullPointerException.class, () -> cachingStore.put(null, bytesValue("anyValue"), 0L));
+    }
+
+    @Test
+    public void shouldNotThrowNullPointerExceptionOnPutNullValue() {
+        cachingStore.put(bytesKey("a"), null, 0L);
+    }
+
+    @Test
+    public void shouldThrowNullPointerExceptionOnFetchNullKey() {
+        assertThrows(NullPointerException.class, () -> cachingStore.fetch(null, ofEpochMilli(1L), ofEpochMilli(2L)));
+    }
+
+    @Test
+    public void shouldNotThrowInvalidRangeExceptionWithNegativeFromKey() {
+        final Bytes keyFrom = Bytes.wrap(Serdes.Integer().serializer().serialize("", -1));
+        final Bytes keyTo = Bytes.wrap(Serdes.Integer().serializer().serialize("", 1));
+
+        try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(TimeOrderedCachingWindowStore.class);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> iterator = cachingStore.fetch(keyFrom, keyTo, 0L, 10L)) {
+            assertFalse(iterator.hasNext());
+
+            final List<String> messages = appender.getMessages();
+            assertThat(
+                messages,
+                hasItem("Returning empty iterator for fetch with invalid key range: from > to." +
+                    " This may be due to range arguments set in the wrong order, " +
+                    "or serdes that don't preserve ordering when lexicographically comparing the serialized bytes." +
+                    " Note that the built-in numerical serdes do not follow this for negative numbers")
+            );
+        }
+    }
+
+    @Test
+    public void shouldNotThrowInvalidBackwardRangeExceptionWithNegativeFromKey() {
+        final Bytes keyFrom = Bytes.wrap(Serdes.Integer().serializer().serialize("", -1));
+        final Bytes keyTo = Bytes.wrap(Serdes.Integer().serializer().serialize("", 1));
+
+        try (final LogCaptureAppender appender = LogCaptureAppender.createAndRegister(TimeOrderedCachingWindowStore.class);
+             final KeyValueIterator<Windowed<Bytes>, byte[]> iterator =
+                 cachingStore.backwardFetch(keyFrom, keyTo, Instant.ofEpochMilli(0L), Instant.ofEpochMilli(10L))) {
+            assertFalse(iterator.hasNext());
+
+            final List<String> messages = appender.getMessages();
+            assertThat(
+                messages,
+                hasItem("Returning empty iterator for fetch with invalid key range: from > to." +
+                    " This may be due to serdes that don't preserve ordering when lexicographically comparing the serialized bytes." +
+                    " Note that the built-in numerical serdes do not follow this for negative numbers")
+            );
+        }
+    }
+
+    @Test
+    public void shouldCloseCacheAndWrappedStoreAfterErrorDuringCacheFlush() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on flush"));
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    @Test
+    public void shouldCloseWrappedStoreAfterErrorDuringCacheClose() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on close"));
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    @Test
+    public void shouldCloseCacheAfterErrorDuringStateStoreClose() {
+        setUpCloseTests();
+        EasyMock.reset(cache);
+        cache.flush(CACHE_NAMESPACE);
+        cache.close(CACHE_NAMESPACE);
+        EasyMock.replay(cache);
+        EasyMock.reset(underlyingStore);
+        underlyingStore.close();
+        EasyMock.expectLastCall().andThrow(new RuntimeException("Simulating an error on close"));
+        EasyMock.replay(underlyingStore);
+
+        assertThrows(RuntimeException.class, cachingStore::close);
+        EasyMock.verify(cache, underlyingStore);
+    }
+
+    private void setUpCloseTests() {
+        underlyingStore = EasyMock.createNiceMock(RocksDBTimeOrderedWindowStore.class);
+        EasyMock.expect(underlyingStore.name()).andStubReturn("store-name");
+        EasyMock.expect(underlyingStore.isOpen()).andStubReturn(true);
+        EasyMock.replay(underlyingStore);
+        cachingStore = new TimeOrderedCachingWindowStore(underlyingStore, WINDOW_SIZE, SEGMENT_INTERVAL);
+        cache = EasyMock.createNiceMock(ThreadCache.class);
+        context = new InternalMockProcessorContext<>(TestUtils.tempDirectory(), null, null, null, cache);
+        context.setRecordContext(new ProcessorRecordContext(10, 0, 0, TOPIC, new RecordHeaders()));
+        cachingStore.init((StateStoreContext) context, cachingStore);
+    }
+
+    private static KeyValue<Windowed<Bytes>, byte[]> windowedPair(final String key, final String value, final long timestamp) {
+        return KeyValue.pair(
+            new Windowed<>(bytesKey(key), new TimeWindow(timestamp, timestamp + WINDOW_SIZE)),
+            bytesValue(value));
+    }
+
+    private int addItemsToCache() {
+        int cachedSize = 0;
+        int i = 0;
+        while (cachedSize < MAX_CACHE_SIZE_BYTES) {
+            final String kv = String.valueOf(i++);
+            cachingStore.put(bytesKey(kv), bytesValue(kv), DEFAULT_TIMESTAMP);
+            cachedSize += memoryCacheEntrySize(kv.getBytes(), kv.getBytes(), TOPIC) +
+                8 + // timestamp
+                4; // sequenceNumber
+        }
+        return i;
+    }
+
+}
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedSegmentsTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedSegmentsTest.java
index 722cb69fd13f3..50bad3f60c5e3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedSegmentsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedSegmentsTest.java
@@ -28,6 +28,7 @@
 import org.junit.Test;
 
 import java.io.File;
+import java.nio.file.Files;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.List;
@@ -304,7 +305,7 @@ public void shouldUpdateSegmentFileNameFromOldDateFormatToNewFormat() throws Exc
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final File oldSegment = new File(storeDirectoryPath + File.separator + storeName + "-" + formatter.format(new Date(segmentId * segmentInterval)));
             //noinspection ResultOfMethodCallIgnored
-            oldSegment.createNewFile();
+            Files.createFile(oldSegment.toPath());
         }
 
         segments.openExisting(context, -1L);
@@ -312,7 +313,7 @@ public void shouldUpdateSegmentFileNameFromOldDateFormatToNewFormat() throws Exc
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final String segmentName = storeName + "." + (long) segmentId * segmentInterval;
             final File newSegment = new File(storeDirectoryPath + File.separator + segmentName);
-            assertTrue(newSegment.exists());
+            assertTrue(Files.exists(newSegment.toPath()));
         }
     }
 
@@ -326,14 +327,14 @@ public void shouldUpdateSegmentFileNameFromOldColonFormatToNewFormat() throws Ex
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final File oldSegment = new File(storeDirectoryPath + File.separator + storeName + ":" + segmentId * (RETENTION_PERIOD / (NUM_SEGMENTS - 1)));
             //noinspection ResultOfMethodCallIgnored
-            oldSegment.createNewFile();
+            Files.createFile(oldSegment.toPath());
         }
 
         segments.openExisting(context, -1L);
 
         for (int segmentId = 0; segmentId < NUM_SEGMENTS; ++segmentId) {
             final File newSegment = new File(storeDirectoryPath + File.separator + storeName + "." + segmentId * (RETENTION_PERIOD / (NUM_SEGMENTS - 1)));
-            assertTrue(newSegment.exists());
+            assertTrue(Files.exists(newSegment.toPath()));
         }
     }
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilderTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilderTest.java
index 586ec73ea66f8..504a426ed5ed6 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilderTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/TimestampedWindowStoreBuilderTest.java
@@ -18,6 +18,7 @@
 package org.apache.kafka.streams.state.internals;
 
 import java.time.Duration;
+import java.util.Collection;
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.MockTime;
 import org.apache.kafka.streams.processor.StateStore;
@@ -25,16 +26,21 @@
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.TimestampedWindowStore;
 import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
-import org.easymock.EasyMockRunner;
+import org.apache.kafka.streams.state.WindowStore;
+import org.easymock.EasyMockRule;
 import org.easymock.Mock;
 import org.easymock.MockType;
 import org.hamcrest.CoreMatchers;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 
 import java.util.Collections;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 
+import static java.util.Arrays.asList;
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.replay;
 import static org.easymock.EasyMock.reset;
@@ -44,17 +50,39 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThrows;
 
-@RunWith(EasyMockRunner.class)
+@RunWith(Parameterized.class)
 public class TimestampedWindowStoreBuilderTest {
+    private static final String TIMESTAMP_STORE_NAME = "Timestamped Store";
+    private static final String TIMEORDERED_STORE_NAME = "TimeOrdered Store";
 
+    @Rule
+    public EasyMockRule rule = new EasyMockRule(this);
     @Mock(type = MockType.NICE)
     private WindowBytesStoreSupplier supplier;
     @Mock(type = MockType.NICE)
-    private RocksDBTimestampedWindowStore inner;
+    private RocksDBTimestampedWindowStore timestampedStore;
+    @Mock(type = MockType.NICE)
+    private RocksDBTimeOrderedWindowStore timeOrderedStore;
     private TimestampedWindowStoreBuilder<String, String> builder;
+    private boolean isTimeOrderedStore;
+    private WindowStore inner;
+
+    @Parameter
+    public String storeName;
 
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+        return asList(new Object[][] {
+            {TIMESTAMP_STORE_NAME},
+            {TIMEORDERED_STORE_NAME}
+        });
+    }
+
+    @SuppressWarnings("unchecked")
     @Before
     public void setUp() {
+        isTimeOrderedStore = TIMEORDERED_STORE_NAME.equals(storeName);
+        inner = isTimeOrderedStore ? timeOrderedStore : timestampedStore;
         expect(supplier.get()).andReturn(inner);
         expect(supplier.name()).andReturn("name");
         expect(supplier.metricsScope()).andReturn("metricScope");
@@ -93,7 +121,11 @@ public void shouldHaveCachingStoreWhenEnabled() {
         final TimestampedWindowStore<String, String> store = builder.withCachingEnabled().build();
         final StateStore wrapped = ((WrappedStateStore) store).wrapped();
         assertThat(store, instanceOf(MeteredTimestampedWindowStore.class));
-        assertThat(wrapped, instanceOf(CachingWindowStore.class));
+        if (isTimeOrderedStore) {
+            assertThat(wrapped, instanceOf(TimeOrderedCachingWindowStore.class));
+        } else {
+            assertThat(wrapped, instanceOf(CachingWindowStore.class));
+        }
     }
 
     @Test
@@ -116,7 +148,11 @@ public void shouldHaveCachingAndChangeLoggingWhenBothEnabled() {
         final WrappedStateStore caching = (WrappedStateStore) ((WrappedStateStore) store).wrapped();
         final WrappedStateStore changeLogging = (WrappedStateStore) caching.wrapped();
         assertThat(store, instanceOf(MeteredTimestampedWindowStore.class));
-        assertThat(caching, instanceOf(CachingWindowStore.class));
+        if (isTimeOrderedStore) {
+            assertThat(caching, instanceOf(TimeOrderedCachingWindowStore.class));
+        } else {
+            assertThat(caching, instanceOf(CachingWindowStore.class));
+        }
         assertThat(changeLogging, instanceOf(ChangeLoggingTimestampedWindowBytesStore.class));
         assertThat(changeLogging.wrapped(), CoreMatchers.equalTo(inner));
     }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/WindowKeySchemaTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/WindowKeySchemaTest.java
index e9360534a8b0b..4729a73f14f6f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/WindowKeySchemaTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/WindowKeySchemaTest.java
@@ -130,7 +130,7 @@ interface TriFunction<A, B, C, R> {
     final private KeySchema keySchema;
     final private Serde<Windowed<String>> keySerde = new WindowedSerdes.TimeWindowedSerde<>(serde, Long.MAX_VALUE);
     final private StateSerdes<String, byte[]> stateSerdes = new StateSerdes<>("dummy", serde, Serdes.ByteArray());
-    final private SchemaType schemaType;
+    final public SchemaType schemaType;
 
     private enum SchemaType {
         WindowKeySchema,
@@ -141,13 +141,13 @@ private enum SchemaType {
     @Parameterized.Parameters(name = "{0}")
     public static Collection<Object[]> data() {
         return asList(new Object[][] {
-            {"WindowKeySchema", SchemaType.WindowKeySchema},
-            {"PrefixedTimeFirstSchema", SchemaType.PrefixedTimeFirstSchema},
-            {"PrefixedKeyFirstSchema", SchemaType.PrefixedKeyFirstSchema}
+            {SchemaType.WindowKeySchema},
+            {SchemaType.PrefixedTimeFirstSchema},
+            {SchemaType.PrefixedKeyFirstSchema}
         });
     }
 
-    public WindowKeySchemaTest(final String name, final SchemaType type) {
+    public WindowKeySchemaTest(final SchemaType type) {
         schemaType = type;
         keySchema = SCHEMA_TYPE_MAP.get(type);
     }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/NamedCacheMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/NamedCacheMetricsTest.java
index 0b525dbd4dd1b..9debdcd578b9f 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/NamedCacheMetricsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/NamedCacheMetricsTest.java
@@ -19,9 +19,9 @@
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
+
 import org.junit.Test;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
+import org.mockito.MockedStatic;
 
 import java.util.Map;
 
@@ -29,6 +29,9 @@
 import static org.apache.kafka.common.utils.Utils.mkMap;
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
 
 public class NamedCacheMetricsTest {
 
@@ -48,18 +51,21 @@ public void shouldGetHitRatioSensorWithBuiltInMetricsVersionCurrent() {
         final String hitRatio = "hit-ratio";
         when(streamsMetrics.cacheLevelSensor(THREAD_ID, TASK_ID, STORE_NAME, hitRatio, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
         when(streamsMetrics.cacheLevelTagMap(THREAD_ID, TASK_ID, STORE_NAME)).thenReturn(tagMap);
-        StreamsMetricsImpl.addAvgAndMinAndMaxToSensor(
-            expectedSensor,
-            StreamsMetricsImpl.CACHE_LEVEL_GROUP,
-            tagMap,
-            hitRatio,
-            HIT_RATIO_AVG_DESCRIPTION,
-            HIT_RATIO_MIN_DESCRIPTION,
-            HIT_RATIO_MAX_DESCRIPTION);
-
-        final Sensor sensor = NamedCacheMetrics.hitRatioSensor(streamsMetrics, THREAD_ID, TASK_ID, STORE_NAME);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = NamedCacheMetrics.hitRatioSensor(streamsMetrics, THREAD_ID, TASK_ID, STORE_NAME);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMinAndMaxToSensor(
+                    expectedSensor,
+                    StreamsMetricsImpl.CACHE_LEVEL_GROUP,
+                    tagMap,
+                    hitRatio,
+                    HIT_RATIO_AVG_DESCRIPTION,
+                    HIT_RATIO_MIN_DESCRIPTION,
+                    HIT_RATIO_MAX_DESCRIPTION
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
-
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/StateStoreMetricsTest.java b/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/StateStoreMetricsTest.java
index f9813163e3f84..48413306a93c3 100644
--- a/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/StateStoreMetricsTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/state/internals/metrics/StateStoreMetricsTest.java
@@ -19,9 +19,9 @@
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
+
 import org.junit.Test;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
+import org.mockito.MockedStatic;
 
 import java.util.Collections;
 import java.util.Map;
@@ -29,6 +29,9 @@
 
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
 
 public class StateStoreMetricsTest {
 
@@ -48,12 +51,14 @@ public void shouldGetPutSensor() {
         final String descriptionOfRate = "The average number of calls to put per second";
         final String descriptionOfAvg = "The average latency of calls to put";
         final String descriptionOfMax = "The maximum latency of calls to put";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.putSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.putSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -63,12 +68,14 @@ public void shouldGetPutIfAbsentSensor() {
         final String descriptionOfRate = "The average number of calls to put-if-absent per second";
         final String descriptionOfAvg = "The average latency of calls to put-if-absent";
         final String descriptionOfMax = "The maximum latency of calls to put-if-absent";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.putIfAbsentSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.putIfAbsentSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -78,12 +85,14 @@ public void shouldGetPutAllSensor() {
         final String descriptionOfRate = "The average number of calls to put-all per second";
         final String descriptionOfAvg = "The average latency of calls to put-all";
         final String descriptionOfMax = "The maximum latency of calls to put-all";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.putAllSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.putAllSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -93,12 +102,14 @@ public void shouldGetFetchSensor() {
         final String descriptionOfRate = "The average number of calls to fetch per second";
         final String descriptionOfAvg = "The average latency of calls to fetch";
         final String descriptionOfMax = "The maximum latency of calls to fetch";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.fetchSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.fetchSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -108,12 +119,14 @@ public void shouldGetGetSensor() {
         final String descriptionOfRate = "The average number of calls to get per second";
         final String descriptionOfAvg = "The average latency of calls to get";
         final String descriptionOfMax = "The maximum latency of calls to get";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.getSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.getSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -123,12 +136,14 @@ public void shouldGetAllSensor() {
         final String descriptionOfRate = "The average number of calls to all per second";
         final String descriptionOfAvg = "The average latency of calls to all";
         final String descriptionOfMax = "The maximum latency of calls to all";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.allSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.allSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -138,43 +153,17 @@ public void shouldGetRangeSensor() {
         final String descriptionOfRate = "The average number of calls to range per second";
         final String descriptionOfAvg = "The average latency of calls to range";
         final String descriptionOfMax = "The maximum latency of calls to range";
-        shouldGetSensor(
-            metricName,
-            descriptionOfRate,
-            descriptionOfAvg,
-            descriptionOfMax,
-            () -> StateStoreMetrics.rangeSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+        setupStreamsMetrics(
+            metricName
         );
-    }
 
-    @Test
-    public void shouldGetPrefixScanSensor() {
-        final String metricName = "prefix-scan";
-        final String descriptionOfRate = "The average number of calls to prefix-scan per second";
-        final String descriptionOfAvg = "The average latency of calls to prefix-scan";
-        final String descriptionOfMax = "The maximum latency of calls to prefix-scan";
-        when(streamsMetrics.storeLevelSensor(TASK_ID, STORE_NAME, metricName, RecordingLevel.DEBUG))
-                .thenReturn(expectedSensor);
-        when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, STORE_NAME)).thenReturn(storeTagMap);
-        StreamsMetricsImpl.addInvocationRateToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            storeTagMap,
+        getAndVerifySensor(
+            () -> StateStoreMetrics.rangeSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate
-        );
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            storeTagMap,
-            latencyMetricName(metricName),
             descriptionOfAvg,
-            descriptionOfMax
+            descriptionOfMax,
+            descriptionOfRate
         );
-
-        final Sensor sensor = StateStoreMetrics.prefixScanSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
     }
 
     @Test
@@ -183,12 +172,14 @@ public void shouldGetFlushSensor() {
         final String descriptionOfRate = "The average number of calls to flush per second";
         final String descriptionOfAvg = "The average latency of calls to flush";
         final String descriptionOfMax = "The maximum latency of calls to flush";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.flushSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.flushSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -198,12 +189,14 @@ public void shouldGetRemoveSensor() {
         final String descriptionOfRate = "The average number of calls to remove per second";
         final String descriptionOfAvg = "The average latency of calls to remove";
         final String descriptionOfMax = "The maximum latency of calls to remove";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.removeSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.removeSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -213,12 +206,14 @@ public void shouldGetDeleteSensor() {
         final String descriptionOfRate = "The average number of calls to delete per second";
         final String descriptionOfAvg = "The average latency of calls to delete";
         final String descriptionOfMax = "The maximum latency of calls to delete";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.deleteSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.deleteSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
@@ -228,25 +223,65 @@ public void shouldGetRestoreSensor() {
         final String descriptionOfRate = "The average number of restorations per second";
         final String descriptionOfAvg = "The average latency of restorations";
         final String descriptionOfMax = "The maximum latency of restorations";
-        shouldGetSensor(
+        setupStreamsMetrics(metricName);
+
+        getAndVerifySensor(
+            () -> StateStoreMetrics.restoreSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics),
             metricName,
-            descriptionOfRate,
             descriptionOfAvg,
             descriptionOfMax,
-            () -> StateStoreMetrics.restoreSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics)
+            descriptionOfRate
         );
     }
 
+    @Test
+    public void shouldGetPrefixScanSensor() {
+        final String metricName = "prefix-scan";
+        final String descriptionOfRate = "The average number of calls to prefix-scan per second";
+        final String descriptionOfAvg = "The average latency of calls to prefix-scan";
+        final String descriptionOfMax = "The maximum latency of calls to prefix-scan";
+        when(streamsMetrics.storeLevelSensor(TASK_ID, STORE_NAME, metricName, RecordingLevel.DEBUG))
+            .thenReturn(expectedSensor);
+        when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, STORE_NAME)).thenReturn(storeTagMap);
+
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = StateStoreMetrics.prefixScanSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
+
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    metricName,
+                    descriptionOfRate
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    latencyMetricName(metricName),
+                    descriptionOfAvg,
+                    descriptionOfMax
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
+    }
+
     @Test
     public void shouldGetSuppressionBufferCountSensor() {
         final String metricName = "suppression-buffer-count";
         final String descriptionOfAvg = "The average count of buffered records";
         final String descriptionOfMax = "The maximum count of buffered records";
-        shouldGetSuppressionBufferSensor(
+        setupStreamsMetricsForSuppressionBufferSensor(metricName);
+
+        verifySensorSuppressionBufferSensor(
+            () -> StateStoreMetrics.suppressionBufferCountSensor(TASK_ID, STORE_TYPE, BUFFER_NAME, streamsMetrics),
             metricName,
             descriptionOfAvg,
-            descriptionOfMax,
-            () -> StateStoreMetrics.suppressionBufferCountSensor(TASK_ID, STORE_TYPE, BUFFER_NAME, streamsMetrics)
+            descriptionOfMax
         );
     }
 
@@ -255,11 +290,13 @@ public void shouldGetSuppressionBufferSizeSensor() {
         final String metricName = "suppression-buffer-size";
         final String descriptionOfAvg = "The average size of buffered records";
         final String descriptionOfMax = "The maximum size of buffered records";
-        shouldGetSuppressionBufferSensor(
+        setupStreamsMetricsForSuppressionBufferSensor(metricName);
+
+        verifySensorSuppressionBufferSensor(
+            () -> StateStoreMetrics.suppressionBufferSizeSensor(TASK_ID, STORE_TYPE, BUFFER_NAME, streamsMetrics),
             metricName,
             descriptionOfAvg,
-            descriptionOfMax,
-            () -> StateStoreMetrics.suppressionBufferSizeSensor(TASK_ID, STORE_TYPE, BUFFER_NAME, streamsMetrics)
+            descriptionOfMax
         );
     }
 
@@ -269,114 +306,124 @@ public void shouldGetExpiredWindowRecordDropSensor() {
         final String descriptionOfRate = "The average number of dropped records due to an expired window per second";
         final String descriptionOfCount = "The total number of dropped records due to an expired window";
         when(streamsMetrics.storeLevelSensor(TASK_ID, STORE_NAME, metricName, RecordingLevel.INFO))
-                .thenReturn(expectedSensor);
-
+            .thenReturn(expectedSensor);
         when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, STORE_NAME)).thenReturn(storeTagMap);
-        StreamsMetricsImpl.addInvocationRateAndCountToSensor(
-            expectedSensor,
-            "stream-" + STORE_TYPE + "-metrics",
-            storeTagMap,
-            metricName,
-            descriptionOfRate,
-            descriptionOfCount
-        );
-
-        final Sensor sensor =
-            StateStoreMetrics.expiredWindowRecordDropSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
 
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor =
+                StateStoreMetrics.expiredWindowRecordDropSensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateAndCountToSensor(
+                    expectedSensor,
+                    "stream-" + STORE_TYPE + "-metrics",
+                    storeTagMap,
+                    metricName,
+                    descriptionOfRate,
+                    descriptionOfCount
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     @Test
     public void shouldGetRecordE2ELatencySensor() {
         final String metricName = "record-e2e-latency";
-
         final String e2eLatencyDescription =
             "end-to-end latency of a record, measuring by comparing the record timestamp with the "
                 + "system time when it has been fully processed by the node";
         final String descriptionOfAvg = "The average " + e2eLatencyDescription;
         final String descriptionOfMin = "The minimum " + e2eLatencyDescription;
         final String descriptionOfMax = "The maximum " + e2eLatencyDescription;
-
         when(streamsMetrics.storeLevelSensor(TASK_ID, STORE_NAME, metricName, RecordingLevel.TRACE))
-                .thenReturn(expectedSensor);
+            .thenReturn(expectedSensor);
         when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, STORE_NAME)).thenReturn(storeTagMap);
 
-        StreamsMetricsImpl.addAvgAndMinAndMaxToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            storeTagMap,
-            metricName,
-            descriptionOfAvg,
-            descriptionOfMin,
-            descriptionOfMax
-        );
-
-        final Sensor sensor =
-            StateStoreMetrics.e2ELatencySensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
-
-        assertThat(sensor, is(expectedSensor));
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor =
+                StateStoreMetrics.e2ELatencySensor(TASK_ID, STORE_TYPE, STORE_NAME, streamsMetrics);
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMinAndMaxToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    metricName,
+                    descriptionOfAvg,
+                    descriptionOfMin,
+                    descriptionOfMax
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
-    private void shouldGetSensor(final String metricName,
-                                 final String descriptionOfRate,
-                                 final String descriptionOfAvg,
-                                 final String descriptionOfMax,
-                                 final Supplier<Sensor> sensorSupplier) {
-        when(streamsMetrics.storeLevelSensor(
-                TASK_ID,
-                STORE_NAME,
-                metricName,
-                RecordingLevel.DEBUG
-        )).thenReturn(expectedSensor);
-
-        StreamsMetricsImpl.addInvocationRateToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            storeTagMap,
-            metricName,
-            descriptionOfRate
-        );
+    private void setupStreamsMetrics(final String metricName) {
+        when(streamsMetrics.storeLevelSensor(TASK_ID, STORE_NAME, metricName, RecordingLevel.DEBUG))
+            .thenReturn(expectedSensor);
         when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, STORE_NAME)).thenReturn(storeTagMap);
+    }
 
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            storeTagMap,
-            latencyMetricName(metricName),
-            descriptionOfAvg,
-            descriptionOfMax
-        );
-
-        final Sensor sensor = sensorSupplier.get();
-
-        assertThat(sensor, is(expectedSensor));
+    private void getAndVerifySensor(final Supplier<Sensor> sensorSupplier,
+                                    final String metricName,
+                                    final String descriptionOfAvg,
+                                    final String descriptionOfMax,
+                                    final String descriptionOfRate) {
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = sensorSupplier.get();
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addInvocationRateToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    metricName,
+                    descriptionOfRate
+                )
+            );
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    latencyMetricName(metricName),
+                    descriptionOfAvg,
+                    descriptionOfMax
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 
     private String latencyMetricName(final String metricName) {
         return metricName + StreamsMetricsImpl.LATENCY_SUFFIX;
     }
 
-    private void shouldGetSuppressionBufferSensor(final String metricName,
-                                                  final String descriptionOfAvg,
-                                                  final String descriptionOfMax,
-                                                  final Supplier<Sensor> sensorSupplier) {
-        final Map<String, String> tagMap;
-        when(streamsMetrics.storeLevelSensor(TASK_ID, BUFFER_NAME, metricName, RecordingLevel.DEBUG)).thenReturn(expectedSensor);
-        tagMap = storeTagMap;
-        when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, BUFFER_NAME)).thenReturn(tagMap);
-
-        StreamsMetricsImpl.addAvgAndMaxToSensor(
-            expectedSensor,
-            STORE_LEVEL_GROUP,
-            tagMap,
+    private void setupStreamsMetricsForSuppressionBufferSensor(final String metricName) {
+        when(streamsMetrics.storeLevelSensor(
+            TASK_ID,
+            BUFFER_NAME,
             metricName,
-            descriptionOfAvg,
-            descriptionOfMax
-        );
-
-        final Sensor sensor = sensorSupplier.get();
+            RecordingLevel.DEBUG
+        )).thenReturn(expectedSensor);
+        when(streamsMetrics.storeLevelTagMap(TASK_ID, STORE_TYPE, BUFFER_NAME)).thenReturn(storeTagMap);
+    }
 
-        assertThat(sensor, is(expectedSensor));
+    private void verifySensorSuppressionBufferSensor(final Supplier<Sensor> sensorSupplier,
+                                                     final String metricName,
+                                                     final String descriptionOfAvg,
+                                                     final String descriptionOfMax) {
+        try (final MockedStatic<StreamsMetricsImpl> streamsMetricsStaticMock = mockStatic(StreamsMetricsImpl.class)) {
+            final Sensor sensor = sensorSupplier.get();
+            streamsMetricsStaticMock.verify(
+                () -> StreamsMetricsImpl.addAvgAndMaxToSensor(
+                    expectedSensor,
+                    STORE_LEVEL_GROUP,
+                    storeTagMap,
+                    metricName,
+                    descriptionOfAvg,
+                    descriptionOfMax
+                )
+            );
+            assertThat(sensor, is(expectedSensor));
+        }
     }
 }
diff --git a/streams/src/test/java/org/apache/kafka/streams/tests/RelationalSmokeTestTest.java b/streams/src/test/java/org/apache/kafka/streams/tests/RelationalSmokeTestTest.java
index a8f118660fcb2..5bd414e2a5f54 100644
--- a/streams/src/test/java/org/apache/kafka/streams/tests/RelationalSmokeTestTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/tests/RelationalSmokeTestTest.java
@@ -23,7 +23,9 @@
 import org.apache.kafka.streams.TestOutputTopic;
 import org.apache.kafka.streams.TopologyTestDriver;
 import org.apache.kafka.test.TestUtils;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.util.Map;
 import java.util.TreeMap;
@@ -33,6 +35,9 @@
 
 public class RelationalSmokeTestTest extends SmokeTestUtil {
 
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
+
     @Test
     public void verifySmokeTestLogic() {
         try (final TopologyTestDriver driver =
diff --git a/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java b/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java
index 936a41a6c6601..0b93006adf16d 100644
--- a/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java
+++ b/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java
@@ -19,7 +19,6 @@
 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.common.utils.KafkaThread;
-import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.StreamsBuilder;
@@ -38,10 +37,8 @@
 import org.apache.kafka.streams.kstream.Windowed;
 import org.apache.kafka.streams.state.Stores;
 import org.apache.kafka.streams.state.WindowStore;
+import org.apache.kafka.test.TestUtils;
 
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
 import java.time.Duration;
 import java.time.Instant;
 import java.util.Properties;
@@ -66,28 +63,6 @@ private static void addShutdownHook(final String name, final Runnable runnable)
         }
     }
 
-    private static File tempDirectory() {
-        final String prefix = "kafka-";
-        final File file;
-        try {
-            file = Files.createTempDirectory(prefix).toFile();
-        } catch (final IOException ex) {
-            throw new RuntimeException("Failed to create a temp dir", ex);
-        }
-        file.deleteOnExit();
-
-        addShutdownHook("delete-temp-file-shutdown-hook", () -> {
-            try {
-                Utils.delete(file);
-            } catch (final IOException e) {
-                System.out.println("Error deleting " + file.getAbsolutePath());
-                e.printStackTrace(System.out);
-            }
-        });
-
-        return file;
-    }
-
     public SmokeTestClient(final String name) {
         this.name = name;
     }
@@ -156,7 +131,7 @@ private Properties getStreamsConfig(final Properties props) {
         final Properties fullProps = new Properties(props);
         fullProps.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");
         fullProps.put(StreamsConfig.CLIENT_ID_CONFIG, "SmokeTest-" + name);
-        fullProps.put(StreamsConfig.STATE_DIR_CONFIG, tempDirectory().getAbsolutePath());
+        fullProps.put(StreamsConfig.STATE_DIR_CONFIG, TestUtils.tempDirectory().getAbsolutePath());
         fullProps.put(StreamsConfig.PROCESSING_GUARANTEE_CONFIG, StreamsConfig.EXACTLY_ONCE_V2);
         fullProps.putAll(props);
         return fullProps;
diff --git a/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..2bbb25db395ed 100644
--- a/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,15 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -163,7 +173,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -183,7 +194,16 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                             intSerde.serializer().serialize("", value)
                         );
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
+
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +215,60 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+
+        }
+        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+                              List<ProducerRecord<byte[], byte[]>> needRetry,
+                              final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println("retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
+            producer.flush();
+            needRetry = needRetry2;
 
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+                              final String topic,
+                              final byte[] keyBytes,
+                              final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +359,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/src/test/java/org/apache/kafka/streams/tests/SystemTestUtilTest.java b/streams/src/test/java/org/apache/kafka/streams/tests/SystemTestUtilTest.java
index a2a26a3fa8c27..5f847a683fef2 100644
--- a/streams/src/test/java/org/apache/kafka/streams/tests/SystemTestUtilTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/tests/SystemTestUtilTest.java
@@ -18,7 +18,9 @@
 package org.apache.kafka.streams.tests;
 
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -28,6 +30,8 @@
 import static org.junit.Assert.assertThrows;
 
 public class SystemTestUtilTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private final Map<String, String> expectedParsedMap = new TreeMap<>();
 
diff --git a/streams/src/test/java/org/apache/kafka/streams/tools/StreamsResetterTest.java b/streams/src/test/java/org/apache/kafka/streams/tools/StreamsResetterTest.java
index d4f7841000113..dc3cf65b2cf9e 100644
--- a/streams/src/test/java/org/apache/kafka/streams/tools/StreamsResetterTest.java
+++ b/streams/src/test/java/org/apache/kafka/streams/tools/StreamsResetterTest.java
@@ -29,7 +29,9 @@
 import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.TopicPartitionInfo;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 import java.time.Duration;
 import java.time.Instant;
@@ -44,6 +46,8 @@
 import static org.junit.Assert.assertTrue;
 
 public class StreamsResetterTest {
+    @Rule
+    public Timeout globalTimeout = Timeout.seconds(600);
 
     private static final String TOPIC = "topic1";
     private final StreamsResetter streamsResetter = new StreamsResetter();
diff --git a/streams/src/test/java/org/apache/kafka/test/InternalMockProcessorContext.java b/streams/src/test/java/org/apache/kafka/test/InternalMockProcessorContext.java
index 650c7f75c9055..5192a1f67821c 100644
--- a/streams/src/test/java/org/apache/kafka/test/InternalMockProcessorContext.java
+++ b/streams/src/test/java/org/apache/kafka/test/InternalMockProcessorContext.java
@@ -35,6 +35,7 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.AbstractProcessorContext;
 import org.apache.kafka.streams.processor.internals.ChangelogRecordDeserializationHelper;
@@ -459,7 +460,9 @@ public void logChange(final String storeName,
             taskId().partition(),
             timestamp,
             BYTES_KEY_SERIALIZER,
-            BYTEARRAY_VALUE_SERIALIZER);
+            BYTEARRAY_VALUE_SERIALIZER,
+            null,
+            null);
     }
 
     @Override
@@ -500,4 +503,18 @@ public void addChangelogForStore(final String storeName, final String changelogT
     public String changelogFor(final String storeName) {
         return storeToChangelogTopic.get(storeName);
     }
+
+    @Override
+    public <K extends KOut, V extends VOut> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K extends KOut, V extends VOut> void forward(final FixedKeyRecord<K, V> record,
+                                                         final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessor.java b/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessor.java
new file mode 100644
index 0000000000000..d737ecdf93437
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessor.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.kafka.streams.KeyValueTimestamp;
+import org.apache.kafka.streams.processor.Cancellable;
+import org.apache.kafka.streams.processor.PunctuationType;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorContext;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+public class MockApiFixedKeyProcessor<KIn, VIn, VOut> implements FixedKeyProcessor<KIn, VIn, VOut> {
+
+    private final ArrayList<FixedKeyRecord<KIn, VIn>> processed = new ArrayList<>();
+    private final Map<KIn, ValueAndTimestamp<VIn>> lastValueAndTimestampPerKey = new HashMap<>();
+
+    private final ArrayList<Long> punctuatedStreamTime = new ArrayList<>();
+    private final ArrayList<Long> punctuatedSystemTime = new ArrayList<>();
+
+    private Cancellable scheduleCancellable;
+
+    private final PunctuationType punctuationType;
+    private final long scheduleInterval;
+
+    private boolean commitRequested = false;
+    private FixedKeyProcessorContext<KIn, VOut> context;
+
+    public MockApiFixedKeyProcessor(final PunctuationType punctuationType,
+                            final long scheduleInterval) {
+        this.punctuationType = punctuationType;
+        this.scheduleInterval = scheduleInterval;
+    }
+
+    public MockApiFixedKeyProcessor() {
+        this(PunctuationType.STREAM_TIME, -1);
+    }
+
+    @Override
+    public void init(final FixedKeyProcessorContext<KIn, VOut> context) {
+        this.context = context;
+        if (scheduleInterval > 0L) {
+            scheduleCancellable = context.schedule(
+                Duration.ofMillis(scheduleInterval),
+                punctuationType,
+                (punctuationType == PunctuationType.STREAM_TIME ? punctuatedStreamTime : punctuatedSystemTime)::add
+            );
+        }
+    }
+
+    @Override
+    public void process(final FixedKeyRecord<KIn, VIn> record) {
+        final KIn key = record.key();
+        final VIn value = record.value();
+        final KeyValueTimestamp<KIn, VIn> keyValueTimestamp = new KeyValueTimestamp<>(key, value, record.timestamp());
+
+        if (value != null) {
+            lastValueAndTimestampPerKey.put(key, ValueAndTimestamp.make(value, record.timestamp()));
+        } else {
+            lastValueAndTimestampPerKey.remove(key);
+        }
+
+        processed.add(record);
+
+        if (commitRequested) {
+            context.commit();
+            commitRequested = false;
+        }
+    }
+
+    public void checkAndClearProcessResult(final KeyValueTimestamp<?, ?>... expected) {
+        assertThat("the number of outputs:" + processed, processed.size(), is(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            final FixedKeyRecord<KIn, VIn> record = processed.get(i);
+            assertThat(
+                "output[" + i + "]:",
+                new KeyValueTimestamp<>(record.key(), record.value(), record.timestamp()),
+                is(expected[i])
+            );
+        }
+
+        processed.clear();
+    }
+
+    public void checkAndClearProcessedRecords(final Record<?, ?>... expected) {
+        assertThat("the number of outputs:" + processed, processed.size(), is(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            assertThat("output[" + i + "]:", processed.get(i), is(expected[i]));
+        }
+
+        processed.clear();
+    }
+
+    public void requestCommit() {
+        commitRequested = true;
+    }
+
+    public void checkEmptyAndClearProcessResult() {
+        assertThat("the number of outputs:", processed.size(), is(0));
+        processed.clear();
+    }
+
+    public void checkAndClearPunctuateResult(final PunctuationType type, final long... expected) {
+        final ArrayList<Long> punctuated = type == PunctuationType.STREAM_TIME ? punctuatedStreamTime : punctuatedSystemTime;
+        assertThat("the number of outputs:", punctuated.size(), is(expected.length));
+
+        for (int i = 0; i < expected.length; i++) {
+            assertThat("output[" + i + "]:", punctuated.get(i), is(expected[i]));
+        }
+
+        processed.clear();
+    }
+
+    public ArrayList<KeyValueTimestamp<KIn, VIn>> processed() {
+        return processed
+            .stream()
+            .map(r -> new KeyValueTimestamp<>(r.key(), r.value(), r.timestamp()))
+            .collect(Collectors.toCollection(ArrayList::new));
+    }
+
+    public Map<KIn, ValueAndTimestamp<VIn>> lastValueAndTimestampPerKey() {
+        return lastValueAndTimestampPerKey;
+    }
+
+    public List<Long> punctuatedStreamTime() {
+        return punctuatedStreamTime;
+    }
+
+    public Cancellable scheduleCancellable() {
+        return scheduleCancellable;
+    }
+
+    public FixedKeyProcessorContext<KIn, VOut> context() {
+        return context;
+    }
+
+    public void context(final FixedKeyProcessorContext<KIn, VOut> context) {
+        this.context = context;
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessorSupplier.java b/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessorSupplier.java
new file mode 100644
index 0000000000000..34c0ff3b286a4
--- /dev/null
+++ b/streams/src/test/java/org/apache/kafka/test/MockApiFixedKeyProcessorSupplier.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.test;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.kafka.streams.processor.PunctuationType;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessor;
+import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
+
+public class MockApiFixedKeyProcessorSupplier<KIn, VIn, VOut>
+    implements FixedKeyProcessorSupplier<KIn, VIn, VOut> {
+
+    private final long scheduleInterval;
+    private final PunctuationType punctuationType;
+    private final List<MockApiFixedKeyProcessor<KIn, VIn, VOut>> processors = new ArrayList<>();
+
+    public MockApiFixedKeyProcessorSupplier() {
+        this(-1L);
+    }
+
+    public MockApiFixedKeyProcessorSupplier(final long scheduleInterval) {
+        this(scheduleInterval, PunctuationType.STREAM_TIME);
+    }
+
+    public MockApiFixedKeyProcessorSupplier(final long scheduleInterval, final PunctuationType punctuationType) {
+        this.scheduleInterval = scheduleInterval;
+        this.punctuationType = punctuationType;
+    }
+
+    @Override
+    public FixedKeyProcessor<KIn, VIn, VOut> get() {
+        final MockApiFixedKeyProcessor<KIn, VIn, VOut> processor = new MockApiFixedKeyProcessor<>(punctuationType, scheduleInterval);
+
+        // to keep tests simple, ignore calls from ApiUtils.checkSupplier
+        if (!StreamsTestUtils.isCheckSupplierCall()) {
+            processors.add(processor);
+        }
+
+        return processor;
+    }
+
+    // get the captured processor assuming that only one processor gets returned from this supplier
+    public MockApiFixedKeyProcessor<KIn, VIn, VOut> theCapturedProcessor() {
+        return capturedProcessors(1).get(0);
+    }
+
+    public int capturedProcessorsCount() {
+        return processors.size();
+    }
+
+    // get the captured processors with the expected number
+    public List<MockApiFixedKeyProcessor<KIn, VIn, VOut>> capturedProcessors(final int expectedNumberOfProcessors) {
+        assertEquals(expectedNumberOfProcessors, processors.size());
+
+        return processors;
+    }
+}
diff --git a/streams/src/test/java/org/apache/kafka/test/MockApiProcessor.java b/streams/src/test/java/org/apache/kafka/test/MockApiProcessor.java
index dd56bad58469b..2f5914bb3d374 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockApiProcessor.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockApiProcessor.java
@@ -22,6 +22,7 @@
 import org.apache.kafka.streams.processor.api.Processor;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
 
 import java.time.Duration;
@@ -29,13 +30,14 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.is;
 
 public class MockApiProcessor<KIn, VIn, KOut, VOut> implements Processor<KIn, VIn, KOut, VOut> {
 
-    private final ArrayList<KeyValueTimestamp<KIn, VIn>> processed = new ArrayList<>();
+    private final ArrayList<Record<KIn, VIn>> processed = new ArrayList<>();
     private final Map<KIn, ValueAndTimestamp<VIn>> lastValueAndTimestampPerKey = new HashMap<>();
 
     private final ArrayList<Long> punctuatedStreamTime = new ArrayList<>();
@@ -83,7 +85,7 @@ public void process(final Record<KIn, VIn> record) {
             lastValueAndTimestampPerKey.remove(key);
         }
 
-        processed.add(keyValueTimestamp);
+        processed.add(record);
 
         if (commitRequested) {
             context.commit();
@@ -92,6 +94,20 @@ public void process(final Record<KIn, VIn> record) {
     }
 
     public void checkAndClearProcessResult(final KeyValueTimestamp<?, ?>... expected) {
+        assertThat("the number of outputs:" + processed, processed.size(), is(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            final Record<KIn, VIn> record = processed.get(i);
+            assertThat(
+                "output[" + i + "]:",
+                new KeyValueTimestamp<>(record.key(), record.value(), record.timestamp()),
+                is(expected[i])
+            );
+        }
+
+        processed.clear();
+    }
+
+    public void checkAndClearProcessedRecords(final Record<?, ?>... expected) {
         assertThat("the number of outputs:" + processed, processed.size(), is(expected.length));
         for (int i = 0; i < expected.length; i++) {
             assertThat("output[" + i + "]:", processed.get(i), is(expected[i]));
@@ -120,8 +136,17 @@ public void checkAndClearPunctuateResult(final PunctuationType type, final long.
         processed.clear();
     }
 
+    public void addProcessorMetadata(final String key, final long value) {
+        if (context instanceof InternalProcessorContext) {
+            ((InternalProcessorContext<KOut, VOut>) context).addProcessorMetadataKeyValue(key, value);
+        }
+    }
+
     public ArrayList<KeyValueTimestamp<KIn, VIn>> processed() {
-        return processed;
+        return processed
+            .stream()
+            .map(r -> new KeyValueTimestamp<>(r.key(), r.value(), r.timestamp()))
+            .collect(Collectors.toCollection(ArrayList::new));
     }
 
     public Map<KIn, ValueAndTimestamp<VIn>> lastValueAndTimestampPerKey() {
diff --git a/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java b/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java
index 880f2cb3bf5d1..53b80ae38b238 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockClientSupplier.java
@@ -24,7 +24,6 @@
 import org.apache.kafka.clients.producer.MockProducer;
 import org.apache.kafka.clients.producer.Producer;
 import org.apache.kafka.clients.producer.ProducerConfig;
-import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
 import org.apache.kafka.common.Cluster;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.streams.KafkaClientSupplier;
@@ -69,7 +68,7 @@ public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
         } else {
             assertFalse(config.containsKey(ProducerConfig.TRANSACTIONAL_ID_CONFIG));
         }
-        final MockProducer<byte[], byte[]> producer = new MockProducer<>(cluster, true, new DefaultPartitioner(), BYTE_ARRAY_SERIALIZER, BYTE_ARRAY_SERIALIZER);
+        final MockProducer<byte[], byte[]> producer = new MockProducer<>(cluster, true, BYTE_ARRAY_SERIALIZER, BYTE_ARRAY_SERIALIZER);
         producers.add(producer);
         return producer;
     }
diff --git a/streams/src/test/java/org/apache/kafka/test/MockInternalNewProcessorContext.java b/streams/src/test/java/org/apache/kafka/test/MockInternalNewProcessorContext.java
index 7131e861ba556..c06bc79cdc137 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockInternalNewProcessorContext.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockInternalNewProcessorContext.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.test;
 
+import java.util.Objects;
 import org.apache.kafka.common.header.Headers;
 import org.apache.kafka.common.header.internals.RecordHeaders;
 import org.apache.kafka.common.utils.Bytes;
@@ -24,8 +25,11 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.MockProcessorContext;
+import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.ProcessorMetadata;
 import org.apache.kafka.streams.processor.internals.ProcessorNode;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.RecordCollector;
@@ -48,12 +52,15 @@ public class MockInternalNewProcessorContext<KOut, VOut> extends MockProcessorCo
 
     private long timestamp = 0;
     private Headers headers = new RecordHeaders();
+    private ProcessorMetadata processorMetadata;
 
     public MockInternalNewProcessorContext() {
+        processorMetadata = new ProcessorMetadata();
     }
 
     public MockInternalNewProcessorContext(final Properties config, final TaskId taskId, final File stateDir) {
         super(config, taskId, stateDir);
+        processorMetadata = new ProcessorMetadata();
     }
 
     @Override
@@ -206,4 +213,39 @@ public <T extends StateStore> T getStateStore(StoreBuilder<T> builder) {
     public String changelogFor(final String storeName) {
         return "mock-changelog";
     }
+
+    @Override
+    public void addProcessorMetadataKeyValue(final String key, final long value) {
+        processorMetadata.put(key, value);
+    }
+
+    @Override
+    public Long processorMetadataForKey(final String key) {
+        return processorMetadata.get(key);
+    }
+
+    @Override
+    public void setProcessorMetadata(final ProcessorMetadata metadata) {
+        Objects.requireNonNull(metadata);
+        processorMetadata = metadata;
+    }
+
+    @Override
+    public ProcessorMetadata getProcessorMetadata() {
+        return processorMetadata;
+    }
+
+    @Override
+    public <K extends KOut, V extends VOut> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K extends KOut, V extends VOut> void forward(final FixedKeyRecord<K, V> record,
+                                                         final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/test/MockInternalProcessorContext.java b/streams/src/test/java/org/apache/kafka/test/MockInternalProcessorContext.java
index 6f8bcd8aeb2e7..e585c04517a06 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockInternalProcessorContext.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockInternalProcessorContext.java
@@ -16,6 +16,7 @@
  */
 package org.apache.kafka.test;
 
+import java.util.Objects;
 import org.apache.kafka.common.utils.Bytes;
 import org.apache.kafka.streams.processor.CommitCallback;
 import org.apache.kafka.streams.processor.MockProcessorContext;
@@ -23,9 +24,11 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.api.RecordMetadata;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.ProcessorMetadata;
 import org.apache.kafka.streams.processor.internals.ProcessorNode;
 import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
 import org.apache.kafka.streams.processor.internals.RecordCollector;
@@ -49,12 +52,15 @@ public class MockInternalProcessorContext extends MockProcessorContext implement
     private RecordCollector recordCollector;
     private long currentSystemTimeMs;
     private TaskType taskType = TaskType.ACTIVE;
+    private ProcessorMetadata processorMetadata;
 
     public MockInternalProcessorContext() {
+        processorMetadata = new ProcessorMetadata();
     }
 
     public MockInternalProcessorContext(final Properties config, final TaskId taskId, final File stateDir) {
         super(config, taskId, stateDir);
+        processorMetadata = new ProcessorMetadata();
     }
 
     @Override
@@ -181,4 +187,38 @@ public void registerCacheFlushListener(final String namespace, final DirtyEntryF
     public String changelogFor(final String storeName) {
         return "mock-changelog";
     }
+
+    @Override
+    public void addProcessorMetadataKeyValue(final String key, final long value) {
+        processorMetadata.put(key, value);
+    }
+
+    @Override
+    public Long processorMetadataForKey(final String key) {
+        return processorMetadata.get(key);
+    }
+
+    @Override
+    public void setProcessorMetadata(final ProcessorMetadata metadata) {
+        Objects.requireNonNull(metadata);
+        processorMetadata = metadata;
+    }
+
+    @Override
+    public ProcessorMetadata getProcessorMetadata() {
+        return processorMetadata;
+    }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record, final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
 }
\ No newline at end of file
diff --git a/streams/src/test/java/org/apache/kafka/test/MockProcessor.java b/streams/src/test/java/org/apache/kafka/test/MockProcessor.java
index a3bb87d3034dc..dc2a38938a81a 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockProcessor.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockProcessor.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.PunctuationType;
 import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.state.ValueAndTimestamp;
 
 import java.util.ArrayList;
@@ -83,4 +84,11 @@ public Cancellable scheduleCancellable() {
     public ArrayList<KeyValueTimestamp<K, V>> processed() {
         return delegate.processed();
     }
+
+    @SuppressWarnings("unchecked")
+    public void addProcessorMetadata(final String key, final long value) {
+        if (context instanceof InternalProcessorContext) {
+            ((InternalProcessorContext<K, V>) context).addProcessorMetadataKeyValue(key, value);
+        }
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/test/MockProcessorNode.java b/streams/src/test/java/org/apache/kafka/test/MockProcessorNode.java
index 4ab4cb8bbbcd3..039c831e4dfcb 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockProcessorNode.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockProcessorNode.java
@@ -16,14 +16,14 @@
  */
 package org.apache.kafka.test;
 
+import java.util.Collections;
+import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.kafka.streams.processor.PunctuationType;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
+import org.apache.kafka.streams.processor.internals.ProcessorAdapter;
 import org.apache.kafka.streams.processor.internals.ProcessorNode;
 
-import java.util.Collections;
-import java.util.concurrent.atomic.AtomicInteger;
-
 public class MockProcessorNode<KIn, VIn, KOut, VOut> extends ProcessorNode<KIn, VIn, KOut, VOut> {
 
     private static final String NAME = "MOCK-PROCESS-";
@@ -46,8 +46,9 @@ public MockProcessorNode() {
         this(new MockProcessor<>());
     }
 
+    @SuppressWarnings("unchecked")
     private MockProcessorNode(final MockProcessor<KIn, VIn> mockProcessor) {
-        super(NAME + INDEX.getAndIncrement(), mockProcessor, Collections.<String>emptySet());
+        super(NAME + INDEX.getAndIncrement(), ProcessorAdapter.adapt(mockProcessor), Collections.<String>emptySet());
 
         this.mockProcessor = mockProcessor;
     }
@@ -60,7 +61,7 @@ public void init(final InternalProcessorContext<KOut, VOut> context) {
 
     @Override
     public void process(final Record<KIn, VIn> record) {
-        processor().process(record);
+        mockProcessor.process(record.key(), record.value());
     }
 
     @Override
diff --git a/streams/src/test/java/org/apache/kafka/test/MockRecordCollector.java b/streams/src/test/java/org/apache/kafka/test/MockRecordCollector.java
index 505ee6858a7cd..8a7f543496356 100644
--- a/streams/src/test/java/org/apache/kafka/test/MockRecordCollector.java
+++ b/streams/src/test/java/org/apache/kafka/test/MockRecordCollector.java
@@ -21,6 +21,7 @@
 import org.apache.kafka.common.header.Headers;
 import org.apache.kafka.common.serialization.Serializer;
 import org.apache.kafka.streams.processor.StreamPartitioner;
+import org.apache.kafka.streams.processor.internals.InternalProcessorContext;
 import org.apache.kafka.streams.processor.internals.RecordCollector;
 
 import java.util.Collections;
@@ -46,13 +47,17 @@ public <K, V> void send(final String topic,
                             final Integer partition,
                             final Long timestamp,
                             final Serializer<K> keySerializer,
-                            final Serializer<V> valueSerializer) {
-        collected.add(new ProducerRecord<>(topic,
+                            final Serializer<V> valueSerializer,
+                            final String processorNodeId,
+                            final InternalProcessorContext<Void, Void> context) {
+        collected.add(new ProducerRecord<>(
+            topic,
             partition,
             timestamp,
             key,
             value,
-            headers));
+            headers)
+        );
     }
 
     @Override
@@ -63,13 +68,17 @@ public <K, V> void send(final String topic,
                             final Long timestamp,
                             final Serializer<K> keySerializer,
                             final Serializer<V> valueSerializer,
+                            final String processorNodeId,
+                            final InternalProcessorContext<Void, Void> context,
                             final StreamPartitioner<? super K, ? super V> partitioner) {
-        collected.add(new ProducerRecord<>(topic,
+        collected.add(new ProducerRecord<>(
+            topic,
             0, // partition id
             timestamp,
             key,
             value,
-            headers));
+            headers)
+        );
     }
 
     @Override
diff --git a/streams/src/test/java/org/apache/kafka/test/NoOpProcessorContext.java b/streams/src/test/java/org/apache/kafka/test/NoOpProcessorContext.java
index 53231edcd499b..47ebe4bdb44ad 100644
--- a/streams/src/test/java/org/apache/kafka/test/NoOpProcessorContext.java
+++ b/streams/src/test/java/org/apache/kafka/test/NoOpProcessorContext.java
@@ -27,6 +27,7 @@
 import org.apache.kafka.streams.processor.StateStore;
 import org.apache.kafka.streams.processor.TaskId;
 import org.apache.kafka.streams.processor.To;
+import org.apache.kafka.streams.processor.api.FixedKeyRecord;
 import org.apache.kafka.streams.processor.api.Record;
 import org.apache.kafka.streams.processor.internals.AbstractProcessorContext;
 import org.apache.kafka.streams.processor.internals.MockStreamsMetrics;
@@ -152,4 +153,17 @@ public void registerCacheFlushListener(final String namespace, final DirtyEntryF
     public String changelogFor(final String storeName) {
         return ProcessorStateManager.storeChangelogTopic(applicationId(), storeName, taskId().topologyName());
     }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record) {
+        forward(new Record<>(record.key(), record.value(), record.timestamp(), record.headers()));
+    }
+
+    @Override
+    public <K, V> void forward(final FixedKeyRecord<K, V> record, final String childName) {
+        forward(
+            new Record<>(record.key(), record.value(), record.timestamp(), record.headers()),
+            childName
+        );
+    }
 }
diff --git a/streams/src/test/java/org/apache/kafka/test/StreamsTestUtils.java b/streams/src/test/java/org/apache/kafka/test/StreamsTestUtils.java
index 900898295b5ec..20cade6d93a88 100644
--- a/streams/src/test/java/org/apache/kafka/test/StreamsTestUtils.java
+++ b/streams/src/test/java/org/apache/kafka/test/StreamsTestUtils.java
@@ -18,6 +18,7 @@
 
 import org.apache.kafka.common.Metric;
 import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.TopicPartition;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.utils.Bytes;
@@ -25,12 +26,17 @@
 import org.apache.kafka.streams.KeyValue;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.processor.TaskId;
+import org.apache.kafka.streams.processor.internals.StandbyTask;
+import org.apache.kafka.streams.processor.internals.StreamTask;
+import org.apache.kafka.streams.processor.internals.Task;
 import org.apache.kafka.streams.state.KeyValueIterator;
 
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
@@ -48,6 +54,8 @@
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.junit.Assert.assertFalse;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 public final class StreamsTestUtils {
     private StreamsTestUtils() {}
@@ -273,4 +281,52 @@ public static boolean isCheckSupplierCall() {
         return Arrays.stream(Thread.currentThread().getStackTrace())
                 .anyMatch(caller -> "org.apache.kafka.streams.internals.ApiUtils".equals(caller.getClassName()) && "checkSupplier".equals(caller.getMethodName()));
     }
+
+    public static class TaskBuilder<T extends Task> {
+        private final T task;
+
+        private TaskBuilder(final T task) {
+            this.task = task;
+        }
+
+        public static TaskBuilder<StreamTask> statelessTask(final TaskId taskId) {
+            final StreamTask task = mock(StreamTask.class);
+            when(task.changelogPartitions()).thenReturn(Collections.emptySet());
+            when(task.isActive()).thenReturn(true);
+            when(task.id()).thenReturn(taskId);
+            return new TaskBuilder<>(task);
+        }
+
+        public static TaskBuilder<StreamTask> statefulTask(final TaskId taskId,
+                                                           final Set<TopicPartition> changelogPartitions) {
+            final StreamTask task = mock(StreamTask.class);
+            when(task.isActive()).thenReturn(true);
+            setupStatefulTask(task, taskId, changelogPartitions);
+            return new TaskBuilder<>(task);
+        }
+
+        public static TaskBuilder<StandbyTask> standbyTask(final TaskId taskId,
+                                                           final Set<TopicPartition> changelogPartitions) {
+            final StandbyTask task = mock(StandbyTask.class);
+            when(task.isActive()).thenReturn(false);
+            setupStatefulTask(task, taskId, changelogPartitions);
+            return new TaskBuilder<>(task);
+        }
+
+        private static void setupStatefulTask(final Task task,
+                                              final TaskId taskId,
+                                              final Set<TopicPartition> changelogPartitions) {
+            when(task.changelogPartitions()).thenReturn(changelogPartitions);
+            when(task.id()).thenReturn(taskId);
+        }
+
+        public TaskBuilder<T> inState(final Task.State state) {
+            when(task.state()).thenReturn(state);
+            return this;
+        }
+
+        public T build() {
+            return task;
+        }
+    }
 }
diff --git a/streams/src/test/resources/log4j.properties b/streams/src/test/resources/log4j.properties
index 050cd679f06e7..b7e1fb2d60ea4 100644
--- a/streams/src/test/resources/log4j.properties
+++ b/streams/src/test/resources/log4j.properties
@@ -22,6 +22,12 @@ log4j.logger.kafka=ERROR
 log4j.logger.state.change.logger=ERROR
 log4j.logger.org.apache.kafka=ERROR
 log4j.logger.org.apache.zookeeper=ERROR
+log4j.logger.org.apache.kafka.clients=ERROR
+
+# These are the only logs we will likely ever find anything useful in to debug Streams test failures
+log4j.logger.org.apache.kafka.clients.consumer=INFO
+log4j.logger.org.apache.kafka.clients.producer=INFO
+log4j.logger.org.apache.kafka.streams=INFO
 
 # printing out the configs takes up a huge amount of the allotted characters,
 # and provides little value as we can always figure out the test configs without the logs
@@ -29,7 +35,3 @@ log4j.logger.org.apache.kafka.clients.producer.ProducerConfig=ERROR
 log4j.logger.org.apache.kafka.clients.consumer.ConsumerConfig=ERROR
 log4j.logger.org.apache.kafka.clients.admin.AdminClientConfig=ERROR
 log4j.logger.org.apache.kafka.streams.StreamsConfig=ERROR
-
-# These are the only logs we will likely ever find anything useful in to debug Streams test failures
-log4j.logger.org.apache.kafka.clients=INFO
-log4j.logger.org.apache.kafka.streams=INFO
diff --git a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KStream.scala b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KStream.scala
index dedb4246aaf02..24f9e6ed6a769 100644
--- a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KStream.scala
+++ b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KStream.scala
@@ -28,7 +28,7 @@ import org.apache.kafka.streams.kstream.{
   KStream => KStreamJ
 }
 import org.apache.kafka.streams.processor.TopicNameExtractor
-import org.apache.kafka.streams.processor.api.ProcessorSupplier
+import org.apache.kafka.streams.processor.api.{FixedKeyProcessorSupplier, ProcessorSupplier}
 import org.apache.kafka.streams.scala.FunctionsCompatConversions.{
   FlatValueMapperFromFunction,
   FlatValueMapperWithKeyFromFunction,
@@ -558,6 +558,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transform`
    */
+  @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, String*) instead.")
   def transform[K1, V1](
     transformerSupplier: TransformerSupplier[K, V, KeyValue[K1, V1]],
     stateStoreNames: String*
@@ -580,6 +581,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transform`
    */
+  @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, Named, String*) instead.")
   def transform[K1, V1](
     transformerSupplier: TransformerSupplier[K, V, KeyValue[K1, V1]],
     named: Named,
@@ -602,6 +604,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transform`
    */
+  @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, String*) instead.")
   def flatTransform[K1, V1](
     transformerSupplier: TransformerSupplier[K, V, Iterable[KeyValue[K1, V1]]],
     stateStoreNames: String*
@@ -624,6 +627,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains more or less records with new key and value (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transform`
    */
+  @deprecated(since = "3.3", message = "Use process(ProcessorSupplier, Named, String*) instead.")
   def flatTransform[K1, V1](
     transformerSupplier: TransformerSupplier[K, V, Iterable[KeyValue[K1, V1]]],
     named: Named,
@@ -646,6 +650,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.")
   def flatTransformValues[VR](
     valueTransformerSupplier: ValueTransformerSupplier[V, Iterable[VR]],
     stateStoreNames: String*
@@ -668,6 +673,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.")
   def flatTransformValues[VR](
     valueTransformerSupplier: ValueTransformerSupplier[V, Iterable[VR]],
     named: Named,
@@ -690,6 +696,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.")
   def flatTransformValues[VR](
     valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, Iterable[VR]],
     stateStoreNames: String*
@@ -712,6 +719,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.")
   def flatTransformValues[VR](
     valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, Iterable[VR]],
     named: Named,
@@ -733,6 +741,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.")
   def transformValues[VR](
     valueTransformerSupplier: ValueTransformerSupplier[V, VR],
     stateStoreNames: String*
@@ -754,6 +763,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.")
   def transformValues[VR](
     valueTransformerSupplier: ValueTransformerSupplier[V, VR],
     named: Named,
@@ -775,6 +785,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, String*) instead.")
   def transformValues[VR](
     valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, VR],
     stateStoreNames: String*
@@ -796,6 +807,7 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @return a [[KStream]] that contains records with unmodified key and new values (possibly of different type)
    * @see `org.apache.kafka.streams.kstream.KStream#transformValues`
    */
+  @deprecated(since = "3.3", message = "Use processValues(FixedKeyProcessorSupplier, Named, String*) instead.")
   def transformValues[VR](
     valueTransformerSupplier: ValueTransformerWithKeySupplier[K, V, VR],
     named: Named,
@@ -824,6 +836,29 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
     inner.process(processorSupplierJ, stateStoreNames: _*)
   }
 
+  /**
+   * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given
+   * `processorSupplier`).
+   * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected
+   * to the `Processor`.
+   * It's not required to connect global state stores that are added via `addGlobalStore`;
+   * read-only access to global state stores is available by default.
+   *
+   * @param processorSupplier a function that generates a [[org.apache.kafka.streams.processor.Processor]]
+   * @param named             a [[Named]] config used to name the processor in the topology
+   * @param stateStoreNames   the names of the state store used by the processor
+   * @see `org.apache.kafka.streams.kstream.KStream#process`
+   */
+  @deprecated(since = "3.0", message = "Use process(ProcessorSupplier, String*) instead.")
+  def process(
+    processorSupplier: () => org.apache.kafka.streams.processor.Processor[K, V],
+    named: Named,
+    stateStoreNames: String*
+  ): Unit = {
+    val processorSupplierJ: org.apache.kafka.streams.processor.ProcessorSupplier[K, V] = () => processorSupplier()
+    inner.process(processorSupplierJ, named, stateStoreNames: _*)
+  }
+
   /**
    * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given
    * `processorSupplier`).
@@ -839,8 +874,8 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * @param stateStoreNames   the names of the state store used by the processor
    * @see `org.apache.kafka.streams.kstream.KStream#process`
    */
-  def process(processorSupplier: ProcessorSupplier[K, V, Void, Void], stateStoreNames: String*): Unit =
-    inner.process(processorSupplier, stateStoreNames: _*)
+  def process[KR, VR](processorSupplier: ProcessorSupplier[K, V, KR, VR], stateStoreNames: String*): KStream[KR, VR] =
+    new KStream(inner.process(processorSupplier, stateStoreNames: _*))
 
   /**
    * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given
@@ -850,39 +885,64 @@ class KStream[K, V](val inner: KStreamJ[K, V]) {
    * It's not required to connect global state stores that are added via `addGlobalStore`;
    * read-only access to global state stores is available by default.
    *
-   * @param processorSupplier a function that generates a [[org.apache.kafka.streams.processor.Processor]]
+   * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with
+   * the older (deprecated) overload.
+   *
+   * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.Processor]]
    * @param named             a [[Named]] config used to name the processor in the topology
    * @param stateStoreNames   the names of the state store used by the processor
    * @see `org.apache.kafka.streams.kstream.KStream#process`
    */
-  @deprecated(since = "3.0", message = "Use process(ProcessorSupplier, String*) instead.")
-  def process(
-    processorSupplier: () => org.apache.kafka.streams.processor.Processor[K, V],
+  def process[KR, VR](
+    processorSupplier: ProcessorSupplier[K, V, KR, VR],
     named: Named,
     stateStoreNames: String*
-  ): Unit = {
-    val processorSupplierJ: org.apache.kafka.streams.processor.ProcessorSupplier[K, V] = () => processorSupplier()
-    inner.process(processorSupplierJ, named, stateStoreNames: _*)
-  }
+  ): KStream[KR, VR] =
+    new KStream(inner.process(processorSupplier, named, stateStoreNames: _*))
 
   /**
-   * Process all records in this stream, one record at a time, by applying a `Processor` (provided by the given
+   * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given
    * `processorSupplier`).
    * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected
-   * to the `Processor`.
+   * to the `FixedKeyProcessor`.
+   * It's not required to connect global state stores that are added via `addGlobalStore`;
+   * read-only access to global state stores is available by default.
+   *
+   * Note that this overload takes a FixedKeyProcessorSupplier instead of a Function to avoid post-erasure ambiguity with
+   * the older (deprecated) overload.
+   *
+   * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.FixedKeyProcessor]]
+   * @param stateStoreNames   the names of the state store used by the processor
+   * @see `org.apache.kafka.streams.kstream.KStream#process`
+   */
+  def processValues[VR](
+    processorSupplier: FixedKeyProcessorSupplier[K, V, VR],
+    stateStoreNames: String*
+  ): KStream[K, VR] =
+    new KStream(inner.processValues(processorSupplier, stateStoreNames: _*))
+
+  /**
+   * Process all records in this stream, one record at a time, by applying a `FixedKeyProcessor` (provided by the given
+   * `processorSupplier`).
+   * In order to assign a state, the state must be created and added via `addStateStore` before they can be connected
+   * to the `FixedKeyProcessor`.
    * It's not required to connect global state stores that are added via `addGlobalStore`;
    * read-only access to global state stores is available by default.
    *
    * Note that this overload takes a ProcessorSupplier instead of a Function to avoid post-erasure ambiguity with
    * the older (deprecated) overload.
    *
-   * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.Processor]]
+   * @param processorSupplier a supplier for [[org.apache.kafka.streams.processor.api.FixedKeyProcessor]]
    * @param named             a [[Named]] config used to name the processor in the topology
    * @param stateStoreNames   the names of the state store used by the processor
    * @see `org.apache.kafka.streams.kstream.KStream#process`
    */
-  def process(processorSupplier: ProcessorSupplier[K, V, Void, Void], named: Named, stateStoreNames: String*): Unit =
-    inner.process(processorSupplier, named, stateStoreNames: _*)
+  def processValues[VR](
+    processorSupplier: FixedKeyProcessorSupplier[K, V, VR],
+    named: Named,
+    stateStoreNames: String*
+  ): KStream[K, VR] =
+    new KStream(inner.processValues(processorSupplier, named, stateStoreNames: _*))
 
   /**
    * Group the records by their current key into a [[KGroupedStream]]
diff --git a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala
index 3a405b68a7713..9d8fe81f71a8e 100644
--- a/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala
+++ b/streams/streams-scala/src/main/scala/org/apache/kafka/streams/scala/kstream/KTable.scala
@@ -249,7 +249,7 @@ class KTable[K, V](val inner: KTableJ[K, V]) {
    * @see `org.apache.kafka.streams.kstream.KTable#mapValues`
    */
   def mapValues[VR](mapper: (K, V) => VR, materialized: Materialized[K, VR, ByteArrayKeyValueStore]): KTable[K, VR] =
-    new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey))
+    new KTable(inner.mapValues[VR](mapper.asValueMapperWithKey, materialized))
 
   /**
    * Create a new [[KTable]] by transforming the value of each record in this [[KTable]] into a new value
diff --git a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/TopologyTest.scala b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/TopologyTest.scala
index 0344e8c249d01..b38b0c3a941f3 100644
--- a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/TopologyTest.scala
+++ b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/TopologyTest.scala
@@ -46,14 +46,15 @@ import org.apache.kafka.streams.{KeyValue, StreamsConfig, TopologyDescription, S
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api._
 
+import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
 
 /**
  * Test suite that verifies that the topology built by the Java and Scala APIs match.
  */
 //noinspection ScalaDeprecation
+@Timeout(600)
 class TopologyTest {
-
   private val inputTopic = "input-topic"
   private val userClicksTopic = "user-clicks-topic"
   private val userRegionsTopic = "user-regions-topic"
@@ -275,6 +276,7 @@ class TopologyTest {
     assertEquals(getTopologyScala, getTopologyJava)
   }
 
+  @nowarn
   @Test
   def shouldBuildIdenticalTopologyInJavaNScalaTransform(): Unit = {
 
@@ -301,6 +303,7 @@ class TopologyTest {
       streamBuilder.build().describe()
     }
 
+    @nowarn
     // build the Java topology
     def getTopologyJava: TopologyDescription = {
 
diff --git a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KStreamTest.scala b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KStreamTest.scala
index 0ec7b0e2f849a..0b61984a96ab7 100644
--- a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KStreamTest.scala
+++ b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KStreamTest.scala
@@ -36,6 +36,7 @@ import org.apache.kafka.streams.scala.utils.TestDriver
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
 import org.junit.jupiter.api.Test
 
+import scala.annotation.nowarn
 import scala.jdk.CollectionConverters._
 
 class KStreamTest extends TestDriver {
@@ -221,6 +222,7 @@ class KStreamTest extends TestDriver {
     testDriver.close()
   }
 
+  @nowarn
   @Test
   def testTransformCorrectlyRecords(): Unit = {
     class TestTransformer extends Transformer[String, String, KeyValue[String, String]] {
@@ -256,6 +258,7 @@ class KStreamTest extends TestDriver {
     testDriver.close()
   }
 
+  @nowarn
   @Test
   def testFlatTransformCorrectlyRecords(): Unit = {
     class TestTransformer extends Transformer[String, String, Iterable[KeyValue[String, String]]] {
@@ -291,6 +294,7 @@ class KStreamTest extends TestDriver {
     testDriver.close()
   }
 
+  @nowarn
   @Test
   def testCorrectlyFlatTransformValuesInRecords(): Unit = {
     class TestTransformer extends ValueTransformer[String, Iterable[String]] {
@@ -327,6 +331,7 @@ class KStreamTest extends TestDriver {
     testDriver.close()
   }
 
+  @nowarn
   @Test
   def testCorrectlyFlatTransformValuesInRecordsWithKey(): Unit = {
     class TestTransformer extends ValueTransformerWithKey[String, String, Iterable[String]] {
@@ -443,6 +448,7 @@ class KStreamTest extends TestDriver {
     assertEquals("my-name", joinNode.name())
   }
 
+  @nowarn
   @Test
   def testSettingNameOnTransform(): Unit = {
     class TestTransformer extends Transformer[String, String, KeyValue[String, String]] {
diff --git a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KTableTest.scala b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KTableTest.scala
index 09a3a7d9087b2..9e872601ef131 100644
--- a/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KTableTest.scala
+++ b/streams/streams-scala/src/test/scala/org/apache/kafka/streams/scala/kstream/KTableTest.scala
@@ -496,4 +496,42 @@ class KTableTest extends TestDriver {
     assertTrue(joinNodeLeft.name().contains("my-name"))
     assertTrue(joinNodeRight.name().contains("my-name"))
   }
+
+  @Test
+  def testMapValuesWithValueMapperWithMaterialized(): Unit = {
+    val builder = new StreamsBuilder()
+    val sourceTopic = "source"
+    val stateStore = "store"
+    val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore)
+
+    val table = builder.stream[String, String](sourceTopic).toTable
+    table.mapValues(value => value.length.toLong, materialized)
+
+    val testDriver = createTestDriver(builder)
+    val testInput = testDriver.createInput[String, String](sourceTopic)
+
+    testInput.pipeInput("1", "topic1value1")
+    assertEquals(12, testDriver.getKeyValueStore[String, Long](stateStore).get("1"))
+
+    testDriver.close()
+  }
+
+  @Test
+  def testMapValuesWithValueMapperWithKeyAndWithMaterialized(): Unit = {
+    val builder = new StreamsBuilder()
+    val sourceTopic = "source"
+    val stateStore = "store"
+    val materialized = Materialized.as[String, Long, ByteArrayKeyValueStore](stateStore)
+
+    val table = builder.stream[String, String](sourceTopic).toTable
+    table.mapValues((key, value) => key.length + value.length.toLong, materialized)
+
+    val testDriver = createTestDriver(builder)
+    val testInput = testDriver.createInput[String, String](sourceTopic)
+
+    testInput.pipeInput("1", "topic1value1")
+    assertEquals(13, testDriver.getKeyValueStore[String, Long](stateStore).get("1"))
+
+    testDriver.close()
+  }
 }
diff --git a/streams/test-utils/src/main/java/org/apache/kafka/streams/TopologyTestDriver.java b/streams/test-utils/src/main/java/org/apache/kafka/streams/TopologyTestDriver.java
index c9745d8d982d6..9e7e285bd10b0 100644
--- a/streams/test-utils/src/main/java/org/apache/kafka/streams/TopologyTestDriver.java
+++ b/streams/test-utils/src/main/java/org/apache/kafka/streams/TopologyTestDriver.java
@@ -70,7 +70,7 @@
 import org.apache.kafka.streams.processor.internals.Task;
 import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
 import org.apache.kafka.streams.processor.internals.metrics.TaskMetrics;
-import org.apache.kafka.streams.processor.internals.namedtopology.TopologyConfig.TaskConfig;
+import org.apache.kafka.streams.TopologyConfig.TaskConfig;
 import org.apache.kafka.streams.query.Position;
 import org.apache.kafka.streams.state.KeyValueIterator;
 import org.apache.kafka.streams.state.KeyValueStore;
@@ -496,7 +496,8 @@ private void setupTask(final StreamsConfig streamsConfig,
                 TASK_ID,
                 testDriverProducer,
                 streamsConfig.defaultProductionExceptionHandler(),
-                streamsMetrics
+                streamsMetrics,
+                processorTopology
             );
 
             final InternalProcessorContext context = new ProcessorContextImpl(
diff --git a/streams/test-utils/src/main/java/org/apache/kafka/streams/processor/api/MockProcessorContext.java b/streams/test-utils/src/main/java/org/apache/kafka/streams/processor/api/MockProcessorContext.java
index acd946a9d48e4..49cf8cae639ac 100644
--- a/streams/test-utils/src/main/java/org/apache/kafka/streams/processor/api/MockProcessorContext.java
+++ b/streams/test-utils/src/main/java/org/apache/kafka/streams/processor/api/MockProcessorContext.java
@@ -76,6 +76,8 @@ public class MockProcessorContext<KForward, VForward> implements ProcessorContex
 
     // settable record metadata ================================================
     private MockRecordMetadata recordMetadata;
+    private Long currentSystemTimeMs;
+    private Long currentStreamTimeMs;
 
     // mocks ================================================
     private final Map<String, StateStore> stateStores = new HashMap<>();
@@ -284,6 +286,22 @@ public Map<String, Object> appConfigsWithPrefix(final String prefix) {
         return config.originalsWithPrefix(prefix);
     }
 
+    @Override
+    public long currentSystemTimeMs() {
+        if (currentSystemTimeMs == null) {
+            throw new IllegalStateException("System time must be set before use via setCurrentSystemTimeMs().");
+        }
+        return currentSystemTimeMs;
+    }
+
+    @Override
+    public long currentStreamTimeMs() {
+        if (currentStreamTimeMs == null) {
+            throw new IllegalStateException("Stream time must be set before use via setCurrentStreamTimeMs().");
+        }
+        return currentStreamTimeMs;
+    }
+
     @Override
     public Serde<?> keySerde() {
         return config.defaultKeySerde();
@@ -326,6 +344,14 @@ public void setRecordMetadata(final String topic,
         recordMetadata = new MockRecordMetadata(topic, partition, offset);
     }
 
+    public void setCurrentSystemTimeMs(final long currentSystemTimeMs) {
+        this.currentSystemTimeMs = currentSystemTimeMs;
+    }
+
+    public void setCurrentStreamTimeMs(final long currentStreamTimeMs) {
+        this.currentStreamTimeMs = currentStreamTimeMs;
+    }
+
     @Override
     public Optional<RecordMetadata> recordMetadata() {
         return Optional.ofNullable(recordMetadata);
diff --git a/streams/test-utils/src/test/java/org/apache/kafka/streams/TestTopicsTest.java b/streams/test-utils/src/test/java/org/apache/kafka/streams/TestTopicsTest.java
index 766729fffa4f5..ac82c938d7d3d 100644
--- a/streams/test-utils/src/test/java/org/apache/kafka/streams/TestTopicsTest.java
+++ b/streams/test-utils/src/test/java/org/apache/kafka/streams/TestTopicsTest.java
@@ -31,8 +31,6 @@
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.time.Duration;
 import java.time.Instant;
@@ -54,8 +52,6 @@
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
 public class TestTopicsTest {
-    private static final Logger log = LoggerFactory.getLogger(TestTopicsTest.class);
-
     private final static String INPUT_TOPIC = "input";
     private final static String OUTPUT_TOPIC = "output1";
     private final static String INPUT_TOPIC_MAP = OUTPUT_TOPIC;
@@ -175,7 +171,6 @@ public void testKeyValuesToMapWithNull() {
         assertThrows(IllegalStateException.class, outputTopic::readKeyValuesToMap);
     }
 
-
     @Test
     public void testKeyValueListDuration() {
         final TestInputTopic<Long, String> inputTopic =
diff --git a/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..5c4a8cd615b5e 100644
--- a/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -148,7 +164,6 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                                                      final Duration timeToSpend) {
         final Properties producerProps = generatorProperties(kafka);
 
-
         int numRecordsProduced = 0;
 
         final Map<String, Set<Integer>> allData = new HashMap<>();
@@ -163,7 +178,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,15 +191,21 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
                             stringSerde.serializer().serialize("", key),
                             intSerde.serializer().serialize("", value)
                         );
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +217,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +360,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index c0c8c72c59965..9d08663d9b37f 100644
--- a/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-24/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.AbstractProcessor;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.ProcessorSupplier;
@@ -42,12 +49,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
         config.putAll(streamsProperties);
 
@@ -61,13 +85,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier(final String topic) {
         return () -> new AbstractProcessor<K, V>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext context) {
-                System.out.println("[2.4] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[2.4] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -75,7 +108,7 @@ public void init(final ProcessorContext context) {
             public void process(final K key, final V value) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..4dae6eae5756f 100644
--- a/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,15 +192,21 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
                             stringSerde.serializer().serialize("", key),
                             intSerde.serializer().serialize("", value)
                         );
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +218,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +361,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index 0fea040bcb4bb..69c46de37af14 100644
--- a/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-25/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.AbstractProcessor;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.ProcessorSupplier;
@@ -42,12 +49,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
         config.putAll(streamsProperties);
 
@@ -61,13 +85,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier(final String topic) {
         return () -> new AbstractProcessor<K, V>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext context) {
-                System.out.println("[2.5] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[2.5] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -75,7 +108,7 @@ public void init(final ProcessorContext context) {
             public void process(final K key, final V value) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..0e08771495f1f 100644
--- a/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,7 +192,6 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
@@ -183,7 +199,16 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                             intSerde.serializer().serialize("", value)
                         );
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
+
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +220,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +363,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index e1b294ff15bd0..0844552134a03 100644
--- a/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-26/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.AbstractProcessor;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.ProcessorSupplier;
@@ -42,12 +49,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
         config.putAll(streamsProperties);
 
@@ -61,13 +85,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier(final String topic) {
         return () -> new AbstractProcessor<K, V>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext context) {
-                System.out.println("[2.6] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[2.6] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -75,7 +108,7 @@ public void init(final ProcessorContext context) {
             public void process(final K key, final V value) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..ac7482cfb2d78 100644
--- a/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,15 +192,21 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
                             stringSerde.serializer().serialize("", key),
                             intSerde.serializer().serialize("", value)
                         );
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,21 +218,19 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
-            }
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
 
             // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
             // all suppressed records.
@@ -227,6 +248,44 @@ public static Map<String, Set<Integer>> generate(final String kafka,
         return Collections.unmodifiableMap(allData);
     }
 
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
+            }
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
+            }
+        }
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
+    }
+
     private static Properties generatorProperties(final String kafka) {
         final Properties producerProps = new Properties();
         producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, "SmokeTest");
@@ -315,14 +374,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index 6f485e694cfcc..32d8d9408f57b 100644
--- a/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-27/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.AbstractProcessor;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.ProcessorSupplier;
@@ -42,12 +49,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000L);
         config.putAll(streamsProperties);
 
@@ -61,13 +85,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier(final String topic) {
         return () -> new AbstractProcessor<K, V>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext context) {
-                System.out.println("[2.7] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[2.7] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -75,7 +108,7 @@ public void init(final ProcessorContext context) {
             public void process(final K key, final V value) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..4dae6eae5756f 100644
--- a/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,15 +192,21 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
                             stringSerde.serializer().serialize("", key),
                             intSerde.serializer().serialize("", value)
                         );
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +218,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +361,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index 4f2825d23d600..db17d73bcbaca 100644
--- a/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-28/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.AbstractProcessor;
 import org.apache.kafka.streams.processor.ProcessorContext;
 import org.apache.kafka.streams.processor.ProcessorSupplier;
@@ -42,12 +49,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
         config.putAll(streamsProperties);
 
@@ -61,13 +85,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+        final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <K, V> ProcessorSupplier<K, V> printProcessorSupplier(final String topic) {
         return () -> new AbstractProcessor<K, V>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext context) {
-                System.out.println("[2.8] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[2.8] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -75,7 +108,7 @@ public void init(final ProcessorContext context) {
             public void process(final K key, final V value) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..4dae6eae5756f 100644
--- a/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,14 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +139,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,15 +192,21 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
                             stringSerde.serializer().serialize("", key),
                             intSerde.serializer().serialize("", value)
                         );
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +218,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +361,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index b097de71d4186..0751516d76c9b 100644
--- a/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-30/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.api.ContextualProcessor;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
@@ -44,12 +51,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataTable, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
         config.putAll(streamsProperties);
 
@@ -63,13 +87,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <KIn, VIn, KOut, VOut> ProcessorSupplier<KIn, VIn, KOut, VOut> printProcessorSupplier() {
+    private static void buildFKTable(final KTable<String, Integer> primaryTable,
+        final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <KIn, VIn, KOut, VOut> ProcessorSupplier<KIn, VIn, KOut, VOut> printProcessorSupplier(final String topic) {
         return () -> new ContextualProcessor<KIn, VIn, KOut, VOut>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext<KOut, VOut> context) {
-                System.out.println("[3.0] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[3.0] initializing processor: topic=" + topic + " taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -77,7 +110,7 @@ public void init(final ProcessorContext<KOut, VOut> context) {
             public void process(final Record<KIn, VIn> record) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
index ac83cd95ebaaf..dbacbb9625b61 100644
--- a/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
+++ b/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -30,6 +30,7 @@
 import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.utils.Exit;
 import org.apache.kafka.common.utils.Utils;
@@ -60,7 +61,7 @@
 import static org.apache.kafka.common.utils.Utils.mkEntry;
 
 public class SmokeTestDriver extends SmokeTestUtil {
-    private static final String[] TOPICS = {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
         "data",
         "echo",
         "max",
@@ -72,6 +73,15 @@ public class SmokeTestDriver extends SmokeTestUtil {
         "avg",
         "tagg"
     };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
 
     private static final int MAX_RECORD_EMPTY_RETRIES = 30;
 
@@ -130,9 +140,16 @@ static void generatePerpetually(final String kafka,
                         stringSerde.serializer().serialize("", key),
                         intSerde.serializer().serialize("", value)
                     );
-
                 producer.send(record);
 
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
                 numRecordsProduced++;
                 if (numRecordsProduced % 100 == 0) {
                     System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
@@ -148,7 +165,6 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                                                      final Duration timeToSpend) {
         final Properties producerProps = generatorProperties(kafka);
 
-
         int numRecordsProduced = 0;
 
         final Map<String, Set<Integer>> allData = new HashMap<>();
@@ -163,7 +179,8 @@ public static Map<String, Set<Integer>> generate(final String kafka,
 
         final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
 
-        List<ProducerRecord<byte[], byte[]>> needRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
 
         try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
             while (remaining > 0) {
@@ -175,7 +192,6 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                     remaining--;
                     data[index] = data[remaining];
                 } else {
-
                     final ProducerRecord<byte[], byte[]> record =
                         new ProducerRecord<>(
                             "data",
@@ -183,7 +199,16 @@ public static Map<String, Set<Integer>> generate(final String kafka,
                             intSerde.serializer().serialize("", value)
                         );
 
-                    producer.send(record, new TestCallback(record, needRetry));
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
+
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
 
                     numRecordsProduced++;
                     allData.get(key).add(value);
@@ -195,36 +220,59 @@ public static Map<String, Set<Integer>> generate(final String kafka,
             }
             producer.flush();
 
-            int remainingRetries = 5;
-            while (!needRetry.isEmpty()) {
-                final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
-                for (final ProducerRecord<byte[], byte[]> record : needRetry) {
-                    System.out.println("retry producing " + stringSerde.deserializer().deserialize("", record.key()));
-                    producer.send(record, new TestCallback(record, needRetry2));
-                }
-                producer.flush();
-                needRetry = needRetry2;
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
 
-                if (--remainingRetries == 0 && !needRetry.isEmpty()) {
-                    System.err.println("Failed to produce all records after multiple retries");
-                    Exit.exit(1);
-                }
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
             }
-
-            // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
-            // all suppressed records.
-            final List<PartitionInfo> partitions = producer.partitionsFor("data");
-            for (final PartitionInfo partition : partitions) {
-                producer.send(new ProducerRecord<>(
-                    partition.topic(),
-                    partition.partition(),
-                    System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
-                    stringSerde.serializer().serialize("", "flush"),
-                    intSerde.serializer().serialize("", 0)
-                ));
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
             }
         }
-        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
     }
 
     private static Properties generatorProperties(final String kafka) {
@@ -315,14 +363,14 @@ public static VerificationResult verify(final String kafka,
         props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
 
         final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
-        final List<TopicPartition> partitions = getAllPartitions(consumer, TOPICS);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
         consumer.assign(partitions);
         consumer.seekToBeginning(partitions);
 
         final int recordsGenerated = inputs.size() * maxRecordsPerKey;
         int recordsProcessed = 0;
         final Map<String, AtomicInteger> processed =
-            Stream.of(TOPICS)
+            Stream.of(NUMERIC_VALUE_TOPICS)
                   .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
 
         final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
diff --git a/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
index 6657c5f2f23ed..311d30ba40038 100644
--- a/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
+++ b/streams/upgrade-system-tests-31/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -16,11 +16,18 @@
  */
 package org.apache.kafka.streams.tests;
 
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+import java.util.Random;
 import org.apache.kafka.common.utils.Utils;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
 import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
 import org.apache.kafka.streams.processor.api.ContextualProcessor;
 import org.apache.kafka.streams.processor.api.ProcessorContext;
 import org.apache.kafka.streams.processor.api.ProcessorSupplier;
@@ -44,12 +51,29 @@ public static void main(final String[] args) throws Exception {
         System.out.println("props=" + streamsProperties);
 
         final StreamsBuilder builder = new StreamsBuilder();
-        final KStream dataStream = builder.stream("data");
-        dataStream.process(printProcessorSupplier());
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
         dataStream.to("echo");
 
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataStream, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
         final Properties config = new Properties();
-        config.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "StreamsUpgradeTest");
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
         config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
         config.putAll(streamsProperties);
 
@@ -63,13 +87,22 @@ public static void main(final String[] args) throws Exception {
         }));
     }
 
-    private static <KIn, VIn, KOut, VOut> ProcessorSupplier<KIn, VIn, KOut, VOut> printProcessorSupplier() {
+    private static void buildFKTable(final KStream<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable.toTable()
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <KIn, VIn, KOut, VOut> ProcessorSupplier<KIn, VIn, KOut, VOut> printProcessorSupplier(final String topic) {
         return () -> new ContextualProcessor<KIn, VIn, KOut, VOut>() {
             private int numRecordsProcessed = 0;
 
             @Override
             public void init(final ProcessorContext<KOut, VOut> context) {
-                System.out.println("[3.1] initializing processor: topic=data taskId=" + context.taskId());
+                System.out.println("[3.1] initializing processor: topic=" + topic + "taskId=" + context.taskId());
                 numRecordsProcessed = 0;
             }
 
@@ -77,7 +110,7 @@ public void init(final ProcessorContext<KOut, VOut> context) {
             public void process(final Record<KIn, VIn> record) {
                 numRecordsProcessed++;
                 if (numRecordsProcessed % 100 == 0) {
-                    System.out.println("processed " + numRecordsProcessed + " records from topic=data");
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
                 }
             }
 
diff --git a/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java
new file mode 100644
index 0000000000000..dc0ad4d5601c8
--- /dev/null
+++ b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestClient.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.tests;
+
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.common.utils.KafkaThread;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.Topology;
+import org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.Grouped;
+import org.apache.kafka.streams.kstream.KGroupedStream;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Materialized;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.kstream.Suppressed.BufferConfig;
+import org.apache.kafka.streams.kstream.TimeWindows;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.state.Stores;
+import org.apache.kafka.streams.state.WindowStore;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Properties;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.kafka.streams.kstream.Suppressed.untilWindowCloses;
+
+public class SmokeTestClient extends SmokeTestUtil {
+
+    private final String name;
+
+    private KafkaStreams streams;
+    private boolean uncaughtException = false;
+    private boolean started;
+    private volatile boolean closed;
+
+    private static void addShutdownHook(final String name, final Runnable runnable) {
+        if (name != null) {
+            Runtime.getRuntime().addShutdownHook(KafkaThread.nonDaemon(name, runnable));
+        } else {
+            Runtime.getRuntime().addShutdownHook(new Thread(runnable));
+        }
+    }
+
+    private static File tempDirectory() {
+        final String prefix = "kafka-";
+        final File file;
+        try {
+            file = Files.createTempDirectory(prefix).toFile();
+        } catch (final IOException ex) {
+            throw new RuntimeException("Failed to create a temp dir", ex);
+        }
+        file.deleteOnExit();
+
+        addShutdownHook("delete-temp-file-shutdown-hook", () -> {
+            try {
+                Utils.delete(file);
+            } catch (final IOException e) {
+                System.out.println("Error deleting " + file.getAbsolutePath());
+                e.printStackTrace(System.out);
+            }
+        });
+
+        return file;
+    }
+
+    public SmokeTestClient(final String name) {
+        this.name = name;
+    }
+
+    public boolean started() {
+        return started;
+    }
+
+    public boolean closed() {
+        return closed;
+    }
+
+    public void start(final Properties streamsProperties) {
+        final Topology build = getTopology();
+        streams = new KafkaStreams(build, getStreamsConfig(streamsProperties));
+
+        final CountDownLatch countDownLatch = new CountDownLatch(1);
+        streams.setStateListener((newState, oldState) -> {
+            System.out.printf("%s %s: %s -> %s%n", name, Instant.now(), oldState, newState);
+            if (oldState == KafkaStreams.State.REBALANCING && newState == KafkaStreams.State.RUNNING) {
+                started = true;
+                countDownLatch.countDown();
+            }
+
+            if (newState == KafkaStreams.State.NOT_RUNNING) {
+                closed = true;
+            }
+        });
+
+        streams.setUncaughtExceptionHandler(e -> {
+            System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION");
+            System.out.println(name + ": FATAL: An unexpected exception is encountered: " + e);
+            e.printStackTrace(System.out);
+            uncaughtException = true;
+            return StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse.SHUTDOWN_CLIENT;
+        });
+
+        addShutdownHook("streams-shutdown-hook", this::close);
+
+        streams.start();
+        try {
+            if (!countDownLatch.await(1, TimeUnit.MINUTES)) {
+                System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: Didn't start in one minute");
+            }
+        } catch (final InterruptedException e) {
+            System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: " + e);
+            e.printStackTrace(System.out);
+        }
+        System.out.println(name + ": SMOKE-TEST-CLIENT-STARTED");
+        System.out.println(name + " started at " + Instant.now());
+    }
+
+    public void closeAsync() {
+        streams.close(Duration.ZERO);
+    }
+
+    public void close() {
+        final boolean closed = streams.close(Duration.ofMinutes(1));
+
+        if (closed && !uncaughtException) {
+            System.out.println(name + ": SMOKE-TEST-CLIENT-CLOSED");
+        } else if (closed) {
+            System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION");
+        } else {
+            System.out.println(name + ": SMOKE-TEST-CLIENT-EXCEPTION: Didn't close");
+        }
+    }
+
+    private Properties getStreamsConfig(final Properties props) {
+        final Properties fullProps = new Properties(props);
+        fullProps.put(StreamsConfig.APPLICATION_ID_CONFIG, "SmokeTest");
+        fullProps.put(StreamsConfig.CLIENT_ID_CONFIG, "SmokeTest-" + name);
+        fullProps.put(StreamsConfig.STATE_DIR_CONFIG, tempDirectory().getAbsolutePath());
+        fullProps.putAll(props);
+        return fullProps;
+    }
+
+    public Topology getTopology() {
+        final StreamsBuilder builder = new StreamsBuilder();
+        final Consumed<String, Integer> stringIntConsumed = Consumed.with(stringSerde, intSerde);
+        final KStream<String, Integer> source = builder.stream("data", stringIntConsumed);
+        source.filterNot((k, v) -> k.equals("flush"))
+              .to("echo", Produced.with(stringSerde, intSerde));
+        final KStream<String, Integer> data = source.filter((key, value) -> value == null || value != END);
+        data.process(SmokeTestUtil.printProcessorSupplier("data", name));
+
+        // min
+        final KGroupedStream<String, Integer> groupedData = data.groupByKey(Grouped.with(stringSerde, intSerde));
+
+        final KTable<Windowed<String>, Integer> minAggregation = groupedData
+            .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofDays(1), Duration.ofMinutes(1)))
+            .aggregate(
+                () -> Integer.MAX_VALUE,
+                (aggKey, value, aggregate) -> (value < aggregate) ? value : aggregate,
+                Materialized
+                    .<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-min")
+                    .withValueSerde(intSerde)
+                    .withRetention(Duration.ofHours(25))
+            );
+
+        streamify(minAggregation, "min-raw");
+
+        streamify(minAggregation.suppress(untilWindowCloses(BufferConfig.unbounded())), "min-suppressed");
+
+        minAggregation
+            .toStream(new Unwindow<>())
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("min", Produced.with(stringSerde, intSerde));
+
+        final KTable<Windowed<String>, Integer> smallWindowSum = groupedData
+            .windowedBy(TimeWindows.ofSizeAndGrace(Duration.ofSeconds(2), Duration.ofSeconds(30)).advanceBy(Duration.ofSeconds(1)))
+            .reduce(Integer::sum);
+
+        streamify(smallWindowSum, "sws-raw");
+        streamify(smallWindowSum.suppress(untilWindowCloses(BufferConfig.unbounded())), "sws-suppressed");
+
+        final KTable<String, Integer> minTable = builder.table(
+            "min",
+            Consumed.with(stringSerde, intSerde),
+            Materialized.as("minStoreName"));
+
+        minTable.toStream().process(SmokeTestUtil.printProcessorSupplier("min", name));
+
+        // max
+        groupedData
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
+            .aggregate(
+                () -> Integer.MIN_VALUE,
+                (aggKey, value, aggregate) -> (value > aggregate) ? value : aggregate,
+                Materialized.<String, Integer, WindowStore<Bytes, byte[]>>as("uwin-max").withValueSerde(intSerde))
+            .toStream(new Unwindow<>())
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("max", Produced.with(stringSerde, intSerde));
+
+        final KTable<String, Integer> maxTable = builder.table(
+            "max",
+            Consumed.with(stringSerde, intSerde),
+            Materialized.as("maxStoreName"));
+        maxTable.toStream().process(SmokeTestUtil.printProcessorSupplier("max", name));
+
+        // sum
+        groupedData
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
+            .aggregate(
+                () -> 0L,
+                (aggKey, value, aggregate) -> (long) value + aggregate,
+                Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("win-sum").withValueSerde(longSerde))
+            .toStream(new Unwindow<>())
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("sum", Produced.with(stringSerde, longSerde));
+
+        final Consumed<String, Long> stringLongConsumed = Consumed.with(stringSerde, longSerde);
+        final KTable<String, Long> sumTable = builder.table("sum", stringLongConsumed);
+        sumTable.toStream().process(SmokeTestUtil.printProcessorSupplier("sum", name));
+
+        // cnt
+        groupedData
+            .windowedBy(TimeWindows.ofSizeWithNoGrace(Duration.ofDays(2)))
+            .count(Materialized.as("uwin-cnt"))
+            .toStream(new Unwindow<>())
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("cnt", Produced.with(stringSerde, longSerde));
+
+        final KTable<String, Long> cntTable = builder.table(
+            "cnt",
+            Consumed.with(stringSerde, longSerde),
+            Materialized.as("cntStoreName"));
+        cntTable.toStream().process(SmokeTestUtil.printProcessorSupplier("cnt", name));
+
+        // dif
+        maxTable
+            .join(
+                minTable,
+                (value1, value2) -> value1 - value2)
+            .toStream()
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("dif", Produced.with(stringSerde, intSerde));
+
+        // avg
+        sumTable
+            .join(
+                cntTable,
+                (value1, value2) -> (double) value1 / (double) value2)
+            .toStream()
+            .filterNot((k, v) -> k.equals("flush"))
+            .to("avg", Produced.with(stringSerde, doubleSerde));
+
+        // test repartition
+        final Agg agg = new Agg();
+        cntTable.groupBy(agg.selector(), Grouped.with(stringSerde, longSerde))
+                .aggregate(agg.init(), agg.adder(), agg.remover(),
+                           Materialized.<String, Long>as(Stores.inMemoryKeyValueStore("cntByCnt"))
+                               .withKeySerde(Serdes.String())
+                               .withValueSerde(Serdes.Long()))
+                .toStream()
+                .to("tagg", Produced.with(stringSerde, longSerde));
+
+        return builder.build();
+    }
+
+    private static void streamify(final KTable<Windowed<String>, Integer> windowedTable, final String topic) {
+        windowedTable
+            .toStream()
+            .filterNot((k, v) -> k.key().equals("flush"))
+            .map((key, value) -> new KeyValue<>(key.toString(), value))
+            .to(topic, Produced.with(stringSerde, intSerde));
+    }
+}
diff --git a/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
new file mode 100644
index 0000000000000..dbacbb9625b61
--- /dev/null
+++ b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestDriver.java
@@ -0,0 +1,670 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.tests;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.producer.Callback;
+import org.apache.kafka.clients.producer.KafkaProducer;
+import org.apache.kafka.clients.producer.ProducerConfig;
+import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
+import org.apache.kafka.common.PartitionInfo;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.errors.TimeoutException;
+import org.apache.kafka.common.serialization.ByteArraySerializer;
+import org.apache.kafka.common.serialization.Deserializer;
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.serialization.StringDeserializer;
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Utils;
+
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static java.util.Collections.emptyMap;
+import static org.apache.kafka.common.utils.Utils.mkEntry;
+
+public class SmokeTestDriver extends SmokeTestUtil {
+    private static final String[] NUMERIC_VALUE_TOPICS = {
+        "data",
+        "echo",
+        "max",
+        "min", "min-suppressed", "min-raw",
+        "dif",
+        "sum",
+        "sws-raw", "sws-suppressed",
+        "cnt",
+        "avg",
+        "tagg"
+    };
+    private static final String[] STRING_VALUE_TOPICS = {
+        "fk"
+    };
+
+    private static final String[] TOPICS = new String[NUMERIC_VALUE_TOPICS.length + STRING_VALUE_TOPICS.length];
+    static {
+        System.arraycopy(NUMERIC_VALUE_TOPICS, 0, TOPICS, 0, NUMERIC_VALUE_TOPICS.length);
+        System.arraycopy(STRING_VALUE_TOPICS, 0, TOPICS, NUMERIC_VALUE_TOPICS.length, STRING_VALUE_TOPICS.length);
+    }
+
+    private static final int MAX_RECORD_EMPTY_RETRIES = 30;
+
+    private static class ValueList {
+        public final String key;
+        private final int[] values;
+        private int index;
+
+        ValueList(final int min, final int max) {
+            key = min + "-" + max;
+
+            values = new int[max - min + 1];
+            for (int i = 0; i < values.length; i++) {
+                values[i] = min + i;
+            }
+            // We want to randomize the order of data to test not completely predictable processing order
+            // However, values are also use as a timestamp of the record. (TODO: separate data and timestamp)
+            // We keep some correlation of time and order. Thus, the shuffling is done with a sliding window
+            shuffle(values, 10);
+
+            index = 0;
+        }
+
+        int next() {
+            return (index < values.length) ? values[index++] : -1;
+        }
+    }
+
+    public static String[] topics() {
+        return Arrays.copyOf(TOPICS, TOPICS.length);
+    }
+
+    static void generatePerpetually(final String kafka,
+                                    final int numKeys,
+                                    final int maxRecordsPerKey) {
+        final Properties producerProps = generatorProperties(kafka);
+
+        int numRecordsProduced = 0;
+
+        final ValueList[] data = new ValueList[numKeys];
+        for (int i = 0; i < numKeys; i++) {
+            data[i] = new ValueList(i, i + maxRecordsPerKey - 1);
+        }
+
+        final Random rand = new Random();
+
+        try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
+            while (true) {
+                final int index = rand.nextInt(numKeys);
+                final String key = data[index].key;
+                final int value = data[index].next();
+
+                final ProducerRecord<byte[], byte[]> record =
+                    new ProducerRecord<>(
+                        "data",
+                        stringSerde.serializer().serialize("", key),
+                        intSerde.serializer().serialize("", value)
+                    );
+                producer.send(record);
+
+                final ProducerRecord<byte[], byte[]> fkRecord =
+                    new ProducerRecord<>(
+                        "fk",
+                        intSerde.serializer().serialize("", value),
+                        stringSerde.serializer().serialize("", key)
+                    );
+                producer.send(fkRecord);
+
+                numRecordsProduced++;
+                if (numRecordsProduced % 100 == 0) {
+                    System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
+                }
+                Utils.sleep(2);
+            }
+        }
+    }
+
+    public static Map<String, Set<Integer>> generate(final String kafka,
+                                                     final int numKeys,
+                                                     final int maxRecordsPerKey,
+                                                     final Duration timeToSpend) {
+        final Properties producerProps = generatorProperties(kafka);
+
+        int numRecordsProduced = 0;
+
+        final Map<String, Set<Integer>> allData = new HashMap<>();
+        final ValueList[] data = new ValueList[numKeys];
+        for (int i = 0; i < numKeys; i++) {
+            data[i] = new ValueList(i, i + maxRecordsPerKey - 1);
+            allData.put(data[i].key, new HashSet<>());
+        }
+        final Random rand = new Random();
+
+        int remaining = data.length;
+
+        final long recordPauseTime = timeToSpend.toMillis() / numKeys / maxRecordsPerKey;
+
+        final List<ProducerRecord<byte[], byte[]>> dataNeedRetry = new ArrayList<>();
+        final List<ProducerRecord<byte[], byte[]>> fkNeedRetry = new ArrayList<>();
+
+        try (final KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps)) {
+            while (remaining > 0) {
+                final int index = rand.nextInt(remaining);
+                final String key = data[index].key;
+                final int value = data[index].next();
+
+                if (value < 0) {
+                    remaining--;
+                    data[index] = data[remaining];
+                } else {
+                    final ProducerRecord<byte[], byte[]> record =
+                        new ProducerRecord<>(
+                            "data",
+                            stringSerde.serializer().serialize("", key),
+                            intSerde.serializer().serialize("", value)
+                        );
+
+                    producer.send(record, new TestCallback(record, dataNeedRetry));
+
+                    final ProducerRecord<byte[], byte[]> fkRecord =
+                        new ProducerRecord<>(
+                            "fk",
+                            intSerde.serializer().serialize("", value),
+                            stringSerde.serializer().serialize("", key)
+                        );
+
+                    producer.send(fkRecord, new TestCallback(fkRecord, fkNeedRetry));
+
+                    numRecordsProduced++;
+                    allData.get(key).add(value);
+                    if (numRecordsProduced % 100 == 0) {
+                        System.out.println(Instant.now() + " " + numRecordsProduced + " records produced");
+                    }
+                    Utils.sleep(Math.max(recordPauseTime, 2));
+                }
+            }
+            producer.flush();
+
+            retry(producer, dataNeedRetry, stringSerde);
+            retry(producer, fkNeedRetry, intSerde);
+
+            flush(producer,
+                "data",
+                stringSerde.serializer().serialize("", "flush"),
+                intSerde.serializer().serialize("", 0)
+            );
+            flush(producer,
+                "fk",
+                intSerde.serializer().serialize("", 0),
+                stringSerde.serializer().serialize("", "flush")
+            );
+        }
+        return Collections.unmodifiableMap(allData);
+    }
+
+    private static void retry(final KafkaProducer<byte[], byte[]> producer,
+        List<ProducerRecord<byte[], byte[]>> needRetry,
+        final Serde<?> keySerde) {
+        int remainingRetries = 5;
+        while (!needRetry.isEmpty()) {
+            final List<ProducerRecord<byte[], byte[]>> needRetry2 = new ArrayList<>();
+            for (final ProducerRecord<byte[], byte[]> record : needRetry) {
+                System.out.println(
+                    "retry producing " + keySerde.deserializer().deserialize("", record.key()));
+                producer.send(record, new TestCallback(record, needRetry2));
+            }
+            producer.flush();
+            needRetry = needRetry2;
+            if (--remainingRetries == 0 && !needRetry.isEmpty()) {
+                System.err.println("Failed to produce all records after multiple retries");
+                Exit.exit(1);
+            }
+        }
+    }
+
+    private static void flush(final KafkaProducer<byte[], byte[]> producer,
+        final String topic,
+        final byte[] keyBytes,
+        final byte[] valBytes) {
+        // now that we've sent everything, we'll send some final records with a timestamp high enough to flush out
+        // all suppressed records.
+        final List<PartitionInfo> partitions = producer.partitionsFor(topic);
+        for (final PartitionInfo partition : partitions) {
+            producer.send(new ProducerRecord<>(
+                partition.topic(),
+                partition.partition(),
+                System.currentTimeMillis() + Duration.ofDays(2).toMillis(),
+                keyBytes,
+                valBytes
+            ));
+        }
+    }
+
+    private static Properties generatorProperties(final String kafka) {
+        final Properties producerProps = new Properties();
+        producerProps.put(ProducerConfig.CLIENT_ID_CONFIG, "SmokeTest");
+        producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka);
+        producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
+        producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class);
+        producerProps.put(ProducerConfig.ACKS_CONFIG, "all");
+        return producerProps;
+    }
+
+    private static class TestCallback implements Callback {
+        private final ProducerRecord<byte[], byte[]> originalRecord;
+        private final List<ProducerRecord<byte[], byte[]>> needRetry;
+
+        TestCallback(final ProducerRecord<byte[], byte[]> originalRecord,
+                     final List<ProducerRecord<byte[], byte[]>> needRetry) {
+            this.originalRecord = originalRecord;
+            this.needRetry = needRetry;
+        }
+
+        @Override
+        public void onCompletion(final RecordMetadata metadata, final Exception exception) {
+            if (exception != null) {
+                if (exception instanceof TimeoutException) {
+                    needRetry.add(originalRecord);
+                } else {
+                    exception.printStackTrace();
+                    Exit.exit(1);
+                }
+            }
+        }
+    }
+
+    private static void shuffle(final int[] data, @SuppressWarnings("SameParameterValue") final int windowSize) {
+        final Random rand = new Random();
+        for (int i = 0; i < data.length; i++) {
+            // we shuffle data within windowSize
+            final int j = rand.nextInt(Math.min(data.length - i, windowSize)) + i;
+
+            // swap
+            final int tmp = data[i];
+            data[i] = data[j];
+            data[j] = tmp;
+        }
+    }
+
+    public static class NumberDeserializer implements Deserializer<Number> {
+        @Override
+        public Number deserialize(final String topic, final byte[] data) {
+            final Number value;
+            switch (topic) {
+                case "data":
+                case "echo":
+                case "min":
+                case "min-raw":
+                case "min-suppressed":
+                case "sws-raw":
+                case "sws-suppressed":
+                case "max":
+                case "dif":
+                    value = intSerde.deserializer().deserialize(topic, data);
+                    break;
+                case "sum":
+                case "cnt":
+                case "tagg":
+                    value = longSerde.deserializer().deserialize(topic, data);
+                    break;
+                case "avg":
+                    value = doubleSerde.deserializer().deserialize(topic, data);
+                    break;
+                default:
+                    throw new RuntimeException("unknown topic: " + topic);
+            }
+            return value;
+        }
+    }
+
+    public static VerificationResult verify(final String kafka,
+                                            final Map<String, Set<Integer>> inputs,
+                                            final int maxRecordsPerKey) {
+        final Properties props = new Properties();
+        props.put(ConsumerConfig.CLIENT_ID_CONFIG, "verifier");
+        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafka);
+        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, NumberDeserializer.class);
+        props.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
+
+        final KafkaConsumer<String, Number> consumer = new KafkaConsumer<>(props);
+        final List<TopicPartition> partitions = getAllPartitions(consumer, NUMERIC_VALUE_TOPICS);
+        consumer.assign(partitions);
+        consumer.seekToBeginning(partitions);
+
+        final int recordsGenerated = inputs.size() * maxRecordsPerKey;
+        int recordsProcessed = 0;
+        final Map<String, AtomicInteger> processed =
+            Stream.of(NUMERIC_VALUE_TOPICS)
+                  .collect(Collectors.toMap(t -> t, t -> new AtomicInteger(0)));
+
+        final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events = new HashMap<>();
+
+        VerificationResult verificationResult = new VerificationResult(false, "no results yet");
+        int retry = 0;
+        final long start = System.currentTimeMillis();
+        while (System.currentTimeMillis() - start < TimeUnit.MINUTES.toMillis(6)) {
+            final ConsumerRecords<String, Number> records = consumer.poll(Duration.ofSeconds(5));
+            if (records.isEmpty() && recordsProcessed >= recordsGenerated) {
+                verificationResult = verifyAll(inputs, events, false);
+                if (verificationResult.passed()) {
+                    break;
+                } else if (retry++ > MAX_RECORD_EMPTY_RETRIES) {
+                    System.out.println(Instant.now() + " Didn't get any more results, verification hasn't passed, and out of retries.");
+                    break;
+                } else {
+                    System.out.println(Instant.now() + " Didn't get any more results, but verification hasn't passed (yet). Retrying..." + retry);
+                }
+            } else {
+                System.out.println(Instant.now() + " Get some more results from " + records.partitions() + ", resetting retry.");
+
+                retry = 0;
+                for (final ConsumerRecord<String, Number> record : records) {
+                    final String key = record.key();
+
+                    final String topic = record.topic();
+                    processed.get(topic).incrementAndGet();
+
+                    if (topic.equals("echo")) {
+                        recordsProcessed++;
+                        if (recordsProcessed % 100 == 0) {
+                            System.out.println("Echo records processed = " + recordsProcessed);
+                        }
+                    }
+
+                    events.computeIfAbsent(topic, t -> new HashMap<>())
+                          .computeIfAbsent(key, k -> new LinkedList<>())
+                          .add(record);
+                }
+
+                System.out.println(processed);
+            }
+        }
+        consumer.close();
+        final long finished = System.currentTimeMillis() - start;
+        System.out.println("Verification time=" + finished);
+        System.out.println("-------------------");
+        System.out.println("Result Verification");
+        System.out.println("-------------------");
+        System.out.println("recordGenerated=" + recordsGenerated);
+        System.out.println("recordProcessed=" + recordsProcessed);
+
+        if (recordsProcessed > recordsGenerated) {
+            System.out.println("PROCESSED-MORE-THAN-GENERATED");
+        } else if (recordsProcessed < recordsGenerated) {
+            System.out.println("PROCESSED-LESS-THAN-GENERATED");
+        }
+
+        boolean success;
+
+        final Map<String, Set<Number>> received =
+            events.get("echo")
+                  .entrySet()
+                  .stream()
+                  .map(entry -> mkEntry(
+                      entry.getKey(),
+                      entry.getValue().stream().map(ConsumerRecord::value).collect(Collectors.toSet()))
+                  )
+                  .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+
+        success = inputs.equals(received);
+
+        if (success) {
+            System.out.println("ALL-RECORDS-DELIVERED");
+        } else {
+            int missedCount = 0;
+            for (final Map.Entry<String, Set<Integer>> entry : inputs.entrySet()) {
+                missedCount += received.get(entry.getKey()).size();
+            }
+            System.out.println("missedRecords=" + missedCount);
+        }
+
+        // give it one more try if it's not already passing.
+        if (!verificationResult.passed()) {
+            verificationResult = verifyAll(inputs, events, true);
+        }
+        success &= verificationResult.passed();
+
+        System.out.println(verificationResult.result());
+
+        System.out.println(success ? "SUCCESS" : "FAILURE");
+        return verificationResult;
+    }
+
+    public static class VerificationResult {
+        private final boolean passed;
+        private final String result;
+
+        VerificationResult(final boolean passed, final String result) {
+            this.passed = passed;
+            this.result = result;
+        }
+
+        public boolean passed() {
+            return passed;
+        }
+
+        public String result() {
+            return result;
+        }
+    }
+
+    private static VerificationResult verifyAll(final Map<String, Set<Integer>> inputs,
+                                                final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events,
+                                                final boolean printResults) {
+        final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+        boolean pass;
+        try (final PrintStream resultStream = new PrintStream(byteArrayOutputStream)) {
+            pass = verifyTAgg(resultStream, inputs, events.get("tagg"), printResults);
+            pass &= verifySuppressed(resultStream, "min-suppressed", events, printResults);
+            pass &= verify(resultStream, "min-suppressed", inputs, events, windowedKey -> {
+                final String unwindowedKey = windowedKey.substring(1, windowedKey.length() - 1).replaceAll("@.*", "");
+                return getMin(unwindowedKey);
+            }, printResults);
+            pass &= verifySuppressed(resultStream, "sws-suppressed", events, printResults);
+            pass &= verify(resultStream, "min", inputs, events, SmokeTestDriver::getMin, printResults);
+            pass &= verify(resultStream, "max", inputs, events, SmokeTestDriver::getMax, printResults);
+            pass &= verify(resultStream, "dif", inputs, events, key -> getMax(key).intValue() - getMin(key).intValue(), printResults);
+            pass &= verify(resultStream, "sum", inputs, events, SmokeTestDriver::getSum, printResults);
+            pass &= verify(resultStream, "cnt", inputs, events, key1 -> getMax(key1).intValue() - getMin(key1).intValue() + 1L, printResults);
+            pass &= verify(resultStream, "avg", inputs, events, SmokeTestDriver::getAvg, printResults);
+        }
+        return new VerificationResult(pass, new String(byteArrayOutputStream.toByteArray(), StandardCharsets.UTF_8));
+    }
+
+    private static boolean verify(final PrintStream resultStream,
+                                  final String topic,
+                                  final Map<String, Set<Integer>> inputData,
+                                  final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events,
+                                  final Function<String, Number> keyToExpectation,
+                                  final boolean printResults) {
+        final Map<String, LinkedList<ConsumerRecord<String, Number>>> observedInputEvents = events.get("data");
+        final Map<String, LinkedList<ConsumerRecord<String, Number>>> outputEvents = events.getOrDefault(topic, emptyMap());
+        if (outputEvents.isEmpty()) {
+            resultStream.println(topic + " is empty");
+            return false;
+        } else {
+            resultStream.printf("verifying %s with %d keys%n", topic, outputEvents.size());
+
+            if (outputEvents.size() != inputData.size()) {
+                resultStream.printf("fail: resultCount=%d expectedCount=%s%n\tresult=%s%n\texpected=%s%n",
+                                    outputEvents.size(), inputData.size(), outputEvents.keySet(), inputData.keySet());
+                return false;
+            }
+            for (final Map.Entry<String, LinkedList<ConsumerRecord<String, Number>>> entry : outputEvents.entrySet()) {
+                final String key = entry.getKey();
+                final Number expected = keyToExpectation.apply(key);
+                final Number actual = entry.getValue().getLast().value();
+                if (!expected.equals(actual)) {
+                    resultStream.printf("%s fail: key=%s actual=%s expected=%s%n", topic, key, actual, expected);
+
+                    if (printResults) {
+                        resultStream.printf("\t inputEvents=%n%s%n\t" +
+                                "echoEvents=%n%s%n\tmaxEvents=%n%s%n\tminEvents=%n%s%n\tdifEvents=%n%s%n\tcntEvents=%n%s%n\ttaggEvents=%n%s%n",
+                            indent("\t\t", observedInputEvents.get(key)),
+                            indent("\t\t", events.getOrDefault("echo", emptyMap()).getOrDefault(key, new LinkedList<>())),
+                            indent("\t\t", events.getOrDefault("max", emptyMap()).getOrDefault(key, new LinkedList<>())),
+                            indent("\t\t", events.getOrDefault("min", emptyMap()).getOrDefault(key, new LinkedList<>())),
+                            indent("\t\t", events.getOrDefault("dif", emptyMap()).getOrDefault(key, new LinkedList<>())),
+                            indent("\t\t", events.getOrDefault("cnt", emptyMap()).getOrDefault(key, new LinkedList<>())),
+                            indent("\t\t", events.getOrDefault("tagg", emptyMap()).getOrDefault(key, new LinkedList<>())));
+
+                        if (!Utils.mkSet("echo", "max", "min", "dif", "cnt", "tagg").contains(topic))
+                            resultStream.printf("%sEvents=%n%s%n", topic, indent("\t\t", entry.getValue()));
+                    }
+
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+
+    private static boolean verifySuppressed(final PrintStream resultStream,
+                                            @SuppressWarnings("SameParameterValue") final String topic,
+                                            final Map<String, Map<String, LinkedList<ConsumerRecord<String, Number>>>> events,
+                                            final boolean printResults) {
+        resultStream.println("verifying suppressed " + topic);
+        final Map<String, LinkedList<ConsumerRecord<String, Number>>> topicEvents = events.getOrDefault(topic, emptyMap());
+        for (final Map.Entry<String, LinkedList<ConsumerRecord<String, Number>>> entry : topicEvents.entrySet()) {
+            if (entry.getValue().size() != 1) {
+                final String unsuppressedTopic = topic.replace("-suppressed", "-raw");
+                final String key = entry.getKey();
+                final String unwindowedKey = key.substring(1, key.length() - 1).replaceAll("@.*", "");
+                resultStream.printf("fail: key=%s%n\tnon-unique result:%n%s%n",
+                                    key,
+                                    indent("\t\t", entry.getValue()));
+
+                if (printResults)
+                    resultStream.printf("\tresultEvents:%n%s%n\tinputEvents:%n%s%n",
+                        indent("\t\t", events.get(unsuppressedTopic).get(key)),
+                        indent("\t\t", events.get("data").get(unwindowedKey)));
+
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private static String indent(@SuppressWarnings("SameParameterValue") final String prefix,
+                                 final Iterable<ConsumerRecord<String, Number>> list) {
+        final StringBuilder stringBuilder = new StringBuilder();
+        for (final ConsumerRecord<String, Number> record : list) {
+            stringBuilder.append(prefix).append(record).append('\n');
+        }
+        return stringBuilder.toString();
+    }
+
+    private static Long getSum(final String key) {
+        final int min = getMin(key).intValue();
+        final int max = getMax(key).intValue();
+        return ((long) min + max) * (max - min + 1L) / 2L;
+    }
+
+    private static Double getAvg(final String key) {
+        final int min = getMin(key).intValue();
+        final int max = getMax(key).intValue();
+        return ((long) min + max) / 2.0;
+    }
+
+
+    private static boolean verifyTAgg(final PrintStream resultStream,
+                                      final Map<String, Set<Integer>> allData,
+                                      final Map<String, LinkedList<ConsumerRecord<String, Number>>> taggEvents,
+                                      final boolean printResults) {
+        if (taggEvents == null) {
+            resultStream.println("tagg is missing");
+            return false;
+        } else if (taggEvents.isEmpty()) {
+            resultStream.println("tagg is empty");
+            return false;
+        } else {
+            resultStream.println("verifying tagg");
+
+            // generate expected answer
+            final Map<String, Long> expected = new HashMap<>();
+            for (final String key : allData.keySet()) {
+                final int min = getMin(key).intValue();
+                final int max = getMax(key).intValue();
+                final String cnt = Long.toString(max - min + 1L);
+
+                expected.put(cnt, expected.getOrDefault(cnt, 0L) + 1);
+            }
+
+            // check the result
+            for (final Map.Entry<String, LinkedList<ConsumerRecord<String, Number>>> entry : taggEvents.entrySet()) {
+                final String key = entry.getKey();
+                Long expectedCount = expected.remove(key);
+                if (expectedCount == null) {
+                    expectedCount = 0L;
+                }
+
+                if (entry.getValue().getLast().value().longValue() != expectedCount) {
+                    resultStream.println("fail: key=" + key + " tagg=" + entry.getValue() + " expected=" + expectedCount);
+
+                    if (printResults)
+                        resultStream.println("\t taggEvents: " + entry.getValue());
+                    return false;
+                }
+            }
+
+        }
+        return true;
+    }
+
+    private static Number getMin(final String key) {
+        return Integer.parseInt(key.split("-")[0]);
+    }
+
+    private static Number getMax(final String key) {
+        return Integer.parseInt(key.split("-")[1]);
+    }
+
+    private static List<TopicPartition> getAllPartitions(final KafkaConsumer<?, ?> consumer, final String... topics) {
+        final List<TopicPartition> partitions = new ArrayList<>();
+
+        for (final String topic : topics) {
+            for (final PartitionInfo info : consumer.partitionsFor(topic)) {
+                partitions.add(new TopicPartition(info.topic(), info.partition()));
+            }
+        }
+        return partitions;
+    }
+
+}
diff --git a/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java
new file mode 100644
index 0000000000000..9f17760c7454a
--- /dev/null
+++ b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/SmokeTestUtil.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.tests;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.serialization.Serdes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.Aggregator;
+import org.apache.kafka.streams.kstream.Initializer;
+import org.apache.kafka.streams.kstream.KeyValueMapper;
+import org.apache.kafka.streams.kstream.Windowed;
+import org.apache.kafka.streams.processor.api.ContextualProcessor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.ProcessorSupplier;
+import org.apache.kafka.streams.processor.api.Record;
+
+import java.time.Instant;
+
+public class SmokeTestUtil {
+
+    final static int END = Integer.MAX_VALUE;
+
+    static ProcessorSupplier<Object, Object, Void, Void> printProcessorSupplier(final String topic) {
+        return printProcessorSupplier(topic, "");
+    }
+
+    static ProcessorSupplier<Object, Object, Void, Void> printProcessorSupplier(final String topic, final String name) {
+        return () -> new ContextualProcessor<Object, Object, Void, Void>() {
+            private int numRecordsProcessed = 0;
+            private long smallestOffset = Long.MAX_VALUE;
+            private long largestOffset = Long.MIN_VALUE;
+
+            @Override
+            public void init(final ProcessorContext<Void, Void> context) {
+                super.init(context);
+                System.out.println("[3.2] initializing processor: topic=" + topic + " taskId=" + context.taskId());
+                System.out.flush();
+                numRecordsProcessed = 0;
+                smallestOffset = Long.MAX_VALUE;
+                largestOffset = Long.MIN_VALUE;
+            }
+
+            @Override
+            public void process(final Record<Object, Object> record) {
+                numRecordsProcessed++;
+                if (numRecordsProcessed % 100 == 0) {
+                    System.out.printf("%s: %s%n", name, Instant.now());
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
+                }
+
+                context().recordMetadata().ifPresent(recordMetadata -> {
+                    if (smallestOffset > recordMetadata.offset()) {
+                        smallestOffset = recordMetadata.offset();
+                    }
+                    if (largestOffset < recordMetadata.offset()) {
+                        largestOffset = recordMetadata.offset();
+                    }
+                });
+            }
+
+            @Override
+            public void close() {
+                System.out.printf("Close processor for task %s%n", context().taskId());
+                System.out.println("processed " + numRecordsProcessed + " records");
+                final long processed;
+                if (largestOffset >= smallestOffset) {
+                    processed = 1L + largestOffset - smallestOffset;
+                } else {
+                    processed = 0L;
+                }
+                System.out.println("offset " + smallestOffset + " to " + largestOffset + " -> processed " + processed);
+                System.out.flush();
+            }
+        };
+    }
+
+    public static final class Unwindow<K, V> implements KeyValueMapper<Windowed<K>, V, K> {
+        @Override
+        public K apply(final Windowed<K> winKey, final V value) {
+            return winKey.key();
+        }
+    }
+
+    public static class Agg {
+
+        KeyValueMapper<String, Long, KeyValue<String, Long>> selector() {
+            return (key, value) -> new KeyValue<>(value == null ? null : Long.toString(value), 1L);
+        }
+
+        public Initializer<Long> init() {
+            return () -> 0L;
+        }
+
+        Aggregator<String, Long, Long> adder() {
+            return (aggKey, value, aggregate) -> aggregate + value;
+        }
+
+        Aggregator<String, Long, Long> remover() {
+            return (aggKey, value, aggregate) -> aggregate - value;
+        }
+    }
+
+    public static Serde<String> stringSerde = Serdes.String();
+
+    public static Serde<Integer> intSerde = Serdes.Integer();
+
+    static Serde<Long> longSerde = Serdes.Long();
+
+    static Serde<Double> doubleSerde = Serdes.Double();
+
+    public static void sleep(final long duration) {
+        try {
+            Thread.sleep(duration);
+        } catch (final Exception ignore) { }
+    }
+
+}
diff --git a/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java
new file mode 100644
index 0000000000000..5803b2fbd0217
--- /dev/null
+++ b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsSmokeTest.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.tests;
+
+import org.apache.kafka.common.utils.Exit;
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.streams.StreamsConfig;
+
+import java.io.IOException;
+import java.time.Duration;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.UUID;
+
+import static org.apache.kafka.streams.tests.SmokeTestDriver.generate;
+import static org.apache.kafka.streams.tests.SmokeTestDriver.generatePerpetually;
+
+public class StreamsSmokeTest {
+
+    /**
+     *  args ::= kafka propFileName command disableAutoTerminate
+     *  command := "run" | "process"
+     *
+     * @param args
+     */
+    public static void main(final String[] args) throws IOException {
+        if (args.length < 2) {
+            System.err.println("StreamsSmokeTest are expecting two parameters: propFile, command; but only see " + args.length + " parameter");
+            Exit.exit(1);
+        }
+
+        final String propFileName = args[0];
+        final String command = args[1];
+        final boolean disableAutoTerminate = args.length > 2;
+
+        final Properties streamsProperties = Utils.loadProps(propFileName);
+        final String kafka = streamsProperties.getProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG);
+        final String processingGuarantee = streamsProperties.getProperty(StreamsConfig.PROCESSING_GUARANTEE_CONFIG);
+
+        if (kafka == null) {
+            System.err.println("No bootstrap kafka servers specified in " + StreamsConfig.BOOTSTRAP_SERVERS_CONFIG);
+            Exit.exit(1);
+        }
+
+        if ("process".equals(command)) {
+            if (!StreamsConfig.AT_LEAST_ONCE.equals(processingGuarantee) &&
+                !StreamsConfig.EXACTLY_ONCE_V2.equals(processingGuarantee)) {
+
+                System.err.println("processingGuarantee must be either " + StreamsConfig.AT_LEAST_ONCE + " or " +
+                    StreamsConfig.EXACTLY_ONCE_V2);
+
+                Exit.exit(1);
+            }
+        }
+
+        System.out.println("StreamsTest instance started (StreamsSmokeTest)");
+        System.out.println("command=" + command);
+        System.out.println("props=" + streamsProperties);
+        System.out.println("disableAutoTerminate=" + disableAutoTerminate);
+
+        switch (command) {
+            case "run":
+                // this starts the driver (data generation and result verification)
+                final int numKeys = 10;
+                final int maxRecordsPerKey = 500;
+                if (disableAutoTerminate) {
+                    generatePerpetually(kafka, numKeys, maxRecordsPerKey);
+                } else {
+                    // slow down data production to span 30 seconds so that system tests have time to
+                    // do their bounces, etc.
+                    final Map<String, Set<Integer>> allData =
+                        generate(kafka, numKeys, maxRecordsPerKey, Duration.ofSeconds(30));
+                    SmokeTestDriver.verify(kafka, allData, maxRecordsPerKey);
+                }
+                break;
+            case "process":
+                // this starts the stream processing app
+                new SmokeTestClient(UUID.randomUUID().toString()).start(streamsProperties);
+                break;
+            default:
+                System.out.println("unknown command: " + command);
+        }
+    }
+
+}
diff --git a/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
new file mode 100644
index 0000000000000..7419896a0bf2f
--- /dev/null
+++ b/streams/upgrade-system-tests-32/src/test/java/org/apache/kafka/streams/tests/StreamsUpgradeTest.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.tests;
+
+import org.apache.kafka.common.utils.Utils;
+import org.apache.kafka.streams.KafkaStreams;
+import org.apache.kafka.streams.StreamsBuilder;
+import org.apache.kafka.streams.StreamsConfig;
+import org.apache.kafka.streams.kstream.Consumed;
+import org.apache.kafka.streams.kstream.KStream;
+import org.apache.kafka.streams.kstream.KTable;
+import org.apache.kafka.streams.kstream.Produced;
+import org.apache.kafka.streams.processor.api.ContextualProcessor;
+import org.apache.kafka.streams.processor.api.ProcessorContext;
+import org.apache.kafka.streams.processor.api.ProcessorSupplier;
+import org.apache.kafka.streams.processor.api.Record;
+
+import java.util.Properties;
+import java.util.Random;
+
+import static org.apache.kafka.streams.tests.SmokeTestUtil.intSerde;
+import static org.apache.kafka.streams.tests.SmokeTestUtil.stringSerde;
+
+
+public class StreamsUpgradeTest {
+
+    @SuppressWarnings("unchecked")
+    public static void main(final String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("StreamsUpgradeTest requires one argument (properties-file) but provided none");
+        }
+        final String propFileName = args[0];
+
+        final Properties streamsProperties = Utils.loadProps(propFileName);
+
+        System.out.println("StreamsTest instance started (StreamsUpgradeTest v3.2)");
+        System.out.println("props=" + streamsProperties);
+
+        final StreamsBuilder builder = new StreamsBuilder();
+        final KTable<String, Integer> dataTable = builder.table(
+            "data", Consumed.with(stringSerde, intSerde));
+        final KStream<String, Integer> dataStream = dataTable.toStream();
+        dataStream.process(printProcessorSupplier("data"));
+        dataStream.to("echo");
+
+        final boolean runFkJoin = Boolean.parseBoolean(streamsProperties.getProperty(
+            "test.run_fk_join",
+            "false"));
+        if (runFkJoin) {
+            try {
+                final KTable<Integer, String> fkTable = builder.table(
+                    "fk", Consumed.with(intSerde, stringSerde));
+                buildFKTable(dataStream, fkTable);
+            } catch (final Exception e) {
+                System.err.println("Caught " + e.getMessage());
+            }
+        }
+
+        final Properties config = new Properties();
+        config.setProperty(
+            StreamsConfig.APPLICATION_ID_CONFIG,
+            "StreamsUpgradeTest-" + new Random().nextLong());
+        config.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
+        config.putAll(streamsProperties);
+
+        final KafkaStreams streams = new KafkaStreams(builder.build(), config);
+        streams.start();
+
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            streams.close();
+            System.out.println("UPGRADE-TEST-CLIENT-CLOSED");
+            System.out.flush();
+        }));
+    }
+
+    private static void buildFKTable(final KStream<String, Integer> primaryTable,
+                                     final KTable<Integer, String> otherTable) {
+        final KStream<String, String> kStream = primaryTable.toTable()
+            .join(otherTable, v -> v, (k0, v0) -> v0)
+            .toStream();
+        kStream.process(printProcessorSupplier("fk"));
+        kStream.to("fk-result", Produced.with(stringSerde, stringSerde));
+    }
+
+    private static <KIn, VIn, KOut, VOut> ProcessorSupplier<KIn, VIn, KOut, VOut> printProcessorSupplier(final String topic) {
+        return () -> new ContextualProcessor<KIn, VIn, KOut, VOut>() {
+            private int numRecordsProcessed = 0;
+
+            @Override
+            public void init(final ProcessorContext<KOut, VOut> context) {
+                System.out.println("[3.2] initializing processor: topic=" + topic + "taskId=" + context.taskId());
+                numRecordsProcessed = 0;
+            }
+
+            @Override
+            public void process(final Record<KIn, VIn> record) {
+                numRecordsProcessed++;
+                if (numRecordsProcessed % 100 == 0) {
+                    System.out.println("processed " + numRecordsProcessed + " records from topic=" + topic);
+                }
+            }
+
+            @Override
+            public void close() {}
+        };
+    }
+}
diff --git a/tests/docker/Dockerfile b/tests/docker/Dockerfile
index ac4c39b210edd..736845b3d0bab 100644
--- a/tests/docker/Dockerfile
+++ b/tests/docker/Dockerfile
@@ -63,8 +63,9 @@ RUN mkdir -p "/opt/kafka-2.5.1" && chmod a+rw /opt/kafka-2.5.1 && curl -s "$KAFK
 RUN mkdir -p "/opt/kafka-2.6.2" && chmod a+rw /opt/kafka-2.6.2 && curl -s "$KAFKA_MIRROR/kafka_2.12-2.6.2.tgz" | tar xz --strip-components=1 -C "/opt/kafka-2.6.2"
 RUN mkdir -p "/opt/kafka-2.7.1" && chmod a+rw /opt/kafka-2.7.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-2.7.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-2.7.1"
 RUN mkdir -p "/opt/kafka-2.8.1" && chmod a+rw /opt/kafka-2.8.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-2.8.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-2.8.1"
-RUN mkdir -p "/opt/kafka-3.0.0" && chmod a+rw /opt/kafka-3.0.0 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.0.0.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.0.0"
-RUN mkdir -p "/opt/kafka-3.1.0" && chmod a+rw /opt/kafka-3.1.0 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.1.0.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.1.0"
+RUN mkdir -p "/opt/kafka-3.0.1" && chmod a+rw /opt/kafka-3.0.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.0.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.0.1"
+RUN mkdir -p "/opt/kafka-3.1.1" && chmod a+rw /opt/kafka-3.1.1 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.1.1.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.1.1"
+RUN mkdir -p "/opt/kafka-3.2.0" && chmod a+rw /opt/kafka-3.2.0 && curl -s "$KAFKA_MIRROR/kafka_2.12-3.2.0.tgz" | tar xz --strip-components=1 -C "/opt/kafka-3.2.0"
 
 # Streams test dependencies
 RUN curl -s "$KAFKA_MIRROR/kafka-streams-0.10.0.1-test.jar" -o /opt/kafka-0.10.0.1/libs/kafka-streams-0.10.0.1-test.jar
@@ -82,8 +83,9 @@ RUN curl -s "$KAFKA_MIRROR/kafka-streams-2.5.1-test.jar" -o /opt/kafka-2.5.1/lib
 RUN curl -s "$KAFKA_MIRROR/kafka-streams-2.6.2-test.jar" -o /opt/kafka-2.6.2/libs/kafka-streams-2.6.2-test.jar
 RUN curl -s "$KAFKA_MIRROR/kafka-streams-2.7.1-test.jar" -o /opt/kafka-2.7.1/libs/kafka-streams-2.7.1-test.jar
 RUN curl -s "$KAFKA_MIRROR/kafka-streams-2.8.1-test.jar" -o /opt/kafka-2.8.1/libs/kafka-streams-2.8.1-test.jar
-RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.0.0-test.jar" -o /opt/kafka-3.0.0/libs/kafka-streams-3.0.0-test.jar
-RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.1.0-test.jar" -o /opt/kafka-3.1.0/libs/kafka-streams-3.1.0-test.jar
+RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.0.1-test.jar" -o /opt/kafka-3.0.1/libs/kafka-streams-3.0.1-test.jar
+RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.1.1-test.jar" -o /opt/kafka-3.1.1/libs/kafka-streams-3.1.1-test.jar
+RUN curl -s "$KAFKA_MIRROR/kafka-streams-3.2.0-test.jar" -o /opt/kafka-3.2.0/libs/kafka-streams-3.2.0-test.jar
 
 # The version of Kibosh to use for testing.
 # If you update this, also update vagrant/base.sh
diff --git a/tests/kafkatest/__init__.py b/tests/kafkatest/__init__.py
index 69101d87630b2..00f67dcc81801 100644
--- a/tests/kafkatest/__init__.py
+++ b/tests/kafkatest/__init__.py
@@ -22,4 +22,4 @@
 # Instead, in development branches, the version should have a suffix of the form ".devN"
 #
 # For example, when Kafka is at version 1.0.0-SNAPSHOT, this should be something like "1.0.0.dev0"
-__version__ = '3.2.0.dev0'
+__version__ = '3.4.0.dev0'
diff --git a/tests/kafkatest/services/connect.py b/tests/kafkatest/services/connect.py
index 26c0d927dccd5..41c33ccb9e102 100644
--- a/tests/kafkatest/services/connect.py
+++ b/tests/kafkatest/services/connect.py
@@ -69,7 +69,8 @@ class ConnectServiceBase(KafkaPathResolverMixin, Service):
             "collect_default": True}
     }
 
-    def __init__(self, context, num_nodes, kafka, files, startup_timeout_sec = 60):
+    def __init__(self, context, num_nodes, kafka, files, startup_timeout_sec=60,
+                 include_filestream_connectors=False):
         super(ConnectServiceBase, self).__init__(context, num_nodes)
         self.kafka = kafka
         self.security_config = kafka.security_config.client_config()
@@ -78,6 +79,8 @@ def __init__(self, context, num_nodes, kafka, files, startup_timeout_sec = 60):
         self.startup_timeout_sec = startup_timeout_sec
         self.environment = {}
         self.external_config_template_func = None
+        self.include_filestream_connectors = include_filestream_connectors
+        self.logger.debug("include_filestream_connectors % s", include_filestream_connectors)
 
     def pids(self, node):
         """Return process ids for Kafka Connect processes."""
@@ -279,12 +282,34 @@ def append_to_environment_variable(self, envvar, value):
             env_opts = "\"%s %s\"" % (env_opts.strip('\"'), value)
         self.environment[envvar] = env_opts
 
+    def append_filestream_connectors_to_classpath(self):
+        if self.include_filestream_connectors:
+            cwd = os.getcwd()
+            self.logger.info("Including filestream connectors when starting Connect. "
+                             "Looking for jar locally in: %s" % cwd)
+            relative_path = "/connect/file/build/libs/"
+            local_dir = cwd + relative_path
+            lib_dir = self.path.home() + relative_path
+            for pwd, dirs, files in os.walk(local_dir):
+                for file in files:
+                    if file.startswith("connect-file") and file.endswith(".jar"):
+                        # Use the expected directory on the node instead of the path in the driver node
+                        file_path = lib_dir + file
+                        self.logger.debug("Appending %s to Connect worker's CLASSPATH" % file_path)
+                        return "export CLASSPATH=${CLASSPATH}:%s; " % file_path
+            self.logger.info("Jar with filestream connectors was not found under %s" % lib_dir)
+        else:
+            self.logger.info("Starting Connect without filestream connectors in the CLASSPATH")
+
+        return None
+
 
 class ConnectStandaloneService(ConnectServiceBase):
     """Runs Kafka Connect in standalone mode."""
 
-    def __init__(self, context, kafka, files, startup_timeout_sec = 60):
-        super(ConnectStandaloneService, self).__init__(context, 1, kafka, files, startup_timeout_sec)
+    def __init__(self, context, kafka, files, startup_timeout_sec=60, include_filestream_connectors=False):
+        super(ConnectStandaloneService, self).__init__(context, 1, kafka, files, startup_timeout_sec,
+                                                       include_filestream_connectors)
 
     # For convenience since this service only makes sense with a single node
     @property
@@ -299,6 +324,9 @@ def start_cmd(self, node, connector_configs):
 
         cmd += fix_opts_for_new_jvm(node)
         cmd += "export KAFKA_OPTS=\"%s %s\"; " % (heap_kafka_opts, other_kafka_opts)
+        classpath = self.append_filestream_connectors_to_classpath()
+        cmd += classpath if classpath else ""
+
         for envvar in self.environment:
             cmd += "export %s=%s; " % (envvar, str(self.environment[envvar]))
         cmd += "%s %s " % (self.path.script("connect-standalone.sh", node), self.CONFIG_FILE)
@@ -339,8 +367,9 @@ class ConnectDistributedService(ConnectServiceBase):
     """Runs Kafka Connect in distributed mode."""
 
     def __init__(self, context, num_nodes, kafka, files, offsets_topic="connect-offsets",
-                 configs_topic="connect-configs", status_topic="connect-status", startup_timeout_sec = 60):
-        super(ConnectDistributedService, self).__init__(context, num_nodes, kafka, files, startup_timeout_sec)
+                 configs_topic="connect-configs", status_topic="connect-status", startup_timeout_sec=60,
+                 include_filestream_connectors=False):
+        super(ConnectDistributedService, self).__init__(context, num_nodes, kafka, files, startup_timeout_sec, include_filestream_connectors)
         self.startup_mode = self.STARTUP_MODE_JOIN
         self.offsets_topic = offsets_topic
         self.configs_topic = configs_topic
@@ -355,6 +384,9 @@ def start_cmd(self, node, connector_configs):
         cmd += "export KAFKA_OPTS=\"%s %s\"; " % (heap_kafka_opts, other_kafka_opts)
         for envvar in self.environment:
             cmd += "export %s=%s; " % (envvar, str(self.environment[envvar]))
+
+        classpath = self.append_filestream_connectors_to_classpath()
+        cmd += classpath if classpath else ""
         cmd += "%s %s " % (self.path.script("connect-distributed.sh", node), self.CONFIG_FILE)
         cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE)
         return cmd
diff --git a/tests/kafkatest/services/kafka/kafka.py b/tests/kafkatest/services/kafka/kafka.py
index 55b5b7b87141b..041578b8d04bc 100644
--- a/tests/kafkatest/services/kafka/kafka.py
+++ b/tests/kafkatest/services/kafka/kafka.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import json
+import math
 import os.path
 import re
 import signal
@@ -158,7 +159,8 @@ class for details.
     METADATA_SNAPSHOT_SEARCH_STR = "%s/__cluster_metadata-0/*.checkpoint" % METADATA_LOG_DIR
     METADATA_FIRST_LOG = "%s/__cluster_metadata-0/00000000000000000000.log" % METADATA_LOG_DIR
     # Kafka Authorizer
-    ACL_AUTHORIZER = "kafka.security.authorizer.AclAuthorizer"
+    ZK_ACL_AUTHORIZER = "kafka.security.authorizer.AclAuthorizer"
+    KRAFT_ACL_AUTHORIZER = "org.apache.kafka.metadata.authorizer.StandardAuthorizer"
     HEAP_DUMP_FILE = os.path.join(PERSISTENT_ROOT, "kafka_heap_dump.bin")
     INTERBROKER_LISTENER_NAME = 'INTERNAL'
     JAAS_CONF_PROPERTY = "java.security.auth.login.config=/mnt/security/jaas.conf"
@@ -182,6 +184,9 @@ class for details.
         "kafka_data_2": {
             "path": DATA_LOG_DIR_2,
             "collect_default": False},
+        "kafka_cluster_metadata": {
+            "path": METADATA_LOG_DIR,
+            "collect_default": False},
         "kafka_heap_dump_file": {
             "path": HEAP_DUMP_FILE,
             "collect_default": True}
@@ -863,12 +868,25 @@ def signal_leader(self, topic, partition=0, sig=signal.SIGTERM):
         leader = self.leader(topic, partition)
         self.signal_node(leader, sig)
 
+    def controllers_required_for_quorum(self):
+        """
+        Assume N = the total number of controller nodes in the cluster, and positive
+        For N=1, we need 1 controller to be running to have a quorum
+        For N=2, we need 2 controllers
+        For N=3, we need 2 controllers
+        For N=4, we need 3 controllers
+        For N=5, we need 3 controllers
+
+        :return: the number of controller nodes that must be started for there to be a quorum
+        """
+        return math.ceil((1 + self.num_nodes_controller_role) / 2)
+
     def stop_node(self, node, clean_shutdown=True, timeout_sec=60):
         pids = self.pids(node)
         cluster_has_colocated_controllers = self.quorum_info.has_brokers and self.quorum_info.has_controllers
         force_sigkill_due_to_too_few_colocated_controllers =\
             clean_shutdown and cluster_has_colocated_controllers\
-            and self.colocated_nodes_started < round(self.num_nodes_controller_role / 2)
+            and self.colocated_nodes_started < self.controllers_required_for_quorum()
         if force_sigkill_due_to_too_few_colocated_controllers:
             self.logger.info("Forcing node to stop via SIGKILL due to too few co-located KRaft controllers: %i/%i" %\
                              (self.colocated_nodes_started, self.num_nodes_controller_role))
diff --git a/tests/kafkatest/services/kafka/templates/log4j.properties b/tests/kafkatest/services/kafka/templates/log4j.properties
index 5963c39c089df..5897658da4906 100644
--- a/tests/kafkatest/services/kafka/templates/log4j.properties
+++ b/tests/kafkatest/services/kafka/templates/log4j.properties
@@ -121,6 +121,8 @@ log4j.additivity.kafka.server.KafkaApis=false
 log4j.logger.kafka.request.logger={{ log_level|default("DEBUG") }}, requestInfoAppender, requestDebugAppender
 log4j.additivity.kafka.request.logger=false
 
+log4j.logger.org.apache.kafka.raft={{ log_level|default("DEBUG") }}, controllerInfoAppender, controllerDebugAppender
+log4j.logger.org.apache.kafka.controller={{ log_level|default("DEBUG") }}, controllerInfoAppender, controllerDebugAppender
 log4j.logger.kafka.controller={{ log_level|default("DEBUG") }}, controllerInfoAppender, controllerDebugAppender
 log4j.additivity.kafka.controller=false
 
diff --git a/tests/kafkatest/services/kafka/util.py b/tests/kafkatest/services/kafka/util.py
index 8782ebe7b4221..de6b85ff3c1ed 100644
--- a/tests/kafkatest/services/kafka/util.py
+++ b/tests/kafkatest/services/kafka/util.py
@@ -22,7 +22,6 @@
 
 new_jdk_not_supported = frozenset([str(LATEST_0_8_2), str(LATEST_0_9), str(LATEST_0_10_0), str(LATEST_0_10_1), str(LATEST_0_10_2), str(LATEST_0_11_0), str(LATEST_1_0)])
 
-
 def fix_opts_for_new_jvm(node):
     # Startup scripts for early versions of Kafka contains options
     # that not supported on latest versions of JVM like -XX:+PrintGCDateStamps or -XX:UseParNewGC.
@@ -33,9 +32,11 @@ def fix_opts_for_new_jvm(node):
         return ""
 
     cmd = ""
-    if node.version == LATEST_0_8_2 or node.version == LATEST_0_9 or node.version == LATEST_0_10_0 or node.version == LATEST_0_10_1 or node.version == LATEST_0_10_2 or node.version == LATEST_0_11_0 or node.version == LATEST_1_0:
-        cmd += "export KAFKA_GC_LOG_OPTS=\"-Xlog:gc*:file=kafka-gc.log:time,tags:filecount=10,filesize=102400\"; "
-        cmd += "export KAFKA_JVM_PERFORMANCE_OPTS=\"-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MaxInlineLevel=15 -Djava.awt.headless=true\"; "
+    # check kafka version for kafka node types
+    if hasattr(node, 'version'):
+        if node.version == LATEST_0_8_2 or node.version == LATEST_0_9 or node.version == LATEST_0_10_0 or node.version == LATEST_0_10_1 or node.version == LATEST_0_10_2 or node.version == LATEST_0_11_0 or node.version == LATEST_1_0:
+            cmd += "export KAFKA_GC_LOG_OPTS=\"-Xlog:gc*:file=kafka-gc.log:time,tags:filecount=10,filesize=102400\"; "
+            cmd += "export KAFKA_JVM_PERFORMANCE_OPTS=\"-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MaxInlineLevel=15 -Djava.awt.headless=true\"; "
     return cmd
 
 
diff --git a/tests/kafkatest/services/streams.py b/tests/kafkatest/services/streams.py
index f4f6a6a04f36f..38b303281d2c5 100644
--- a/tests/kafkatest/services/streams.py
+++ b/tests/kafkatest/services/streams.py
@@ -22,7 +22,7 @@
 from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
 from kafkatest.services.kafka import KafkaConfig
 from kafkatest.services.monitor.jmx import JmxMixin
-from kafkatest.version import LATEST_0_10_0, LATEST_0_10_1
+from kafkatest.version import KafkaVersion, LATEST_0_10_0, LATEST_0_10_1
 
 STATE_DIR = "state.dir"
 
@@ -616,6 +616,9 @@ def prop_file(self):
 
         if self.UPGRADE_FROM is not None:
             properties['upgrade.from'] = self.UPGRADE_FROM
+        if (self.UPGRADE_FROM is not None and KafkaVersion(self.UPGRADE_FROM).supports_fk_joins()) or \
+            (self.KAFKA_STREAMS_VERSION is not None and KafkaVersion(self.KAFKA_STREAMS_VERSION).supports_fk_joins()):
+            properties['test.run_fk_join'] = "true"
         if self.UPGRADE_TO == "future_version":
             properties['test.future.metadata'] = "any_value"
 
@@ -690,10 +693,9 @@ def prop_file(self):
                       streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers(),
                       streams_property.NUM_THREADS: self.NUM_THREADS,
                       consumer_property.GROUP_INSTANCE_ID: self.GROUP_INSTANCE_ID,
-                      consumer_property.SESSION_TIMEOUT_MS: 60000,
+                      consumer_property.SESSION_TIMEOUT_MS: 60000, # set longer session timeout for static member test
                       'input.topic': self.INPUT_TOPIC,
-                      "acceptable.recovery.lag": "9223372036854775807", # enable a one-shot assignment
-                      "session.timeout.ms": "10000" # set back to 10s for tests. See KIP-735
+                      "acceptable.recovery.lag": "9223372036854775807" # enable a one-shot assignment
                       }
 
 
diff --git a/tests/kafkatest/services/transactional_message_copier.py b/tests/kafkatest/services/transactional_message_copier.py
index 0717463f3773b..675c7d71531f3 100644
--- a/tests/kafkatest/services/transactional_message_copier.py
+++ b/tests/kafkatest/services/transactional_message_copier.py
@@ -93,7 +93,7 @@ def _worker(self, idx, node):
                         self.consumed = int(data["consumed"])
                         self.logger.info("%s: consumed %d, remaining %d" %
                                          (self.transactional_id, self.consumed, self.remaining))
-                        if "shutdown_complete" in data:
+                        if data["stage"] == "ShutdownComplete":
                            if self.remaining == 0:
                                 # We are only finished if the remaining
                                 # messages at the time of shutdown is 0.
diff --git a/tests/kafkatest/tests/client/client_compatibility_features_test.py b/tests/kafkatest/tests/client/client_compatibility_features_test.py
index 434c8280cb80d..3d8577dece1c5 100644
--- a/tests/kafkatest/tests/client/client_compatibility_features_test.py
+++ b/tests/kafkatest/tests/client/client_compatibility_features_test.py
@@ -26,7 +26,9 @@
 from kafkatest.services.zookeeper import ZookeeperService
 from kafkatest.services.kafka import KafkaService, quorum
 from ducktape.tests.test import Test
-from kafkatest.version import DEV_BRANCH, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, V_0_11_0_0, V_0_10_1_0, KafkaVersion
+from kafkatest.version import DEV_BRANCH, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, \
+    LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, \
+    LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, V_0_11_0_0, V_0_10_1_0, KafkaVersion
 
 def get_broker_features(broker_version):
     features = {}
@@ -43,9 +45,11 @@ def get_broker_features(broker_version):
     if broker_version < V_0_11_0_0:
         features["describe-acls-supported"] = False
         features["describe-configs-supported"] = False
+        features["idempotent-producer-supported"] = False
     else:
         features["describe-acls-supported"] = True
         features["describe-configs-supported"] = True
+        features["idempotent-producer-supported"] = True
     return features
 
 def run_command(node, cmd, ssh_log_file):
@@ -130,6 +134,7 @@ def invoke_compatibility_program(self, features):
     @parametrize(broker_version=str(LATEST_2_8))
     @parametrize(broker_version=str(LATEST_3_0))
     @parametrize(broker_version=str(LATEST_3_1))
+    @parametrize(broker_version=str(LATEST_3_2))
     def run_compatibility_test(self, broker_version, metadata_quorum=quorum.zk):
         if self.zk:
             self.zk.start()
diff --git a/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py b/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py
index 4c10dc0286973..69be84b0b3ed0 100644
--- a/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py
+++ b/tests/kafkatest/tests/client/client_compatibility_produce_consume_test.py
@@ -23,7 +23,9 @@
 from kafkatest.services.console_consumer import ConsoleConsumer
 from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest
 from kafkatest.utils import is_int_with_prefix
-from kafkatest.version import DEV_BRANCH, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, KafkaVersion
+from kafkatest.version import DEV_BRANCH, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, \
+    LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, \
+    LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, KafkaVersion
 
 class ClientCompatibilityProduceConsumeTest(ProduceConsumeValidateTest):
     """
@@ -73,6 +75,7 @@ def min_cluster_size(self):
     @parametrize(broker_version=str(LATEST_2_8))
     @parametrize(broker_version=str(LATEST_3_0))
     @parametrize(broker_version=str(LATEST_3_1))
+    @parametrize(broker_version=str(LATEST_3_2))
     def test_produce_consume(self, broker_version, metadata_quorum=quorum.zk):
         print("running producer_consumer_compat with broker_version = %s" % broker_version, flush=True)
         self.kafka.set_version(KafkaVersion(broker_version))
diff --git a/tests/kafkatest/tests/client/message_format_change_test.py b/tests/kafkatest/tests/client/message_format_change_test.py
index cb6cf72d22e5d..b65ef24704a68 100644
--- a/tests/kafkatest/tests/client/message_format_change_test.py
+++ b/tests/kafkatest/tests/client/message_format_change_test.py
@@ -57,9 +57,9 @@ def produce_and_consume(self, producer_version, consumer_version, group):
             err_msg="Producer did not produce all messages in reasonable amount of time"))
 
     @cluster(num_nodes=12)
-    @matrix(producer_version=[str(DEV_BRANCH)], consumer_version=[str(DEV_BRANCH)], metadata_quorum=quorum.all_non_upgrade)
-    @matrix(producer_version=[str(LATEST_0_10)], consumer_version=[str(LATEST_0_10)], metadata_quorum=quorum.all_non_upgrade)
-    @matrix(producer_version=[str(LATEST_0_9)], consumer_version=[str(LATEST_0_9)], metadata_quorum=quorum.all_non_upgrade)
+    @matrix(producer_version=[str(DEV_BRANCH)], consumer_version=[str(DEV_BRANCH)], metadata_quorum=[quorum.zk])
+    @matrix(producer_version=[str(LATEST_0_10)], consumer_version=[str(LATEST_0_10)], metadata_quorum=[quorum.zk])
+    @matrix(producer_version=[str(LATEST_0_9)], consumer_version=[str(LATEST_0_9)], metadata_quorum=[quorum.zk])
     def test_compatibility(self, producer_version, consumer_version, metadata_quorum=quorum.zk):
         """ This tests performs the following checks:
         The workload is a mix of 0.9.x, 0.10.x and 0.11.x producers and consumers
diff --git a/tests/kafkatest/tests/connect/connect_distributed_test.py b/tests/kafkatest/tests/connect/connect_distributed_test.py
index 6bc52b0d35f49..970779f723fb5 100644
--- a/tests/kafkatest/tests/connect/connect_distributed_test.py
+++ b/tests/kafkatest/tests/connect/connect_distributed_test.py
@@ -80,7 +80,7 @@ def __init__(self, test_context):
         self.value_converter = "org.apache.kafka.connect.json.JsonConverter"
         self.schemas = True
 
-    def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT, timestamp_type=None, broker_version=DEV_BRANCH, auto_create_topics=False):
+    def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT, timestamp_type=None, broker_version=DEV_BRANCH, auto_create_topics=False, include_filestream_connectors=False):
         self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                                   security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                                   topics=self.topics, version=broker_version,
@@ -89,7 +89,8 @@ def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT, timestamp_t
             for node in self.kafka.nodes:
                 node.config[config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type
 
-        self.cc = ConnectDistributedService(self.test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE])
+        self.cc = ConnectDistributedService(self.test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE],
+                                            include_filestream_connectors=include_filestream_connectors)
         self.cc.log_level = "DEBUG"
 
         self.zk.start()
@@ -370,7 +371,7 @@ def test_file_source_and_sink(self, security_protocol, connect_protocol):
         """
 
         self.CONNECT_PROTOCOL = connect_protocol
-        self.setup_services(security_protocol=security_protocol)
+        self.setup_services(security_protocol=security_protocol, include_filestream_connectors=True)
         self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
 
         self.cc.start()
@@ -522,7 +523,7 @@ def test_bounce(self, clean, connect_protocol):
     @matrix(connect_protocol=['sessioned', 'compatible', 'eager'])
     def test_transformations(self, connect_protocol):
         self.CONNECT_PROTOCOL = connect_protocol
-        self.setup_services(timestamp_type='CreateTime')
+        self.setup_services(timestamp_type='CreateTime', include_filestream_connectors=True)
         self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
         self.cc.start()
 
@@ -610,7 +611,8 @@ def test_broker_compatibility(self, broker_version, auto_create_topics, security
         or relies upon the broker to auto-create the topics (v0.10.0.x and before).
         """
         self.CONNECT_PROTOCOL = connect_protocol
-        self.setup_services(broker_version=KafkaVersion(broker_version), auto_create_topics=auto_create_topics, security_protocol=security_protocol)
+        self.setup_services(broker_version=KafkaVersion(broker_version), auto_create_topics=auto_create_topics,
+                            security_protocol=security_protocol, include_filestream_connectors=True)
         self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
 
         self.cc.start()
diff --git a/tests/kafkatest/tests/connect/connect_rest_test.py b/tests/kafkatest/tests/connect/connect_rest_test.py
index 4d978a232d20b..ff44d9412f1a0 100644
--- a/tests/kafkatest/tests/connect/connect_rest_test.py
+++ b/tests/kafkatest/tests/connect/connect_rest_test.py
@@ -73,7 +73,8 @@ def __init__(self, test_context):
             'test': {'partitions': 1, 'replication-factor': 1}
         })
 
-        self.cc = ConnectDistributedService(test_context, 2, self.kafka, [self.INPUT_FILE, self.INPUT_FILE2, self.OUTPUT_FILE])
+        self.cc = ConnectDistributedService(test_context, 2, self.kafka, [self.INPUT_FILE, self.INPUT_FILE2, self.OUTPUT_FILE],
+                                            include_filestream_connectors=True)
 
     @cluster(num_nodes=4)
     @matrix(connect_protocol=['compatible', 'eager'])
diff --git a/tests/kafkatest/tests/connect/connect_test.py b/tests/kafkatest/tests/connect/connect_test.py
index 1a7f6abfeb8b7..4c2a91a6036b3 100644
--- a/tests/kafkatest/tests/connect/connect_test.py
+++ b/tests/kafkatest/tests/connect/connect_test.py
@@ -91,8 +91,10 @@ def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.Jso
                                   security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                                   topics=self.topics, controller_num_nodes_override=self.num_zk)
 
-        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
-        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
+        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE],
+                                               include_filestream_connectors=True)
+        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE],
+                                             include_filestream_connectors=True)
         self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC_TEST,
                                                   consumer_timeout_ms=10000)
 
@@ -164,8 +166,10 @@ def test_skip_and_log_to_dlq(self, error_tolerance):
         else:
             faulty_records = faulty_records[0]
 
-        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
-        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
+        self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE],
+                                               include_filestream_connectors=True)
+        self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE],
+                                             include_filestream_connectors=True)
 
         self.zk.start()
         self.kafka.start()
diff --git a/tests/kafkatest/tests/core/zookeeper_authorizer_test.py b/tests/kafkatest/tests/core/authorizer_test.py
similarity index 80%
rename from tests/kafkatest/tests/core/zookeeper_authorizer_test.py
rename to tests/kafkatest/tests/core/authorizer_test.py
index 97c20c0c41cd6..20994c5b0dde5 100644
--- a/tests/kafkatest/tests/core/zookeeper_authorizer_test.py
+++ b/tests/kafkatest/tests/core/authorizer_test.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from ducktape.cluster.remoteaccount import RemoteCommandError
-from ducktape.mark import matrix
+from ducktape.mark import parametrize
 from ducktape.mark.resource import cluster
 from ducktape.tests.test import Test
 
@@ -22,8 +22,8 @@
 from kafkatest.services.zookeeper import ZookeeperService
 from kafkatest.services.security.kafka_acls import ACLs
 
-class ZooKeeperAuthorizerTest(Test):
-    """Tests that the ZooKeeper-based Authorizer works wth both ZooKeeper-based and KRaft clusters.
+class AuthorizerTest(Test):
+    """Tests that the default Authorizer implementations work with both ZooKeeper-based and KRaft clusters.
     Alters client quotas, making sure it works.
     Rolls Kafka with an authorizer.
     Alters client quotas, making sure it fails.
@@ -36,22 +36,29 @@ class ZooKeeperAuthorizerTest(Test):
     """
 
     def __init__(self, test_context):
-        super(ZooKeeperAuthorizerTest, self).__init__(test_context=test_context)
-
-        self.topic = "test_topic"
-        # setup ZooKeeper even with KRaft
-        self.zk = ZookeeperService(test_context, num_nodes=1)
-        self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk,
-                                  topics={self.topic: {"partitions": 1, "replication-factor": 1}},
-                                  controller_num_nodes_override=1, allow_zk_with_kraft=True)
+        super(AuthorizerTest, self).__init__(test_context=test_context)
+        self.test_context = test_context
+
     def setUp(self):
-        # start ZooKeeper even with KRaft
-        self.zk.start()
         self.acls = ACLs(self.test_context)
 
     @cluster(num_nodes=4)
-    @matrix(metadata_quorum=quorum.all_non_upgrade)
-    def test_authorizer(self, metadata_quorum):
+    @parametrize(metadata_quorum=quorum.remote_kraft, authorizer_class=KafkaService.KRAFT_ACL_AUTHORIZER)
+    @parametrize(metadata_quorum=quorum.remote_kraft, authorizer_class=KafkaService.ZK_ACL_AUTHORIZER)
+    @parametrize(metadata_quorum=quorum.zk, authorizer_class=KafkaService.ZK_ACL_AUTHORIZER)
+    def test_authorizer(self, metadata_quorum, authorizer_class):
+        topics = {"test_topic": {"partitions": 1, "replication-factor": 1}}
+
+        if (authorizer_class == KafkaService.KRAFT_ACL_AUTHORIZER):
+            self.zk = None
+        else:
+            self.zk = ZookeeperService(self.test_context, num_nodes=1)
+            self.zk.start()
+
+        self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk,
+                                  topics=topics, controller_num_nodes_override=1,
+                                  allow_zk_with_kraft=True)
+
         broker_security_protocol = "SSL"
         broker_principal = "User:CN=systemtest"
         client_security_protocol = "SASL_PLAINTEXT"
@@ -80,11 +87,11 @@ def test_authorizer(self, metadata_quorum):
             # we need to explicitly reconfigure/restart any remote controller quorum
             self.kafka.logger.info("Restarting Remote KRaft Controller with authorizer and broker principal as super user")
             controller_quorum = self.kafka.controller_quorum
-            controller_quorum.authorizer_class_name = KafkaService.ACL_AUTHORIZER
+            controller_quorum.authorizer_class_name = authorizer_class
             controller_quorum.server_prop_overrides = [["super.users", broker_principal]] # for broker to work with an authorizer
             controller_quorum.restart_cluster()
         self.kafka.logger.info("Restarting Kafka with authorizer and broker principal as super user")
-        self.kafka.authorizer_class_name = KafkaService.ACL_AUTHORIZER
+        self.kafka.authorizer_class_name = authorizer_class
         self.kafka.server_prop_overrides = [["super.users", broker_principal]] # for broker to work with an authorizer
         self.kafka.restart_cluster()
 
diff --git a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py
index a12ca3f8b2cbb..406be4a6d998a 100644
--- a/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py
+++ b/tests/kafkatest/tests/core/compatibility_test_new_broker_test.py
@@ -21,7 +21,9 @@
 from kafkatest.services.zookeeper import ZookeeperService
 from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest
 from kafkatest.utils import is_int
-from kafkatest.version import LATEST_0_8_2, LATEST_0_9, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, DEV_BRANCH, KafkaVersion
+from kafkatest.version import LATEST_0_8_2, LATEST_0_9, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, \
+    LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, \
+    LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, DEV_BRANCH, KafkaVersion
 
 # Compatibility tests for moving to a new broker (e.g., 0.10.x) and using a mix of old and new clients (e.g., 0.9.x)
 class ClientCompatibilityTestNewBroker(ProduceConsumeValidateTest):
@@ -56,6 +58,7 @@ def setUp(self):
     @matrix(producer_version=[str(LATEST_2_8)], consumer_version=[str(LATEST_2_8)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
     @matrix(producer_version=[str(LATEST_3_0)], consumer_version=[str(LATEST_3_0)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
     @matrix(producer_version=[str(LATEST_3_1)], consumer_version=[str(LATEST_3_1)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
+    @matrix(producer_version=[str(LATEST_3_2)], consumer_version=[str(LATEST_3_2)], compression_types=[["none"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
     @matrix(producer_version=[str(LATEST_2_1)], consumer_version=[str(LATEST_2_1)], compression_types=[["zstd"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
     @matrix(producer_version=[str(LATEST_2_0)], consumer_version=[str(LATEST_2_0)], compression_types=[["snappy"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
     @matrix(producer_version=[str(LATEST_1_1)], consumer_version=[str(LATEST_1_1)], compression_types=[["lz4"]], timestamp_type=[str("CreateTime")], metadata_quorum=quorum.all_non_upgrade)
diff --git a/tests/kafkatest/tests/core/downgrade_test.py b/tests/kafkatest/tests/core/downgrade_test.py
index 772ec96d6c25a..89cdcdfa469aa 100644
--- a/tests/kafkatest/tests/core/downgrade_test.py
+++ b/tests/kafkatest/tests/core/downgrade_test.py
@@ -19,7 +19,8 @@
 
 from kafkatest.services.kafka import config_property
 from kafkatest.tests.end_to_end import EndToEndTest
-from kafkatest.version import LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, DEV_BRANCH, KafkaVersion
+from kafkatest.version import LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, \
+    LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, DEV_BRANCH, KafkaVersion
 
 class TestDowngrade(EndToEndTest):
     PARTITIONS = 3
@@ -79,6 +80,9 @@ def wait_until_rejoin(self):
                     timeout_sec=60, backoff_sec=1, err_msg="Replicas did not rejoin the ISR in a reasonable amount of time")
 
     @cluster(num_nodes=7)
+    @parametrize(version=str(LATEST_3_2), compression_types=["snappy"])
+    @parametrize(version=str(LATEST_3_2), compression_types=["zstd"], security_protocol="SASL_SSL")
+    @matrix(version=[str(LATEST_3_2)], compression_types=[["none"]], static_membership=[False, True])
     @parametrize(version=str(LATEST_3_1), compression_types=["snappy"])
     @parametrize(version=str(LATEST_3_1), compression_types=["zstd"], security_protocol="SASL_SSL")
     @matrix(version=[str(LATEST_3_1)], compression_types=[["none"]], static_membership=[False, True])
@@ -139,14 +143,15 @@ def test_upgrade_and_downgrade(self, version, compression_types, security_protoc
 
         self.logger.info("First pass bounce - rolling upgrade")
         self.upgrade_from(kafka_version)
-        self.run_validation()
+        self.await_consumed_records(min_records=5000)
 
         upgrade_topic_id = self.kafka.topic_id(self.topic)
         assert start_topic_id == upgrade_topic_id
 
         self.logger.info("Second pass bounce - rolling downgrade")
+        num_records_acked = self.producer.num_acked
         self.downgrade_to(kafka_version)
-        self.run_validation()
+        self.run_validation(min_records=num_records_acked+5000)
 
         downgrade_topic_id = self.kafka.topic_id(self.topic)
         assert upgrade_topic_id == downgrade_topic_id
diff --git a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py
index aa60878dac2aa..571f8241a89e0 100644
--- a/tests/kafkatest/tests/core/security_rolling_upgrade_test.py
+++ b/tests/kafkatest/tests/core/security_rolling_upgrade_test.py
@@ -71,7 +71,7 @@ def roll_in_secured_settings(self, client_protocol, broker_protocol):
         self.set_authorizer_and_bounce(client_protocol, broker_protocol)
 
     def set_authorizer_and_bounce(self, client_protocol, broker_protocol):
-        self.kafka.authorizer_class_name = KafkaService.ACL_AUTHORIZER
+        self.kafka.authorizer_class_name = KafkaService.ZK_ACL_AUTHORIZER
         # Force use of direct ZooKeeper access due to SecurityDisabledException: No Authorizer is configured on the broker.
         self.acls.set_acls(client_protocol, self.kafka, self.topic, self.group, force_use_zk_connection=True)
         self.acls.set_acls(broker_protocol, self.kafka, self.topic, self.group, force_use_zk_connection=True)
diff --git a/tests/kafkatest/tests/core/upgrade_test.py b/tests/kafkatest/tests/core/upgrade_test.py
index 291daca50964f..3cab2ae3e3dbe 100644
--- a/tests/kafkatest/tests/core/upgrade_test.py
+++ b/tests/kafkatest/tests/core/upgrade_test.py
@@ -24,7 +24,10 @@
 from kafkatest.tests.produce_consume_validate import ProduceConsumeValidateTest
 from kafkatest.utils import is_int
 from kafkatest.utils.remote_account import java_version
-from kafkatest.version import LATEST_0_8_2, LATEST_0_9, LATEST_0_10, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, V_0_11_0_0, V_2_8_0, V_3_0_0, DEV_BRANCH, KafkaVersion
+from kafkatest.version import LATEST_0_8_2, LATEST_0_9, LATEST_0_10, LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, \
+    LATEST_0_11_0, LATEST_1_0, LATEST_1_1, LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, \
+    LATEST_2_6, LATEST_2_7, LATEST_2_8, LATEST_3_0, LATEST_3_1, LATEST_3_2, V_0_11_0_0, V_2_8_0, V_3_0_0, DEV_BRANCH, \
+    KafkaVersion
 from kafkatest.services.kafka.util import new_jdk_not_supported
 
 class TestUpgrade(ProduceConsumeValidateTest):
@@ -91,6 +94,9 @@ def perform_upgrade(self, from_kafka_version, to_message_format_version=None):
             self.wait_until_rejoin()
 
     @cluster(num_nodes=6)
+    @parametrize(from_kafka_version=str(LATEST_3_2), to_message_format_version=None, compression_types=["none"])
+    @parametrize(from_kafka_version=str(LATEST_3_2), to_message_format_version=None, compression_types=["lz4"])
+    @parametrize(from_kafka_version=str(LATEST_3_2), to_message_format_version=None, compression_types=["snappy"])
     @parametrize(from_kafka_version=str(LATEST_3_1), to_message_format_version=None, compression_types=["none"])
     @parametrize(from_kafka_version=str(LATEST_3_1), to_message_format_version=None, compression_types=["lz4"])
     @parametrize(from_kafka_version=str(LATEST_3_1), to_message_format_version=None, compression_types=["snappy"])
diff --git a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py
index 241c3819b79e2..ea1b61bd57f33 100644
--- a/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py
+++ b/tests/kafkatest/tests/core/zookeeper_security_upgrade_test.py
@@ -95,7 +95,7 @@ def test_zk_security_upgrade(self, security_protocol):
 
         # set acls
         if self.is_secure:
-            self.kafka.authorizer_class_name = KafkaService.ACL_AUTHORIZER
+            self.kafka.authorizer_class_name = KafkaService.ZK_ACL_AUTHORIZER
             # Force use of direct ZooKeeper access because Kafka is not yet started
             self.acls.set_acls(security_protocol, self.kafka, self.topic, self.group, force_use_zk_connection=True,
                                additional_cluster_operations_to_grant=['Create'])
diff --git a/tests/kafkatest/tests/end_to_end.py b/tests/kafkatest/tests/end_to_end.py
index 533056540b0cf..3b9cf9f8c3c6d 100644
--- a/tests/kafkatest/tests/end_to_end.py
+++ b/tests/kafkatest/tests/end_to_end.py
@@ -87,7 +87,13 @@ def on_record_consumed(self, record, node):
         self.last_consumed_offsets[partition] = offset
         self.records_consumed.append(record_id)
 
-    def await_consumed_offsets(self, last_acked_offsets, timeout_sec):
+    def await_produced_records(self, min_records, timeout_sec=30):
+        wait_until(lambda: self.producer.num_acked > min_records,
+                   timeout_sec=timeout_sec,
+                   err_msg="Producer failed to produce messages for %ds." %\
+                   timeout_sec)
+
+    def await_consumed_offsets(self, last_acked_offsets, timeout_sec=30):
         def has_finished_consuming():
             for partition, offset in last_acked_offsets.items():
                 if not partition in self.last_consumed_offsets:
@@ -102,6 +108,10 @@ def has_finished_consuming():
                    err_msg="Consumer failed to consume up to offsets %s after waiting %ds." %\
                    (str(last_acked_offsets), timeout_sec))
 
+    def await_consumed_records(self, min_records, producer_timeout_sec=30,
+                               consumer_timeout_sec=30):
+        self.await_produced_records(min_records=min_records)
+        self.await_consumed_offsets(self.producer.last_acked_offsets)
 
     def _collect_all_logs(self):
         for s in self.test_context.services:
@@ -120,11 +130,7 @@ def await_startup(self, min_records=5, timeout_sec=30):
     def run_validation(self, min_records=5000, producer_timeout_sec=30,
                        consumer_timeout_sec=30, enable_idempotence=False):
         try:
-            wait_until(lambda: self.producer.num_acked > min_records,
-                       timeout_sec=producer_timeout_sec,
-                       err_msg="Producer failed to produce messages for %ds." %\
-                       producer_timeout_sec)
-
+            self.await_produced_records(min_records, producer_timeout_sec)
             self.logger.info("Stopping producer after writing up to offsets %s" %\
                          str(self.producer.last_acked_offsets))
             self.producer.stop()
diff --git a/tests/kafkatest/tests/streams/streams_application_upgrade_test.py b/tests/kafkatest/tests/streams/streams_application_upgrade_test.py
index 496c474845086..b4227f6a0182b 100644
--- a/tests/kafkatest/tests/streams/streams_application_upgrade_test.py
+++ b/tests/kafkatest/tests/streams/streams_application_upgrade_test.py
@@ -22,11 +22,12 @@
 from kafkatest.services.streams import StreamsSmokeTestDriverService, StreamsSmokeTestJobRunnerService
 from kafkatest.services.zookeeper import ZookeeperService
 from kafkatest.version import LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \
-  LATEST_3_0, LATEST_3_1, DEV_VERSION, KafkaVersion
+  LATEST_3_0, LATEST_3_1, LATEST_3_2, DEV_VERSION, KafkaVersion
 
 smoke_test_versions = [str(LATEST_2_2), str(LATEST_2_3), str(LATEST_2_4),
                        str(LATEST_2_5), str(LATEST_2_6), str(LATEST_2_7),
-                       str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1)]
+                       str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1),
+                       str(LATEST_3_2)]
 dev_version = [str(DEV_VERSION)]
 
 class StreamsUpgradeTest(Test):
diff --git a/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py b/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
index 83b0735af7a9e..b1f6dfe1ab94d 100644
--- a/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
+++ b/tests/kafkatest/tests/streams/streams_broker_compatibility_test.py
@@ -23,7 +23,7 @@
 from kafkatest.services.zookeeper import ZookeeperService
 from kafkatest.version import LATEST_0_11_0, LATEST_0_10_2, LATEST_0_10_1, LATEST_0_10_0, LATEST_1_0, LATEST_1_1, \
     LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \
-    KafkaVersion
+    LATEST_3_0, LATEST_3_1, LATEST_3_2, KafkaVersion
 
 
 class StreamsBrokerCompatibility(Test):
@@ -64,6 +64,13 @@ def setUp(self):
 
 
     @cluster(num_nodes=4)
+    @parametrize(broker_version=str(LATEST_3_2))
+    @parametrize(broker_version=str(LATEST_3_1))
+    @parametrize(broker_version=str(LATEST_3_0))
+    @parametrize(broker_version=str(LATEST_2_8))
+    @parametrize(broker_version=str(LATEST_2_7))
+    @parametrize(broker_version=str(LATEST_2_6))
+    @parametrize(broker_version=str(LATEST_2_5))
     @parametrize(broker_version=str(LATEST_2_4))
     @parametrize(broker_version=str(LATEST_2_3))
     @parametrize(broker_version=str(LATEST_2_2))
@@ -89,6 +96,11 @@ def test_compatible_brokers_eos_disabled(self, broker_version):
         self.kafka.stop()
 
     @cluster(num_nodes=4)
+    @parametrize(broker_version=str(LATEST_3_2))
+    @parametrize(broker_version=str(LATEST_3_1))
+    @parametrize(broker_version=str(LATEST_3_0))
+    @parametrize(broker_version=str(LATEST_2_8))
+    @parametrize(broker_version=str(LATEST_2_7))
     @parametrize(broker_version=str(LATEST_2_6))
     @parametrize(broker_version=str(LATEST_2_5))
     @parametrize(broker_version=str(LATEST_2_4))
@@ -115,6 +127,10 @@ def test_compatible_brokers_eos_alpha_enabled(self, broker_version):
         self.consumer.stop()
         self.kafka.stop()
 
+    @cluster(num_nodes=4)
+    @parametrize(broker_version=str(LATEST_3_2))
+    @parametrize(broker_version=str(LATEST_3_1))
+    @parametrize(broker_version=str(LATEST_3_0))
     @parametrize(broker_version=str(LATEST_2_8))
     @parametrize(broker_version=str(LATEST_2_7))
     @parametrize(broker_version=str(LATEST_2_6))
diff --git a/tests/kafkatest/tests/streams/streams_standby_replica_test.py b/tests/kafkatest/tests/streams/streams_standby_replica_test.py
index a8c07513c1c2e..c0e5953f73d14 100644
--- a/tests/kafkatest/tests/streams/streams_standby_replica_test.py
+++ b/tests/kafkatest/tests/streams/streams_standby_replica_test.py
@@ -73,9 +73,9 @@ def test_standby_tasks_rebalance(self):
 
         processor_3.start()
 
-        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_1.STDOUT_FILE)
-        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_2.STDOUT_FILE)
-        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_3.STDOUT_FILE)
+        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_1.STDOUT_FILE)
+        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_2.STDOUT_FILE)
+        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_3.STDOUT_FILE)
 
         processor_1.stop()
 
@@ -93,9 +93,9 @@ def test_standby_tasks_rebalance(self):
 
         processor_2.start()
 
-        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_1.STDOUT_FILE)
-        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_2.STDOUT_FILE)
-        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_3.STDOUT_FILE, num_lines=2)
+        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_1.STDOUT_FILE)
+        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_2.STDOUT_FILE)
+        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_3.STDOUT_FILE, num_lines=2)
 
         processor_3.stop()
 
@@ -112,10 +112,9 @@ def test_standby_tasks_rebalance(self):
         self.wait_for_verification(processor_2, "ACTIVE_TASKS:3 STANDBY_TASKS:3", processor_2.STDOUT_FILE, num_lines=2)
 
         processor_1.start()
-
-        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_1.STDOUT_FILE)
-        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_3.STDOUT_FILE)
-        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:2", processor_2.STDOUT_FILE, num_lines=2)
+        self.wait_for_verification(processor_1, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_1.STDOUT_FILE)
+        self.wait_for_verification(processor_2, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_2.STDOUT_FILE, num_lines=2)
+        self.wait_for_verification(processor_3, "ACTIVE_TASKS:2 STANDBY_TASKS:[1-3]", processor_3.STDOUT_FILE)
 
         self.assert_consume(self.client_id, "assert all messages consumed from %s" % self.streams_sink_topic_1,
                             self.streams_sink_topic_1, self.num_messages)
diff --git a/tests/kafkatest/tests/streams/streams_upgrade_test.py b/tests/kafkatest/tests/streams/streams_upgrade_test.py
index 57c89aa4c83e0..8e49ec1f76e3d 100644
--- a/tests/kafkatest/tests/streams/streams_upgrade_test.py
+++ b/tests/kafkatest/tests/streams/streams_upgrade_test.py
@@ -26,17 +26,20 @@
 from kafkatest.tests.streams.utils import extract_generation_from_logs, extract_generation_id
 from kafkatest.version import LATEST_0_10_0, LATEST_0_10_1, LATEST_0_10_2, LATEST_0_11_0, LATEST_1_0, LATEST_1_1, \
     LATEST_2_0, LATEST_2_1, LATEST_2_2, LATEST_2_3, LATEST_2_4, LATEST_2_5, LATEST_2_6, LATEST_2_7, LATEST_2_8, \
-    LATEST_3_0, LATEST_3_1, DEV_BRANCH, DEV_VERSION, KafkaVersion
+    LATEST_3_0, LATEST_3_1, LATEST_3_2, DEV_BRANCH, DEV_VERSION, KafkaVersion
 
 # broker 0.10.0 is not compatible with newer Kafka Streams versions
 # broker 0.10.1 and 0.10.2 do not support headers, as required by suppress() (since v2.2.1)
 broker_upgrade_versions = [str(LATEST_0_11_0), str(LATEST_1_0), str(LATEST_1_1),
                            str(LATEST_2_0), str(LATEST_2_1), str(LATEST_2_2), str(LATEST_2_3),
                            str(LATEST_2_4), str(LATEST_2_5), str(LATEST_2_6), str(LATEST_2_7),
-                           str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1), str(DEV_BRANCH)]
+                           str(LATEST_2_8), str(LATEST_3_0), str(LATEST_3_1), str(LATEST_3_2),
+                           str(DEV_BRANCH)]
 
 metadata_1_versions = [str(LATEST_0_10_0)]
 metadata_2_versions = [str(LATEST_0_10_1), str(LATEST_0_10_2), str(LATEST_0_11_0), str(LATEST_1_0), str(LATEST_1_1)]
+fk_join_versions = [str(LATEST_2_4), str(LATEST_2_5), str(LATEST_2_6), str(LATEST_2_7), str(LATEST_2_8), 
+                    str(LATEST_3_0), str(LATEST_3_1), str(LATEST_3_2)]
 
 """
 After each release one should first check that the released version has been uploaded to 
@@ -86,9 +89,11 @@ def __init__(self, test_context):
         self.topics = {
             'echo' : { 'partitions': 5 },
             'data' : { 'partitions': 5 },
+            'fk' : { 'partitions': 5 },
         }
 
-    processed_msg = "processed [0-9]* records"
+    processed_data_msg = "processed [0-9]* records from topic=data"
+    processed_fk_msg = "processed [0-9]* records from topic=fk"
     base_version_number = str(DEV_VERSION).split("-")[0]
 
     def perform_broker_upgrade(self, to_version):
@@ -159,9 +164,9 @@ def test_upgrade_downgrade_brokers(self, from_version, to_version):
 
             with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor:
                 processor.start()
-                monitor.wait_until(self.processed_msg,
+                monitor.wait_until(self.processed_data_msg,
                                    timeout_sec=60,
-                                   err_msg="Never saw output '%s' on " % self.processed_msg + str(processor.node))
+                                   err_msg="Never saw output '%s' on " % self.processed_data_msg + str(processor.node))
 
             connected_message = "Discovered group coordinator"
             with processor.node.account.monitor_log(processor.LOG_FILE) as log_monitor:
@@ -172,9 +177,9 @@ def test_upgrade_downgrade_brokers(self, from_version, to_version):
                                            timeout_sec=120,
                                            err_msg=("Never saw output '%s' on " % connected_message) + str(processor.node.account))
 
-                    stdout_monitor.wait_until(self.processed_msg,
+                    stdout_monitor.wait_until(self.processed_data_msg,
                                               timeout_sec=60,
-                                              err_msg="Never saw output '%s' on" % self.processed_msg + str(processor.node.account))
+                                              err_msg="Never saw output '%s' on" % self.processed_data_msg + str(processor.node.account))
 
             # SmokeTestDriver allows up to 6 minutes to consume all
             # records for the verification step so this timeout is set to
@@ -192,8 +197,12 @@ def test_upgrade_downgrade_brokers(self, from_version, to_version):
     @cluster(num_nodes=6)
     @matrix(from_version=metadata_1_versions, to_version=[str(DEV_VERSION)])
     @matrix(from_version=metadata_2_versions, to_version=[str(DEV_VERSION)])
-    def test_metadata_upgrade(self, from_version, to_version):
+    @matrix(from_version=fk_join_versions, to_version=[str(DEV_VERSION)])
+    def test_rolling_upgrade_with_2_bounces(self, from_version, to_version):
         """
+        This test verifies that the cluster successfully upgrades despite changes in the metadata and FK
+        join protocols.
+        
         Starts 3 KafkaStreams instances with version <from_version> and upgrades one-by-one to <to_version>
         """
 
@@ -311,9 +320,14 @@ def start_all_nodes_with(self, version):
                 log_monitor.wait_until(kafka_version_str,
                                        timeout_sec=60,
                                        err_msg="Could not detect Kafka Streams version " + version + " " + str(node1.account))
-                monitor.wait_until(self.processed_msg,
+                monitor.wait_until(self.processed_data_msg,
                                    timeout_sec=60,
-                                   err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account))
+                                   err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node1.account))
+                if KafkaVersion(version).supports_fk_joins():
+                    monitor.wait_until(self.processed_fk_msg,
+                    timeout_sec=60,
+                    err_msg="Never saw output '%s' on " % self.processed_fk_msg + str(node1.account))
+
 
         # start second with <version>
         self.prepare_for(self.processor2, version)
@@ -325,12 +339,16 @@ def start_all_nodes_with(self, version):
                     log_monitor.wait_until(kafka_version_str,
                                            timeout_sec=60,
                                            err_msg="Could not detect Kafka Streams version " + version + " on " + str(node2.account))
-                    first_monitor.wait_until(self.processed_msg,
+                    first_monitor.wait_until(self.processed_data_msg,
                                              timeout_sec=60,
-                                             err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account))
-                    second_monitor.wait_until(self.processed_msg,
+                                             err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node1.account))
+                    second_monitor.wait_until(self.processed_data_msg,
                                               timeout_sec=60,
-                                              err_msg="Never saw output '%s' on " % self.processed_msg + str(node2.account))
+                                              err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node2.account))
+                    if KafkaVersion(version).supports_fk_joins():
+                        second_monitor.wait_until(self.processed_fk_msg,
+                        timeout_sec=60,
+                        err_msg="Never saw output '%s' on " % self.processed_fk_msg + str(node2.account))
 
         # start third with <version>
         self.prepare_for(self.processor3, version)
@@ -343,15 +361,19 @@ def start_all_nodes_with(self, version):
                         log_monitor.wait_until(kafka_version_str,
                                                timeout_sec=60,
                                                err_msg="Could not detect Kafka Streams version " + version + " on " + str(node3.account))
-                        first_monitor.wait_until(self.processed_msg,
+                        first_monitor.wait_until(self.processed_data_msg,
                                                  timeout_sec=60,
-                                                 err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account))
-                        second_monitor.wait_until(self.processed_msg,
+                                                 err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node1.account))
+                        second_monitor.wait_until(self.processed_data_msg,
                                                   timeout_sec=60,
-                                                  err_msg="Never saw output '%s' on " % self.processed_msg + str(node2.account))
-                        third_monitor.wait_until(self.processed_msg,
+                                                  err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node2.account))
+                        third_monitor.wait_until(self.processed_data_msg,
                                                  timeout_sec=60,
-                                                 err_msg="Never saw output '%s' on " % self.processed_msg + str(node3.account))
+                                                 err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node3.account))
+                        if KafkaVersion(version).supports_fk_joins():
+                            third_monitor.wait_until(self.processed_fk_msg,
+                            timeout_sec=60,
+                            err_msg="Never saw output '%s' on " % self.processed_fk_msg + str(node2.account))
 
     @staticmethod
     def prepare_for(processor, version):
@@ -381,12 +403,12 @@ def do_stop_start_bounce(self, processor, upgrade_from, new_version, counter):
         with first_other_node.account.monitor_log(first_other_processor.STDOUT_FILE) as first_other_monitor:
             with second_other_node.account.monitor_log(second_other_processor.STDOUT_FILE) as second_other_monitor:
                 processor.stop()
-                first_other_monitor.wait_until(self.processed_msg,
+                first_other_monitor.wait_until(self.processed_data_msg,
                                                timeout_sec=60,
-                                               err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account))
-                second_other_monitor.wait_until(self.processed_msg,
+                                               err_msg="Never saw output '%s' on " % self.processed_data_msg + str(first_other_node.account))
+                second_other_monitor.wait_until(self.processed_data_msg,
                                                 timeout_sec=60,
-                                                err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account))
+                                                err_msg="Never saw output '%s' on " % self.processed_data_msg + str(second_other_node.account))
         node.account.ssh_capture("grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False)
 
         if upgrade_from is None:  # upgrade disabled -- second round of rolling bounces
@@ -414,23 +436,23 @@ def do_stop_start_bounce(self, processor, upgrade_from, new_version, counter):
                         log_monitor.wait_until(kafka_version_str,
                                                timeout_sec=60,
                                                err_msg="Could not detect Kafka Streams version " + new_version + " on " + str(node.account))
-                        first_other_monitor.wait_until(self.processed_msg,
+                        first_other_monitor.wait_until(self.processed_data_msg,
                                                        timeout_sec=60,
-                                                       err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account))
+                                                       err_msg="Never saw output '%s' on " % self.processed_data_msg + str(first_other_node.account))
                         found = list(first_other_node.account.ssh_capture(grep_metadata_error + first_other_processor.STDERR_FILE, allow_fail=True))
                         if len(found) > 0:
                             raise Exception("Kafka Streams failed with 'unable to decode subscription data: version=2'")
 
-                        second_other_monitor.wait_until(self.processed_msg,
+                        second_other_monitor.wait_until(self.processed_data_msg,
                                                         timeout_sec=60,
-                                                        err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account))
+                                                        err_msg="Never saw output '%s' on " % self.processed_data_msg + str(second_other_node.account))
                         found = list(second_other_node.account.ssh_capture(grep_metadata_error + second_other_processor.STDERR_FILE, allow_fail=True))
                         if len(found) > 0:
                             raise Exception("Kafka Streams failed with 'unable to decode subscription data: version=2'")
 
-                        monitor.wait_until(self.processed_msg,
+                        monitor.wait_until(self.processed_data_msg,
                                            timeout_sec=60,
-                                           err_msg="Never saw output '%s' on " % self.processed_msg + str(node.account))
+                                           err_msg="Never saw output '%s' on " % self.processed_data_msg + str(node.account))
 
 
     def do_rolling_bounce(self, processor, counter, current_generation):
diff --git a/tests/kafkatest/version.py b/tests/kafkatest/version.py
index 95100b0532b9e..d29fd4beecc1a 100644
--- a/tests/kafkatest/version.py
+++ b/tests/kafkatest/version.py
@@ -106,6 +106,9 @@ def supports_topic_ids_when_using_zk(self):
         # Self-managed clusters always support topic ID, so this method only applies to ZK clusters.
         return self >= V_2_8_0
 
+    def supports_fk_joins(self):
+        return hasattr(self, "version") and self >= V_2_4_0
+
 def get_version(node=None):
     """Return the version attached to the given node.
     Default to DEV_BRANCH if node or node.version is undefined (aka None)
@@ -116,7 +119,7 @@ def get_version(node=None):
         return DEV_BRANCH
 
 DEV_BRANCH = KafkaVersion("dev")
-DEV_VERSION = KafkaVersion("3.2.0-SNAPSHOT")
+DEV_VERSION = KafkaVersion("3.4.0-SNAPSHOT")
 
 # 0.8.2.x versions
 V_0_8_2_1 = KafkaVersion("0.8.2.1")
@@ -214,12 +217,22 @@ def get_version(node=None):
 
 # 3.0.x versions
 V_3_0_0 = KafkaVersion("3.0.0")
-LATEST_3_0 = V_3_0_0
+V_3_0_1 = KafkaVersion("3.0.1")
+LATEST_3_0 = V_3_0_1
 
 # 3.1.x versions
 V_3_1_0 = KafkaVersion("3.1.0")
-LATEST_3_1 = V_3_1_0
+V_3_1_1 = KafkaVersion("3.1.1")
+LATEST_3_1 = V_3_1_1
 
 # 3.2.x versions
 V_3_2_0 = KafkaVersion("3.2.0")
-LATEST_3_2 = V_3_2_0
\ No newline at end of file
+LATEST_3_2 = V_3_2_0
+
+# 3.3.x versions
+V_3_3_0 = KafkaVersion("3.3.0")
+LATEST_3_3 = V_3_3_0
+
+# 3.4.x versions
+V_3_4_0 = KafkaVersion("3.4.0")
+LATEST_3_4 = V_3_4_0
diff --git a/tests/setup.py b/tests/setup.py
index d00248d4ee76e..c33a802f05fd8 100644
--- a/tests/setup.py
+++ b/tests/setup.py
@@ -51,7 +51,7 @@ def run_tests(self):
       license="apache2.0",
       packages=find_packages(),
       include_package_data=True,
-      install_requires=["ducktape>0.8", "requests==2.24.0"],
+      install_requires=["ducktape<0.9", "requests==2.24.0"],
       tests_require=["pytest", "mock"],
       cmdclass={'test': PyTest},
       zip_safe=False
diff --git a/tools/src/main/java/org/apache/kafka/tools/ClientCompatibilityTest.java b/tools/src/main/java/org/apache/kafka/tools/ClientCompatibilityTest.java
index e40ca7ae82ecf..9a04f068a66f5 100644
--- a/tools/src/main/java/org/apache/kafka/tools/ClientCompatibilityTest.java
+++ b/tools/src/main/java/org/apache/kafka/tools/ClientCompatibilityTest.java
@@ -88,6 +88,7 @@ static class TestConfig {
         final boolean createTopicsSupported;
         final boolean describeAclsSupported;
         final boolean describeConfigsSupported;
+        final boolean idempotentProducerSupported;
 
         TestConfig(Namespace res) {
             this.bootstrapServer = res.getString("bootstrapServer");
@@ -99,6 +100,7 @@ static class TestConfig {
             this.createTopicsSupported = res.getBoolean("createTopicsSupported");
             this.describeAclsSupported = res.getBoolean("describeAclsSupported");
             this.describeConfigsSupported = res.getBoolean("describeConfigsSupported");
+            this.idempotentProducerSupported = res.get("idempotentProducerSupported");
         }
     }
 
@@ -172,6 +174,13 @@ public static void main(String[] args) throws Exception {
             .dest("describeConfigsSupported")
             .metavar("DESCRIBE_CONFIGS_SUPPORTED")
             .help("Whether describeConfigs is supported in the AdminClient.");
+        parser.addArgument("--idempotent-producer-supported")
+            .action(store())
+            .required(true)
+            .type(Boolean.class)
+            .dest("idempotentProducerSupported")
+            .metavar("IDEMPOTENT_PRODUCER_SUPPORTED")
+            .help("Whether the producer supports idempotency.");
 
         Namespace res = null;
         try {
@@ -243,6 +252,9 @@ void run() throws Throwable {
     public void testProduce() throws Exception {
         Properties producerProps = new Properties();
         producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, testConfig.bootstrapServer);
+        if (!testConfig.idempotentProducerSupported) {
+            producerProps.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "false");
+        }
         ByteArraySerializer serializer = new ByteArraySerializer();
         KafkaProducer<byte[], byte[]> producer = new KafkaProducer<>(producerProps, serializer, serializer);
         ProducerRecord<byte[], byte[]> record1 = new ProducerRecord<>(testConfig.topic, message1);
diff --git a/tools/src/main/java/org/apache/kafka/tools/PushHttpMetricsReporter.java b/tools/src/main/java/org/apache/kafka/tools/PushHttpMetricsReporter.java
index b33b75c71061a..e8d590eeb0ee2 100644
--- a/tools/src/main/java/org/apache/kafka/tools/PushHttpMetricsReporter.java
+++ b/tools/src/main/java/org/apache/kafka/tools/PushHttpMetricsReporter.java
@@ -213,7 +213,6 @@ public void run() {
                 }
             } catch (Throwable t) {
                 log.error("Error reporting metrics", t);
-                throw new KafkaException("Failed to report current metrics", t);
             } finally {
                 if (connection != null) {
                     connection.disconnect();
diff --git a/tools/src/main/java/org/apache/kafka/tools/TransactionalMessageCopier.java b/tools/src/main/java/org/apache/kafka/tools/TransactionalMessageCopier.java
index 18c80996f2829..289327e57094b 100644
--- a/tools/src/main/java/org/apache/kafka/tools/TransactionalMessageCopier.java
+++ b/tools/src/main/java/org/apache/kafka/tools/TransactionalMessageCopier.java
@@ -47,6 +47,7 @@
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
@@ -260,27 +261,23 @@ private static String toJsonString(Map<String, Object> data) {
         return json;
     }
 
-    private static synchronized String statusAsJson(long totalProcessed, long consumedSinceLastRebalanced, long remaining, String transactionalId, String stage) {
-        Map<String, Object> statusData = new HashMap<>();
-        statusData.put("progress", transactionalId);
+    private static synchronized String statusAsJson(
+        String stage,
+        long totalProcessed,
+        long consumedSinceLastRebalanced,
+        long remaining,
+        String transactionalId
+    ) {
+        Map<String, Object> statusData = new LinkedHashMap<>();
+        statusData.put("transactionalId", transactionalId);
+        statusData.put("stage", stage);
+        statusData.put("time", FORMAT.format(new Date()));
         statusData.put("totalProcessed", totalProcessed);
         statusData.put("consumed", consumedSinceLastRebalanced);
         statusData.put("remaining", remaining);
-        statusData.put("time", FORMAT.format(new Date()));
-        statusData.put("stage", stage);
         return toJsonString(statusData);
     }
 
-    private static synchronized String shutDownString(long totalProcessed, long consumedSinceLastRebalanced, long remaining, String transactionalId) {
-        Map<String, Object> shutdownData = new HashMap<>();
-        shutdownData.put("shutdown_complete", transactionalId);
-        shutdownData.put("totalProcessed", totalProcessed);
-        shutdownData.put("consumed", consumedSinceLastRebalanced);
-        shutdownData.put("remaining", remaining);
-        shutdownData.put("time", FORMAT.format(new Date()));
-        return toJsonString(shutdownData);
-    }
-
     private static void abortTransactionAndResetPosition(
         KafkaProducer<String, String> producer,
         KafkaConsumer<String, String> consumer
@@ -330,8 +327,13 @@ public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                         .mapToLong(partition -> messagesRemaining(consumer, partition)).sum());
                     numMessagesProcessedSinceLastRebalance.set(0);
                     // We use message cap for remaining here as the remainingMessages are not set yet.
-                    System.out.println(statusAsJson(totalMessageProcessed.get(),
-                        numMessagesProcessedSinceLastRebalance.get(), remainingMessages.get(), transactionalId, "RebalanceComplete"));
+                    System.out.println(statusAsJson(
+                        "RebalanceComplete",
+                        totalMessageProcessed.get(),
+                        numMessagesProcessedSinceLastRebalance.get(),
+                        remainingMessages.get(),
+                        transactionalId
+                    ));
                 }
             });
         } else {
@@ -349,16 +351,26 @@ public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
         Exit.addShutdownHook("transactional-message-copier-shutdown-hook", () -> {
             isShuttingDown.set(true);
             consumer.wakeup();
-            System.out.println(shutDownString(totalMessageProcessed.get(),
-                numMessagesProcessedSinceLastRebalance.get(), remainingMessages.get(), transactionalId));
+            System.out.println(statusAsJson(
+                "ShutdownComplete",
+                totalMessageProcessed.get(),
+                numMessagesProcessedSinceLastRebalance.get(),
+                remainingMessages.get(),
+                transactionalId
+            ));
         });
 
         final boolean useGroupMetadata = parsedArgs.getBoolean("useGroupMetadata");
         try {
             Random random = new Random();
             while (!isShuttingDown.get() && remainingMessages.get() > 0) {
-                System.out.println(statusAsJson(totalMessageProcessed.get(),
-                    numMessagesProcessedSinceLastRebalance.get(), remainingMessages.get(), transactionalId, "ProcessLoop"));
+                System.out.println(statusAsJson(
+                    "ProcessLoop",
+                    totalMessageProcessed.get(),
+                    numMessagesProcessedSinceLastRebalance.get(),
+                    remainingMessages.get(),
+                    transactionalId
+                ));
 
                 ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(200));
                 if (records.count() > 0) {
diff --git a/vagrant/base.sh b/vagrant/base.sh
index 24eecdb9f6bff..0e69add6616af 100755
--- a/vagrant/base.sh
+++ b/vagrant/base.sh
@@ -148,10 +148,13 @@ get_kafka 2.7.1 2.12
 chmod a+rw /opt/kafka-2.7.1
 get_kafka 2.8.1 2.12
 chmod a+rw /opt/kafka-2.8.1
-get_kafka 3.0.0 2.12
-chmod a+rw /opt/kafka-3.0.0
-get_kafka 3.1.0 2.12
-chmod a+rw /opt/kafka-3.1.0
+get_kafka 3.0.1 2.12
+chmod a+rw /opt/kafka-3.0.1
+get_kafka 3.1.1 2.12
+chmod a+rw /opt/kafka-3.1.1
+get_kafka 3.2.0 2.12
+chmod a+rw /opt/kafka-3.2.0
+
 
 # For EC2 nodes, we want to use /mnt, which should have the local disk. On local
 # VMs, we can just create it if it doesn't exist and use it like we'd use